version 1.0 task CollectMultipleMetrics { input { String? preCommand File bamFile File bamIndex File refFasta File refDict File refFastaIndex String basename Boolean collectAlignmentSummaryMetrics = true Boolean collectInsertSizeMetrics = true Boolean qualityScoreDistribution = true Boolean meanQualityByCycle = true Boolean collectBaseDistributionByCycle = true Boolean collectGcBiasMetrics = true #Boolean rnaSeqMetrics = false # There is a bug in picard https://github.com/broadinstitute/picard/issues/999 Boolean collectSequencingArtifactMetrics = true Boolean collectQualityYieldMetrics = true String? picardJar Int memory = 4 Float memoryMultiplier = 3.0 } String toolCommand = if defined(picardJar) then "java -Xmx" + memory + "G -jar " + picardJar else "picard -Xmx" + memory + "G" command { set -e -o pipefail mkdir -p $(dirname "~{basename}") ~{preCommand} ~{toolCommand} \ CollectMultipleMetrics \ I=~{bamFile} \ R=~{refFasta} \ O=~{basename} \ PROGRAM=null \ ~{true="PROGRAM=CollectAlignmentSummaryMetrics" false="" collectAlignmentSummaryMetrics} \ ~{true="PROGRAM=CollectInsertSizeMetrics" false="" collectInsertSizeMetrics} \ ~{true="PROGRAM=QualityScoreDistribution" false="" qualityScoreDistribution} \ ~{true="PROGRAM=MeanQualityByCycle" false="" meanQualityByCycle} \ ~{true="PROGRAM=CollectBaseDistributionByCycle" false="" collectBaseDistributionByCycle} \ ~{true="PROGRAM=CollectGcBiasMetrics" false="" collectGcBiasMetrics} \ ~{true="PROGRAM=CollectSequencingArtifactMetrics" false="" collectSequencingArtifactMetrics} \ ~{true="PROGRAM=CollectQualityYieldMetrics" false="" collectQualityYieldMetrics} } output { File alignmentSummary = basename + ".alignment_summary_metrics" File baitBiasDetail = basename + ".bait_bias_detail_metrics" File baitBiasSummary = basename + ".bait_bias_summary_metrics" File baseDistributionByCycle = basename + ".base_distribution_by_cycle_metrics" File baseDistributionByCyclePdf = basename + ".base_distribution_by_cycle.pdf" File errorSummary = basename + ".error_summary_metrics" File gcBiasDetail = basename + ".gc_bias.detail_metrics" File gcBiasPdf = basename + ".gc_bias.pdf" File gcBiasSummary = basename + ".gc_bias.summary_metrics" File? insertSizeHistogramPdf = basename + ".insert_size_histogram.pdf" File? insertSize = basename + ".insert_size_metrics" File preAdapterDetail = basename + ".pre_adapter_detail_metrics" File preAdapterSummary = basename + ".pre_adapter_summary_metrics" File qualityByCycle = basename + ".quality_by_cycle_metrics" File qualityByCyclePdf = basename + ".quality_by_cycle.pdf" File qualityDistribution = basename + ".quality_distribution_metrics" File qualityDistributionPdf = basename + ".quality_distribution.pdf" File qualityYield = basename + ".quality_yield_metrics" } runtime { memory: ceil(memory * memoryMultiplier) } } task CollectRnaSeqMetrics { input { String? preCommand File bamFile File bamIndex File refRefflat String basename String strandSpecificity = "NONE" String? picardJar Int memory = 4 Float memoryMultiplier = 3.0 } String toolCommand = if defined(picardJar) then "java -Xmx" + memory + "G -jar " + picardJar else "picard -Xmx" + memory + "G" command { set -e -o pipefail mkdir -p $(dirname "~{basename}") ~{preCommand} ~{toolCommand} \ CollectRnaSeqMetrics \ I=~{bamFile} \ O=~{basename}.RNA_Metrics \ CHART_OUTPUT=~{basename}.RNA_Metrics.pdf \ STRAND_SPECIFICITY=~{strandSpecificity} \ REF_FLAT=~{refRefflat} } output { File? chart = basename + ".RNA_Metrics.pdf" File metrics = basename + ".RNA_Metrics" } runtime { memory: ceil(memory * memoryMultiplier) } } task CollectTargetedPcrMetrics { input { String? preCommand File bamFile File bamIndex File refFasta File refDict File refFastaIndex File ampliconIntervals Array[File]+ targetIntervals String basename String? picardJar Int memory = 4 Float memoryMultiplier = 3.0 } String toolCommand = if defined(picardJar) then "java -Xmx" + memory + "G -jar " + picardJar else "picard -Xmx" + memory + "G" command { set -e -o pipefail mkdir -p $(dirname "~{basename}") ~{preCommand} ~{toolCommand} \ CollectTargetedPcrMetrics \ I=~{bamFile} \ R=~{refFasta} \ AMPLICON_INTERVALS=~{ampliconIntervals} \ TARGET_INTERVALS=~{sep=" TARGET_INTERVALS=" targetIntervals} \ O=~{basename}.targetPcrMetrics \ PER_BASE_COVERAGE=~{basename}.targetPcrPerBaseCoverage \ PER_TARGET_COVERAGE=~{basename}.targetPcrPerTargetCoverage } output { File perTargetCoverage = basename + ".targetPcrPerTargetCoverage" File perBaseCoverage = basename + ".targetPcrPerBaseCoverage" File metrics = basename + ".targetPcrMetrics" } runtime { memory: ceil(memory * memoryMultiplier) } } # Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs task GatherBamFiles { input { String? preCommand Array[File]+ input_bams String output_bam_path Int? compression_level String? picardJar Int memory = 4 Float memoryMultiplier = 3.0 } String toolCommand = if defined(picardJar) then "java -Xmx" + memory + "G -jar " + picardJar else "picard -Xmx" + memory + "G" command { set -e -o pipefail ~{preCommand} ~{toolCommand} \ GatherBamFiles \ INPUT=~{sep=' INPUT=' input_bams} \ OUTPUT=~{output_bam_path} \ CREATE_INDEX=true \ CREATE_MD5_FILE=true } output { File output_bam = "~{output_bam_path}" File output_bam_index = sub(output_bam_path, ".bam$", ".bai") File output_bam_md5 = "~{output_bam_path}.md5" } runtime { memory: ceil(memory * memoryMultiplier) } } # Mark duplicate reads to avoid counting non-independent observations task MarkDuplicates { input { String? preCommand Array[File] input_bams String output_bam_path String metrics_path Int? compression_level String? picardJar Int memory = 4 Float memoryMultiplier = 3.0 # The program default for READ_NAME_REGEX is appropriate in nearly every case. # Sometimes we wish to supply "null" in order to turn off optical duplicate detection # This can be desirable if you don't mind the estimated library size being wrong and optical duplicate detection is taking >7 days and failing String? read_name_regex } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get marked correctly # This works because the output of BWA is query-grouped and therefore, so is the output of MergeBamAlignment. # While query-grouped isn't actually query-sorted, it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname" String toolCommand = if defined(picardJar) then "java -Xmx" + memory + "G -jar " + picardJar else "picard -Xmx" + memory + "G" command { set -e -o pipefail ~{preCommand} mkdir -p $(dirname ~{output_bam_path}) ~{toolCommand} \ MarkDuplicates \ INPUT=~{sep=' INPUT=' input_bams} \ OUTPUT=~{output_bam_path} \ METRICS_FILE=~{metrics_path} \ VALIDATION_STRINGENCY=SILENT \ ~{"READ_NAME_REGEX=" + read_name_regex} \ OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \ CLEAR_DT="false" \ CREATE_INDEX=true \ ADD_PG_TAG_TO_READS=false } output { File output_bam = output_bam_path File output_bam_index = sub(output_bam_path, ".bam$", ".bai") File duplicate_metrics = metrics_path } runtime { memory: ceil(memory * memoryMultiplier) } } # Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs task MergeVCFs { input { String? preCommand Array[File] inputVCFs Array[File] inputVCFsIndexes String outputVCFpath Int? compressionLevel String? picardJar Int memory = 4 Float memoryMultiplier = 3.0 } # Using MergeVcfs instead of GatherVcfs so we can create indices # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket String toolCommand = if defined(picardJar) then "java -Xmx" + memory + "G -jar " + picardJar else "picard -Xmx" + memory + "G" command { set -e -o pipefail ~{preCommand} ~{toolCommand} \ MergeVcfs \ INPUT=~{sep=' INPUT=' inputVCFs} \ OUTPUT=~{outputVCFpath} } output { File outputVCF = outputVCFpath File outputVCFindex = outputVCFpath + ".tbi" } runtime { memory: ceil(memory * memoryMultiplier) } } task SamToFastq { input { String? preCommand File inputBam String outputRead1 String? outputRead2 String? outputUnpaired String? picardJar Int memory = 16 # High memory default to avoid crashes. Float memoryMultiplier = 3.0 } String toolCommand = if defined(picardJar) then "java -Xmx" + memory + "G -jar " + picardJar else "picard -Xmx" + memory + "G" command { set -e -o pipefail ~{preCommand} ~{toolCommand} \ SamToFastq \ I=~{inputBam} \ ~{"FASTQ=" + outputRead1} \ ~{"SECOND_END_FASTQ=" + outputRead2} \ ~{"UNPAIRED_FASTQ=" + outputUnpaired} } output { File read1 = outputRead1 File? read2 = outputRead2 File? unpairedRead = outputUnpaired } runtime { memory: ceil(memory * memoryMultiplier) } } task ScatterIntervalList { input { String? preCommand File interval_list Int scatter_count String? picardJar Int memory = 4 Float memoryMultiplier = 3.0 } String toolCommand = if defined(picardJar) then "java -Xmx" + memory + "G -jar " + picardJar else "picard -Xmx" + memory + "G" command { set -e -o pipefail ~{preCommand} mkdir scatter_list ~{toolCommand} \ IntervalListTools \ SCATTER_COUNT=~{scatter_count} \ SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \ UNIQUE=true \ SORT=true \ INPUT=~{interval_list} \ OUTPUT=scatter_list } output { Array[File] out = glob("scatter_list/*/*.interval_list") Int interval_count = read_int(stdout()) } runtime { memory: ceil(memory * memoryMultiplier) } } task SortVcf { input { String? preCommand String? picardJar Array[File]+ vcfFiles String outputVcf File? sequenceDict Int memory = 4 Float memoryMultiplier = 3.0 } String toolCommand = if defined(picardJar) then "java -Xmx" + memory + "G -jar " + picardJar else "picard -Xmx" + memory + "G" command { set -e -o pipefail ~{preCommand} ~{toolCommand} \ SortVcf \ I=~{sep=" I=" vcfFiles} \ ~{"SEQUENCE_DICTIONARY=" + sequenceDict} \ O=~{outputVcf} } output { File vcfFile = outputVcf File vcfIndex = outputVcf + ".tbi" } runtime { memory: ceil(memory * memoryMultiplier) } }