Skip to content
Snippets Groups Projects
Commit 58c0da16 authored by Ruben Vorderman
Browse files

picard to 1.0

parent 94719df0
No related branches found
No related tags found
1 merge request: !34 "Move all files to version 1.0"
task CollectMultipleMetrics { version 1.0
String? preCommand
File bamFile
File bamIndex
File refFasta
File refDict
File refFastaIndex
String basename
# These should probably be optional, but I'm not sure how to handle the output in that
# case (without a null literal).
Boolean collectAlignmentSummaryMetrics = true
Boolean collectInsertSizeMetrics = true
Boolean qualityScoreDistribution = true
Boolean meanQualityByCycle = true
Boolean collectBaseDistributionByCycle = true
Boolean collectGcBiasMetrics = true
#Boolean? rnaSeqMetrics = false # There is a bug in picard https://github.com/broadinstitute/picard/issues/999
Boolean collectSequencingArtifactMetrics = true
Boolean collectQualityYieldMetrics = true
String? picardJar
Float? memory
Float? memoryMultiplier
task CollectMultipleMetrics {
input {
String? preCommand
File bamFile
File bamIndex
File refFasta
File refDict
File refFastaIndex
String basename
# These should probably be optional, but I'm not sure how to handle the output in that
# case (without a null literal).
Boolean collectAlignmentSummaryMetrics = true
Boolean collectInsertSizeMetrics = true
Boolean qualityScoreDistribution = true
Boolean meanQualityByCycle = true
Boolean collectBaseDistributionByCycle = true
Boolean collectGcBiasMetrics = true
#Boolean? rnaSeqMetrics = false # There is a bug in picard https://github.com/broadinstitute/picard/issues/999
Boolean collectSequencingArtifactMetrics = true
Boolean collectQualityYieldMetrics = true
String? picardJar
Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0])) Int mem = ceil(select_first([memory, 4.0]))
String toolCommand = if defined(picardJar) String toolCommand = if defined(picardJar)
...@@ -32,23 +35,23 @@ task CollectMultipleMetrics { ...@@ -32,23 +35,23 @@ task CollectMultipleMetrics {
command { command {
set -e -o pipefail set -e -o pipefail
mkdir -p $(dirname "${basename}") mkdir -p $(dirname "~{basename}")
${preCommand} ~{preCommand}
${toolCommand} \ ~{toolCommand} \
CollectMultipleMetrics \ CollectMultipleMetrics \
I=${bamFile} \ I=~{bamFile} \
R=${refFasta} \ R=~{refFasta} \
O=${basename} \ O=~{basename} \
PROGRAM=null \ PROGRAM=null \
${true="PROGRAM=CollectAlignmentSummaryMetrics" false="" collectAlignmentSummaryMetrics} \ ~{true="PROGRAM=CollectAlignmentSummaryMetrics" false="" collectAlignmentSummaryMetrics} \
${true="PROGRAM=CollectInsertSizeMetrics" false="" collectInsertSizeMetrics} \ ~{true="PROGRAM=CollectInsertSizeMetrics" false="" collectInsertSizeMetrics} \
${true="PROGRAM=QualityScoreDistribution" false="" qualityScoreDistribution} \ ~{true="PROGRAM=QualityScoreDistribution" false="" qualityScoreDistribution} \
${true="PROGRAM=MeanQualityByCycle" false="" meanQualityByCycle} \ ~{true="PROGRAM=MeanQualityByCycle" false="" meanQualityByCycle} \
${true="PROGRAM=CollectBaseDistributionByCycle" false="" collectBaseDistributionByCycle} \ ~{true="PROGRAM=CollectBaseDistributionByCycle" false="" collectBaseDistributionByCycle} \
${true="PROGRAM=CollectGcBiasMetrics" false="" collectGcBiasMetrics} \ ~{true="PROGRAM=CollectGcBiasMetrics" false="" collectGcBiasMetrics} \
${true="PROGRAM=CollectSequencingArtifactMetrics" false="" ~{true="PROGRAM=CollectSequencingArtifactMetrics" false=""
collectSequencingArtifactMetrics} \ collectSequencingArtifactMetrics} \
${true="PROGRAM=CollectQualityYieldMetrics" false="" collectQualityYieldMetrics} ~{true="PROGRAM=CollectQualityYieldMetrics" false="" collectQualityYieldMetrics}
} }
output { output {
...@@ -78,18 +81,19 @@ task CollectMultipleMetrics { ...@@ -78,18 +81,19 @@ task CollectMultipleMetrics {
} }
task CollectRnaSeqMetrics { task CollectRnaSeqMetrics {
String? preCommand input {
File bamFile String? preCommand
File bamIndex File bamFile
File refRefflat File bamIndex
String basename File refRefflat
String? strandSpecificity = "NONE" String basename
String? strandSpecificity = "NONE"
String? picardJar
String? picardJar
Float? memory
Float? memoryMultiplier Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0])) Int mem = ceil(select_first([memory, 4.0]))
String toolCommand = if defined(picardJar) String toolCommand = if defined(picardJar)
...@@ -98,15 +102,15 @@ task CollectRnaSeqMetrics { ...@@ -98,15 +102,15 @@ task CollectRnaSeqMetrics {
command { command {
set -e -o pipefail set -e -o pipefail
mkdir -p $(dirname "${basename}") mkdir -p $(dirname "~{basename}")
${preCommand} ~{preCommand}
${toolCommand} \ ~{toolCommand} \
CollectRnaSeqMetrics \ CollectRnaSeqMetrics \
I=${bamFile} \ I=~{bamFile} \
O=${basename}.RNA_Metrics \ O=~{basename}.RNA_Metrics \
CHART_OUTPUT=${basename}.RNA_Metrics.pdf \ CHART_OUTPUT=~{basename}.RNA_Metrics.pdf \
${"STRAND_SPECIFICITY=" + strandSpecificity} \ ~{"STRAND_SPECIFICITY=" + strandSpecificity} \
REF_FLAT=${refRefflat} REF_FLAT=~{refRefflat}
} }
output { output {
...@@ -120,20 +124,22 @@ task CollectRnaSeqMetrics { ...@@ -120,20 +124,22 @@ task CollectRnaSeqMetrics {
} }
task CollectTargetedPcrMetrics { task CollectTargetedPcrMetrics {
String? preCommand input {
File bamFile String? preCommand
File bamIndex File bamFile
File refFasta File bamIndex
File refDict File refFasta
File refFastaIndex File refDict
File ampliconIntervals File refFastaIndex
Array[File]+ targetIntervals File ampliconIntervals
String basename Array[File]+ targetIntervals
String basename
String? picardJar
String? picardJar
Float? memory
Float? memoryMultiplier Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0])) Int mem = ceil(select_first([memory, 4.0]))
...@@ -143,17 +149,17 @@ task CollectTargetedPcrMetrics { ...@@ -143,17 +149,17 @@ task CollectTargetedPcrMetrics {
command { command {
set -e -o pipefail set -e -o pipefail
mkdir -p $(dirname "${basename}") mkdir -p $(dirname "~{basename}")
${preCommand} ~{preCommand}
${toolCommand} \ ~{toolCommand} \
CollectTargetedPcrMetrics \ CollectTargetedPcrMetrics \
I=${bamFile} \ I=~{bamFile} \
R=${refFasta} \ R=~{refFasta} \
AMPLICON_INTERVALS=${ampliconIntervals} \ AMPLICON_INTERVALS=~{ampliconIntervals} \
TARGET_INTERVALS=${sep=" TARGET_INTERVALS=" targetIntervals} \ TARGET_INTERVALS=~{sep=" TARGET_INTERVALS=" targetIntervals} \
O=${basename}.targetPcrMetrics \ O=~{basename}.targetPcrMetrics \
PER_BASE_COVERAGE=${basename}.targetPcrPerBaseCoverage \ PER_BASE_COVERAGE=~{basename}.targetPcrPerBaseCoverage \
PER_TARGET_COVERAGE=${basename}.targetPcrPerTargetCoverage PER_TARGET_COVERAGE=~{basename}.targetPcrPerTargetCoverage
} }
output { output {
...@@ -169,14 +175,16 @@ task CollectTargetedPcrMetrics { ...@@ -169,14 +175,16 @@ task CollectTargetedPcrMetrics {
# Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs # Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs
task GatherBamFiles { task GatherBamFiles {
String? preCommand input {
Array[File]+ input_bams String? preCommand
String output_bam_path Array[File]+ input_bams
Int? compression_level String output_bam_path
String? picardJar Int? compression_level
String? picardJar
Float? memory
Float? memoryMultiplier Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0])) Int mem = ceil(select_first([memory, 4.0]))
...@@ -186,19 +194,19 @@ task GatherBamFiles { ...@@ -186,19 +194,19 @@ task GatherBamFiles {
command { command {
set -e -o pipefail set -e -o pipefail
${preCommand} ~{preCommand}
${toolCommand} \ ~{toolCommand} \
GatherBamFiles \ GatherBamFiles \
INPUT=${sep=' INPUT=' input_bams} \ INPUT=~{sep=' INPUT=' input_bams} \
OUTPUT=${output_bam_path} \ OUTPUT=~{output_bam_path} \
CREATE_INDEX=true \ CREATE_INDEX=true \
CREATE_MD5_FILE=true CREATE_MD5_FILE=true
} }
output { output {
File output_bam = "${output_bam_path}" File output_bam = "~{output_bam_path}"
File output_bam_index = sub(output_bam_path, ".bam$", ".bai") File output_bam_index = sub(output_bam_path, ".bam$", ".bai")
File output_bam_md5 = "${output_bam_path}.md5" File output_bam_md5 = "~{output_bam_path}.md5"
} }
runtime { runtime {
...@@ -208,21 +216,23 @@ task GatherBamFiles { ...@@ -208,21 +216,23 @@ task GatherBamFiles {
# Mark duplicate reads to avoid counting non-independent observations # Mark duplicate reads to avoid counting non-independent observations
task MarkDuplicates { task MarkDuplicates {
String? preCommand input {
Array[File] input_bams String? preCommand
String output_bam_path Array[File] input_bams
String metrics_path String output_bam_path
Int? compression_level String metrics_path
String? picardJar Int? compression_level
String? picardJar
Float? memory
Float? memoryMultiplier Float? memory
Float? memoryMultiplier
# The program default for READ_NAME_REGEX is appropriate in nearly every case.
# Sometimes we wish to supply "null" in order to turn off optical duplicate detection # The program default for READ_NAME_REGEX is appropriate in nearly every case.
# This can be desirable if you don't mind the estimated library size being wrong and optical duplicate detection is taking >7 days and failing # Sometimes we wish to supply "null" in order to turn off optical duplicate detection
String? read_name_regex # This can be desirable if you don't mind the estimated library size being wrong and optical duplicate detection is taking >7 days and failing
String? read_name_regex
}
# Task is assuming query-sorted input so that the Secondary and Supplementary reads get marked correctly # Task is assuming query-sorted input so that the Secondary and Supplementary reads get marked correctly
# This works because the output of BWA is query-grouped and therefore, so is the output of MergeBamAlignment. # This works because the output of BWA is query-grouped and therefore, so is the output of MergeBamAlignment.
# While query-grouped isn't actually query-sorted, it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname" # While query-grouped isn't actually query-sorted, it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname"
...@@ -234,15 +244,15 @@ task MarkDuplicates { ...@@ -234,15 +244,15 @@ task MarkDuplicates {
command { command {
set -e -o pipefail set -e -o pipefail
${preCommand} ~{preCommand}
mkdir -p $(dirname ${output_bam_path}) mkdir -p $(dirname ~{output_bam_path})
${toolCommand} \ ~{toolCommand} \
MarkDuplicates \ MarkDuplicates \
INPUT=${sep=' INPUT=' input_bams} \ INPUT=~{sep=' INPUT=' input_bams} \
OUTPUT=${output_bam_path} \ OUTPUT=~{output_bam_path} \
METRICS_FILE=${metrics_path} \ METRICS_FILE=~{metrics_path} \
VALIDATION_STRINGENCY=SILENT \ VALIDATION_STRINGENCY=SILENT \
${"READ_NAME_REGEX=" + read_name_regex} \ ~{"READ_NAME_REGEX=" + read_name_regex} \
OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \ OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \
CLEAR_DT="false" \ CLEAR_DT="false" \
CREATE_INDEX=true \ CREATE_INDEX=true \
...@@ -262,16 +272,17 @@ task MarkDuplicates { ...@@ -262,16 +272,17 @@ task MarkDuplicates {
# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs # Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs
task MergeVCFs { task MergeVCFs {
String? preCommand input {
Array[File] inputVCFs String? preCommand
Array[File] inputVCFsIndexes Array[File] inputVCFs
String outputVCFpath Array[File] inputVCFsIndexes
Int? compressionLevel String outputVCFpath
String? picardJar Int? compressionLevel
String? picardJar
Float? memory
Float? memoryMultiplier Float? memory
Float? memoryMultiplier
}
# Using MergeVcfs instead of GatherVcfs so we can create indices # Using MergeVcfs instead of GatherVcfs so we can create indices
# See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket
Int mem = ceil(select_first([memory, 4.0])) Int mem = ceil(select_first([memory, 4.0]))
...@@ -282,11 +293,11 @@ task MergeVCFs { ...@@ -282,11 +293,11 @@ task MergeVCFs {
command { command {
set -e -o pipefail set -e -o pipefail
${preCommand} ~{preCommand}
${toolCommand} \ ~{toolCommand} \
MergeVcfs \ MergeVcfs \
INPUT=${sep=' INPUT=' inputVCFs} \ INPUT=~{sep=' INPUT=' inputVCFs} \
OUTPUT=${outputVCFpath} OUTPUT=~{outputVCFpath}
} }
output { output {
...@@ -300,14 +311,16 @@ task MergeVCFs { ...@@ -300,14 +311,16 @@ task MergeVCFs {
} }
task SamToFastq { task SamToFastq {
String? preCommand input {
File inputBam String? preCommand
String outputRead1 File inputBam
String? outputRead2 String outputRead1
String? outputUnpaired String? outputRead2
String? picardJar String? outputUnpaired
Float? memory String? picardJar
Float? memoryMultiplier Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 16.0])) # High memory default to avoid crashes. Int mem = ceil(select_first([memory, 16.0])) # High memory default to avoid crashes.
String toolCommand = if defined(picardJar) String toolCommand = if defined(picardJar)
...@@ -316,13 +329,13 @@ task SamToFastq { ...@@ -316,13 +329,13 @@ task SamToFastq {
command { command {
set -e -o pipefail set -e -o pipefail
${preCommand} ~{preCommand}
${toolCommand} \ ~{toolCommand} \
SamToFastq \ SamToFastq \
I=${inputBam} \ I=~{inputBam} \
${"FASTQ=" + outputRead1} \ ~{"FASTQ=" + outputRead1} \
${"SECOND_END_FASTQ=" + outputRead2} \ ~{"SECOND_END_FASTQ=" + outputRead2} \
${"UNPAIRED_FASTQ=" + outputUnpaired} ~{"UNPAIRED_FASTQ=" + outputUnpaired}
} }
output { output {
...@@ -337,14 +350,15 @@ task SamToFastq { ...@@ -337,14 +350,15 @@ task SamToFastq {
} }
task ScatterIntervalList { task ScatterIntervalList {
String? preCommand input {
File interval_list String? preCommand
Int scatter_count File interval_list
String? picardJar Int scatter_count
String? picardJar
Float? memory
Float? memoryMultiplier Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0])) Int mem = ceil(select_first([memory, 4.0]))
String toolCommand = if defined(picardJar) String toolCommand = if defined(picardJar)
...@@ -353,15 +367,15 @@ task ScatterIntervalList { ...@@ -353,15 +367,15 @@ task ScatterIntervalList {
command { command {
set -e -o pipefail set -e -o pipefail
${preCommand} ~{preCommand}
mkdir scatter_list mkdir scatter_list
${toolCommand} \ ~{toolCommand} \
IntervalListTools \ IntervalListTools \
SCATTER_COUNT=${scatter_count} \ SCATTER_COUNT=~{scatter_count} \
SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \ SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \
UNIQUE=true \ UNIQUE=true \
SORT=true \ SORT=true \
INPUT=${interval_list} \ INPUT=~{interval_list} \
OUTPUT=scatter_list OUTPUT=scatter_list
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment