Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
tasks
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
biowdl
tasks
Commits
58c0da16
Commit
58c0da16
authored
6 years ago
by
Ruben Vorderman
Browse files
Options
Downloads
Patches
Plain Diff
picard to 1.0
parent
94719df0
No related branches found
No related tags found
1 merge request
!34
Move all files to version 1.0
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
picard.wdl
+172
-158
172 additions, 158 deletions
picard.wdl
with
172 additions
and
158 deletions
picard.wdl
+
172
−
158
View file @
58c0da16
task CollectMultipleMetrics {
version 1.0
String? preCommand
File bamFile
File bamIndex
File refFasta
File refDict
File refFastaIndex
String basename
# These should probably be optional, but I'm not sure how to handle the output in that
# case (without a null literal).
Boolean collectAlignmentSummaryMetrics = true
Boolean collectInsertSizeMetrics = true
Boolean qualityScoreDistribution = true
Boolean meanQualityByCycle = true
Boolean collectBaseDistributionByCycle = true
Boolean collectGcBiasMetrics = true
#Boolean? rnaSeqMetrics = false # There is a bug in picard https://github.com/broadinstitute/picard/issues/999
Boolean collectSequencingArtifactMetrics = true
Boolean collectQualityYieldMetrics = true
String? picardJar
Float? memory
Float? memoryMultiplier
task CollectMultipleMetrics {
input {
String? preCommand
File bamFile
File bamIndex
File refFasta
File refDict
File refFastaIndex
String basename
# These should probably be optional, but I'm not sure how to handle the output in that
# case (without a null literal).
Boolean collectAlignmentSummaryMetrics = true
Boolean collectInsertSizeMetrics = true
Boolean qualityScoreDistribution = true
Boolean meanQualityByCycle = true
Boolean collectBaseDistributionByCycle = true
Boolean collectGcBiasMetrics = true
#Boolean? rnaSeqMetrics = false # There is a bug in picard https://github.com/broadinstitute/picard/issues/999
Boolean collectSequencingArtifactMetrics = true
Boolean collectQualityYieldMetrics = true
String? picardJar
Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0]))
Int mem = ceil(select_first([memory, 4.0]))
String toolCommand = if defined(picardJar)
String toolCommand = if defined(picardJar)
...
@@ -32,23 +35,23 @@ task CollectMultipleMetrics {
...
@@ -32,23 +35,23 @@ task CollectMultipleMetrics {
command {
command {
set -e -o pipefail
set -e -o pipefail
mkdir -p $(dirname "
$
{basename}")
mkdir -p $(dirname "
~
{basename}")
$
{preCommand}
~
{preCommand}
$
{toolCommand} \
~
{toolCommand} \
CollectMultipleMetrics \
CollectMultipleMetrics \
I=
$
{bamFile} \
I=
~
{bamFile} \
R=
$
{refFasta} \
R=
~
{refFasta} \
O=
$
{basename} \
O=
~
{basename} \
PROGRAM=null \
PROGRAM=null \
$
{true="PROGRAM=CollectAlignmentSummaryMetrics" false="" collectAlignmentSummaryMetrics} \
~
{true="PROGRAM=CollectAlignmentSummaryMetrics" false="" collectAlignmentSummaryMetrics} \
$
{true="PROGRAM=CollectInsertSizeMetrics" false="" collectInsertSizeMetrics} \
~
{true="PROGRAM=CollectInsertSizeMetrics" false="" collectInsertSizeMetrics} \
$
{true="PROGRAM=QualityScoreDistribution" false="" qualityScoreDistribution} \
~
{true="PROGRAM=QualityScoreDistribution" false="" qualityScoreDistribution} \
$
{true="PROGRAM=MeanQualityByCycle" false="" meanQualityByCycle} \
~
{true="PROGRAM=MeanQualityByCycle" false="" meanQualityByCycle} \
$
{true="PROGRAM=CollectBaseDistributionByCycle" false="" collectBaseDistributionByCycle} \
~
{true="PROGRAM=CollectBaseDistributionByCycle" false="" collectBaseDistributionByCycle} \
$
{true="PROGRAM=CollectGcBiasMetrics" false="" collectGcBiasMetrics} \
~
{true="PROGRAM=CollectGcBiasMetrics" false="" collectGcBiasMetrics} \
$
{true="PROGRAM=CollectSequencingArtifactMetrics" false=""
~
{true="PROGRAM=CollectSequencingArtifactMetrics" false=""
collectSequencingArtifactMetrics} \
collectSequencingArtifactMetrics} \
$
{true="PROGRAM=CollectQualityYieldMetrics" false="" collectQualityYieldMetrics}
~
{true="PROGRAM=CollectQualityYieldMetrics" false="" collectQualityYieldMetrics}
}
}
output {
output {
...
@@ -78,18 +81,19 @@ task CollectMultipleMetrics {
...
@@ -78,18 +81,19 @@ task CollectMultipleMetrics {
}
}
task CollectRnaSeqMetrics {
task CollectRnaSeqMetrics {
String? preCommand
input {
File bamFile
String? preCommand
File bamIndex
File bamFile
File refRefflat
File bamIndex
String basename
File refRefflat
String? strandSpecificity = "NONE"
String basename
String? strandSpecificity = "NONE"
String? picardJar
String? picardJar
Float? memory
Float? memoryMultiplier
Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0]))
Int mem = ceil(select_first([memory, 4.0]))
String toolCommand = if defined(picardJar)
String toolCommand = if defined(picardJar)
...
@@ -98,15 +102,15 @@ task CollectRnaSeqMetrics {
...
@@ -98,15 +102,15 @@ task CollectRnaSeqMetrics {
command {
command {
set -e -o pipefail
set -e -o pipefail
mkdir -p $(dirname "
$
{basename}")
mkdir -p $(dirname "
~
{basename}")
$
{preCommand}
~
{preCommand}
$
{toolCommand} \
~
{toolCommand} \
CollectRnaSeqMetrics \
CollectRnaSeqMetrics \
I=
$
{bamFile} \
I=
~
{bamFile} \
O=
$
{basename}.RNA_Metrics \
O=
~
{basename}.RNA_Metrics \
CHART_OUTPUT=
$
{basename}.RNA_Metrics.pdf \
CHART_OUTPUT=
~
{basename}.RNA_Metrics.pdf \
$
{"STRAND_SPECIFICITY=" + strandSpecificity} \
~
{"STRAND_SPECIFICITY=" + strandSpecificity} \
REF_FLAT=
$
{refRefflat}
REF_FLAT=
~
{refRefflat}
}
}
output {
output {
...
@@ -120,20 +124,22 @@ task CollectRnaSeqMetrics {
...
@@ -120,20 +124,22 @@ task CollectRnaSeqMetrics {
}
}
task CollectTargetedPcrMetrics {
task CollectTargetedPcrMetrics {
String? preCommand
input {
File bamFile
String? preCommand
File bamIndex
File bamFile
File refFasta
File bamIndex
File refDict
File refFasta
File refFastaIndex
File refDict
File ampliconIntervals
File refFastaIndex
Array[File]+ targetIntervals
File ampliconIntervals
String basename
Array[File]+ targetIntervals
String basename
String? picardJar
String? picardJar
Float? memory
Float? memoryMultiplier
Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0]))
Int mem = ceil(select_first([memory, 4.0]))
...
@@ -143,17 +149,17 @@ task CollectTargetedPcrMetrics {
...
@@ -143,17 +149,17 @@ task CollectTargetedPcrMetrics {
command {
command {
set -e -o pipefail
set -e -o pipefail
mkdir -p $(dirname "
$
{basename}")
mkdir -p $(dirname "
~
{basename}")
$
{preCommand}
~
{preCommand}
$
{toolCommand} \
~
{toolCommand} \
CollectTargetedPcrMetrics \
CollectTargetedPcrMetrics \
I=
$
{bamFile} \
I=
~
{bamFile} \
R=
$
{refFasta} \
R=
~
{refFasta} \
AMPLICON_INTERVALS=
$
{ampliconIntervals} \
AMPLICON_INTERVALS=
~
{ampliconIntervals} \
TARGET_INTERVALS=
$
{sep=" TARGET_INTERVALS=" targetIntervals} \
TARGET_INTERVALS=
~
{sep=" TARGET_INTERVALS=" targetIntervals} \
O=
$
{basename}.targetPcrMetrics \
O=
~
{basename}.targetPcrMetrics \
PER_BASE_COVERAGE=
$
{basename}.targetPcrPerBaseCoverage \
PER_BASE_COVERAGE=
~
{basename}.targetPcrPerBaseCoverage \
PER_TARGET_COVERAGE=
$
{basename}.targetPcrPerTargetCoverage
PER_TARGET_COVERAGE=
~
{basename}.targetPcrPerTargetCoverage
}
}
output {
output {
...
@@ -169,14 +175,16 @@ task CollectTargetedPcrMetrics {
...
@@ -169,14 +175,16 @@ task CollectTargetedPcrMetrics {
# Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs
# Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs
task GatherBamFiles {
task GatherBamFiles {
String? preCommand
input {
Array[File]+ input_bams
String? preCommand
String output_bam_path
Array[File]+ input_bams
Int? compression_level
String output_bam_path
String? picardJar
Int? compression_level
String? picardJar
Float? memory
Float? memoryMultiplier
Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0]))
Int mem = ceil(select_first([memory, 4.0]))
...
@@ -186,19 +194,19 @@ task GatherBamFiles {
...
@@ -186,19 +194,19 @@ task GatherBamFiles {
command {
command {
set -e -o pipefail
set -e -o pipefail
$
{preCommand}
~
{preCommand}
$
{toolCommand} \
~
{toolCommand} \
GatherBamFiles \
GatherBamFiles \
INPUT=
$
{sep=' INPUT=' input_bams} \
INPUT=
~
{sep=' INPUT=' input_bams} \
OUTPUT=
$
{output_bam_path} \
OUTPUT=
~
{output_bam_path} \
CREATE_INDEX=true \
CREATE_INDEX=true \
CREATE_MD5_FILE=true
CREATE_MD5_FILE=true
}
}
output {
output {
File output_bam = "
$
{output_bam_path}"
File output_bam = "
~
{output_bam_path}"
File output_bam_index = sub(output_bam_path, ".bam$", ".bai")
File output_bam_index = sub(output_bam_path, ".bam$", ".bai")
File output_bam_md5 = "
$
{output_bam_path}.md5"
File output_bam_md5 = "
~
{output_bam_path}.md5"
}
}
runtime {
runtime {
...
@@ -208,21 +216,23 @@ task GatherBamFiles {
...
@@ -208,21 +216,23 @@ task GatherBamFiles {
# Mark duplicate reads to avoid counting non-independent observations
# Mark duplicate reads to avoid counting non-independent observations
task MarkDuplicates {
task MarkDuplicates {
String? preCommand
input {
Array[File] input_bams
String? preCommand
String output_bam_path
Array[File] input_bams
String metrics_path
String output_bam_path
Int? compression_level
String metrics_path
String? picardJar
Int? compression_level
String? picardJar
Float? memory
Float? memoryMultiplier
Float? memory
Float? memoryMultiplier
# The program default for READ_NAME_REGEX is appropriate in nearly every case.
# Sometimes we wish to supply "null" in order to turn off optical duplicate detection
# The program default for READ_NAME_REGEX is appropriate in nearly every case.
# This can be desirable if you don't mind the estimated library size being wrong and optical duplicate detection is taking >7 days and failing
# Sometimes we wish to supply "null" in order to turn off optical duplicate detection
String? read_name_regex
# This can be desirable if you don't mind the estimated library size being wrong and optical duplicate detection is taking >7 days and failing
String? read_name_regex
}
# Task is assuming query-sorted input so that the Secondary and Supplementary reads get marked correctly
# Task is assuming query-sorted input so that the Secondary and Supplementary reads get marked correctly
# This works because the output of BWA is query-grouped and therefore, so is the output of MergeBamAlignment.
# This works because the output of BWA is query-grouped and therefore, so is the output of MergeBamAlignment.
# While query-grouped isn't actually query-sorted, it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname"
# While query-grouped isn't actually query-sorted, it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname"
...
@@ -234,15 +244,15 @@ task MarkDuplicates {
...
@@ -234,15 +244,15 @@ task MarkDuplicates {
command {
command {
set -e -o pipefail
set -e -o pipefail
$
{preCommand}
~
{preCommand}
mkdir -p $(dirname
$
{output_bam_path})
mkdir -p $(dirname
~
{output_bam_path})
$
{toolCommand} \
~
{toolCommand} \
MarkDuplicates \
MarkDuplicates \
INPUT=
$
{sep=' INPUT=' input_bams} \
INPUT=
~
{sep=' INPUT=' input_bams} \
OUTPUT=
$
{output_bam_path} \
OUTPUT=
~
{output_bam_path} \
METRICS_FILE=
$
{metrics_path} \
METRICS_FILE=
~
{metrics_path} \
VALIDATION_STRINGENCY=SILENT \
VALIDATION_STRINGENCY=SILENT \
$
{"READ_NAME_REGEX=" + read_name_regex} \
~
{"READ_NAME_REGEX=" + read_name_regex} \
OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \
OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \
CLEAR_DT="false" \
CLEAR_DT="false" \
CREATE_INDEX=true \
CREATE_INDEX=true \
...
@@ -262,16 +272,17 @@ task MarkDuplicates {
...
@@ -262,16 +272,17 @@ task MarkDuplicates {
# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs
# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs
task MergeVCFs {
task MergeVCFs {
String? preCommand
input {
Array[File] inputVCFs
String? preCommand
Array[File] inputVCFsIndexes
Array[File] inputVCFs
String outputVCFpath
Array[File] inputVCFsIndexes
Int? compressionLevel
String outputVCFpath
String? picardJar
Int? compressionLevel
String? picardJar
Float? memory
Float? memoryMultiplier
Float? memory
Float? memoryMultiplier
}
# Using MergeVcfs instead of GatherVcfs so we can create indices
# Using MergeVcfs instead of GatherVcfs so we can create indices
# See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket
# See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket
Int mem = ceil(select_first([memory, 4.0]))
Int mem = ceil(select_first([memory, 4.0]))
...
@@ -282,11 +293,11 @@ task MergeVCFs {
...
@@ -282,11 +293,11 @@ task MergeVCFs {
command {
command {
set -e -o pipefail
set -e -o pipefail
$
{preCommand}
~
{preCommand}
$
{toolCommand} \
~
{toolCommand} \
MergeVcfs \
MergeVcfs \
INPUT=
$
{sep=' INPUT=' inputVCFs} \
INPUT=
~
{sep=' INPUT=' inputVCFs} \
OUTPUT=
$
{outputVCFpath}
OUTPUT=
~
{outputVCFpath}
}
}
output {
output {
...
@@ -300,14 +311,16 @@ task MergeVCFs {
...
@@ -300,14 +311,16 @@ task MergeVCFs {
}
}
task SamToFastq {
task SamToFastq {
String? preCommand
input {
File inputBam
String? preCommand
String outputRead1
File inputBam
String? outputRead2
String outputRead1
String? outputUnpaired
String? outputRead2
String? picardJar
String? outputUnpaired
Float? memory
String? picardJar
Float? memoryMultiplier
Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 16.0])) # High memory default to avoid crashes.
Int mem = ceil(select_first([memory, 16.0])) # High memory default to avoid crashes.
String toolCommand = if defined(picardJar)
String toolCommand = if defined(picardJar)
...
@@ -316,13 +329,13 @@ task SamToFastq {
...
@@ -316,13 +329,13 @@ task SamToFastq {
command {
command {
set -e -o pipefail
set -e -o pipefail
$
{preCommand}
~
{preCommand}
$
{toolCommand} \
~
{toolCommand} \
SamToFastq \
SamToFastq \
I=
$
{inputBam} \
I=
~
{inputBam} \
$
{"FASTQ=" + outputRead1} \
~
{"FASTQ=" + outputRead1} \
$
{"SECOND_END_FASTQ=" + outputRead2} \
~
{"SECOND_END_FASTQ=" + outputRead2} \
$
{"UNPAIRED_FASTQ=" + outputUnpaired}
~
{"UNPAIRED_FASTQ=" + outputUnpaired}
}
}
output {
output {
...
@@ -337,14 +350,15 @@ task SamToFastq {
...
@@ -337,14 +350,15 @@ task SamToFastq {
}
}
task ScatterIntervalList {
task ScatterIntervalList {
String? preCommand
input {
File interval_list
String? preCommand
Int scatter_count
File interval_list
String? picardJar
Int scatter_count
String? picardJar
Float? memory
Float? memoryMultiplier
Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0]))
Int mem = ceil(select_first([memory, 4.0]))
String toolCommand = if defined(picardJar)
String toolCommand = if defined(picardJar)
...
@@ -353,15 +367,15 @@ task ScatterIntervalList {
...
@@ -353,15 +367,15 @@ task ScatterIntervalList {
command {
command {
set -e -o pipefail
set -e -o pipefail
$
{preCommand}
~
{preCommand}
mkdir scatter_list
mkdir scatter_list
$
{toolCommand} \
~
{toolCommand} \
IntervalListTools \
IntervalListTools \
SCATTER_COUNT=
$
{scatter_count} \
SCATTER_COUNT=
~
{scatter_count} \
SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \
SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \
UNIQUE=true \
UNIQUE=true \
SORT=true \
SORT=true \
INPUT=
$
{interval_list} \
INPUT=
~
{interval_list} \
OUTPUT=scatter_list
OUTPUT=scatter_list
}
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment