Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
tasks
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
biowdl
tasks
Commits
58c0da16
Commit
58c0da16
authored
6 years ago
by
Ruben Vorderman
Browse files
Options
Downloads
Patches
Plain Diff
picard to 1.0
parent
94719df0
No related branches found
Branches containing commit
No related tags found
Tags containing commit
1 merge request
!34
Move all files to version 1.0
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
picard.wdl
+172
-158
172 additions, 158 deletions
picard.wdl
with
172 additions
and
158 deletions
picard.wdl
+
172
−
158
View file @
58c0da16
task CollectMultipleMetrics {
String? preCommand
File bamFile
File bamIndex
File refFasta
File refDict
File refFastaIndex
String basename
# These should probably be optional, but I'm not sure how to handle the output in that
# case (without a null literal).
Boolean collectAlignmentSummaryMetrics = true
Boolean collectInsertSizeMetrics = true
Boolean qualityScoreDistribution = true
Boolean meanQualityByCycle = true
Boolean collectBaseDistributionByCycle = true
Boolean collectGcBiasMetrics = true
#Boolean? rnaSeqMetrics = false # There is a bug in picard https://github.com/broadinstitute/picard/issues/999
Boolean collectSequencingArtifactMetrics = true
Boolean collectQualityYieldMetrics = true
String? picardJar
Float? memory
Float? memoryMultiplier
version 1.0
task CollectMultipleMetrics {
input {
String? preCommand
File bamFile
File bamIndex
File refFasta
File refDict
File refFastaIndex
String basename
# These should probably be optional, but I'm not sure how to handle the output in that
# case (without a null literal).
Boolean collectAlignmentSummaryMetrics = true
Boolean collectInsertSizeMetrics = true
Boolean qualityScoreDistribution = true
Boolean meanQualityByCycle = true
Boolean collectBaseDistributionByCycle = true
Boolean collectGcBiasMetrics = true
#Boolean? rnaSeqMetrics = false # There is a bug in picard https://github.com/broadinstitute/picard/issues/999
Boolean collectSequencingArtifactMetrics = true
Boolean collectQualityYieldMetrics = true
String? picardJar
Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0]))
String toolCommand = if defined(picardJar)
...
...
@@ -32,23 +35,23 @@ task CollectMultipleMetrics {
command {
set -e -o pipefail
mkdir -p $(dirname "
$
{basename}")
$
{preCommand}
$
{toolCommand} \
mkdir -p $(dirname "
~
{basename}")
~
{preCommand}
~
{toolCommand} \
CollectMultipleMetrics \
I=
$
{bamFile} \
R=
$
{refFasta} \
O=
$
{basename} \
I=
~
{bamFile} \
R=
~
{refFasta} \
O=
~
{basename} \
PROGRAM=null \
$
{true="PROGRAM=CollectAlignmentSummaryMetrics" false="" collectAlignmentSummaryMetrics} \
$
{true="PROGRAM=CollectInsertSizeMetrics" false="" collectInsertSizeMetrics} \
$
{true="PROGRAM=QualityScoreDistribution" false="" qualityScoreDistribution} \
$
{true="PROGRAM=MeanQualityByCycle" false="" meanQualityByCycle} \
$
{true="PROGRAM=CollectBaseDistributionByCycle" false="" collectBaseDistributionByCycle} \
$
{true="PROGRAM=CollectGcBiasMetrics" false="" collectGcBiasMetrics} \
$
{true="PROGRAM=CollectSequencingArtifactMetrics" false=""
~
{true="PROGRAM=CollectAlignmentSummaryMetrics" false="" collectAlignmentSummaryMetrics} \
~
{true="PROGRAM=CollectInsertSizeMetrics" false="" collectInsertSizeMetrics} \
~
{true="PROGRAM=QualityScoreDistribution" false="" qualityScoreDistribution} \
~
{true="PROGRAM=MeanQualityByCycle" false="" meanQualityByCycle} \
~
{true="PROGRAM=CollectBaseDistributionByCycle" false="" collectBaseDistributionByCycle} \
~
{true="PROGRAM=CollectGcBiasMetrics" false="" collectGcBiasMetrics} \
~
{true="PROGRAM=CollectSequencingArtifactMetrics" false=""
collectSequencingArtifactMetrics} \
$
{true="PROGRAM=CollectQualityYieldMetrics" false="" collectQualityYieldMetrics}
~
{true="PROGRAM=CollectQualityYieldMetrics" false="" collectQualityYieldMetrics}
}
output {
...
...
@@ -78,18 +81,19 @@ task CollectMultipleMetrics {
}
task CollectRnaSeqMetrics {
String? preCommand
File bamFile
File bamIndex
File refRefflat
String basename
String? strandSpecificity = "NONE"
String? picardJar
Float? memory
Float? memoryMultiplier
input {
String? preCommand
File bamFile
File bamIndex
File refRefflat
String basename
String? strandSpecificity = "NONE"
String? picardJar
Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0]))
String toolCommand = if defined(picardJar)
...
...
@@ -98,15 +102,15 @@ task CollectRnaSeqMetrics {
command {
set -e -o pipefail
mkdir -p $(dirname "
$
{basename}")
$
{preCommand}
$
{toolCommand} \
mkdir -p $(dirname "
~
{basename}")
~
{preCommand}
~
{toolCommand} \
CollectRnaSeqMetrics \
I=
$
{bamFile} \
O=
$
{basename}.RNA_Metrics \
CHART_OUTPUT=
$
{basename}.RNA_Metrics.pdf \
$
{"STRAND_SPECIFICITY=" + strandSpecificity} \
REF_FLAT=
$
{refRefflat}
I=
~
{bamFile} \
O=
~
{basename}.RNA_Metrics \
CHART_OUTPUT=
~
{basename}.RNA_Metrics.pdf \
~
{"STRAND_SPECIFICITY=" + strandSpecificity} \
REF_FLAT=
~
{refRefflat}
}
output {
...
...
@@ -120,20 +124,22 @@ task CollectRnaSeqMetrics {
}
task CollectTargetedPcrMetrics {
String? preCommand
File bamFile
File bamIndex
File refFasta
File refDict
File refFastaIndex
File ampliconIntervals
Array[File]+ targetIntervals
String basename
String? picardJar
Float? memory
Float? memoryMultiplier
input {
String? preCommand
File bamFile
File bamIndex
File refFasta
File refDict
File refFastaIndex
File ampliconIntervals
Array[File]+ targetIntervals
String basename
String? picardJar
Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0]))
...
...
@@ -143,17 +149,17 @@ task CollectTargetedPcrMetrics {
command {
set -e -o pipefail
mkdir -p $(dirname "
$
{basename}")
$
{preCommand}
$
{toolCommand} \
mkdir -p $(dirname "
~
{basename}")
~
{preCommand}
~
{toolCommand} \
CollectTargetedPcrMetrics \
I=
$
{bamFile} \
R=
$
{refFasta} \
AMPLICON_INTERVALS=
$
{ampliconIntervals} \
TARGET_INTERVALS=
$
{sep=" TARGET_INTERVALS=" targetIntervals} \
O=
$
{basename}.targetPcrMetrics \
PER_BASE_COVERAGE=
$
{basename}.targetPcrPerBaseCoverage \
PER_TARGET_COVERAGE=
$
{basename}.targetPcrPerTargetCoverage
I=
~
{bamFile} \
R=
~
{refFasta} \
AMPLICON_INTERVALS=
~
{ampliconIntervals} \
TARGET_INTERVALS=
~
{sep=" TARGET_INTERVALS=" targetIntervals} \
O=
~
{basename}.targetPcrMetrics \
PER_BASE_COVERAGE=
~
{basename}.targetPcrPerBaseCoverage \
PER_TARGET_COVERAGE=
~
{basename}.targetPcrPerTargetCoverage
}
output {
...
...
@@ -169,14 +175,16 @@ task CollectTargetedPcrMetrics {
# Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs
task GatherBamFiles {
String? preCommand
Array[File]+ input_bams
String output_bam_path
Int? compression_level
String? picardJar
Float? memory
Float? memoryMultiplier
input {
String? preCommand
Array[File]+ input_bams
String output_bam_path
Int? compression_level
String? picardJar
Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0]))
...
...
@@ -186,19 +194,19 @@ task GatherBamFiles {
command {
set -e -o pipefail
$
{preCommand}
$
{toolCommand} \
~
{preCommand}
~
{toolCommand} \
GatherBamFiles \
INPUT=
$
{sep=' INPUT=' input_bams} \
OUTPUT=
$
{output_bam_path} \
INPUT=
~
{sep=' INPUT=' input_bams} \
OUTPUT=
~
{output_bam_path} \
CREATE_INDEX=true \
CREATE_MD5_FILE=true
}
output {
File output_bam = "
$
{output_bam_path}"
File output_bam = "
~
{output_bam_path}"
File output_bam_index = sub(output_bam_path, ".bam$", ".bai")
File output_bam_md5 = "
$
{output_bam_path}.md5"
File output_bam_md5 = "
~
{output_bam_path}.md5"
}
runtime {
...
...
@@ -208,21 +216,23 @@ task GatherBamFiles {
# Mark duplicate reads to avoid counting non-independent observations
task MarkDuplicates {
String? preCommand
Array[File] input_bams
String output_bam_path
String metrics_path
Int? compression_level
String? picardJar
Float? memory
Float? memoryMultiplier
# The program default for READ_NAME_REGEX is appropriate in nearly every case.
# Sometimes we wish to supply "null" in order to turn off optical duplicate detection
# This can be desirable if you don't mind the estimated library size being wrong and optical duplicate detection is taking >7 days and failing
String? read_name_regex
input {
String? preCommand
Array[File] input_bams
String output_bam_path
String metrics_path
Int? compression_level
String? picardJar
Float? memory
Float? memoryMultiplier
# The program default for READ_NAME_REGEX is appropriate in nearly every case.
# Sometimes we wish to supply "null" in order to turn off optical duplicate detection
# This can be desirable if you don't mind the estimated library size being wrong and optical duplicate detection is taking >7 days and failing
String? read_name_regex
}
# Task is assuming query-sorted input so that the Secondary and Supplementary reads get marked correctly
# This works because the output of BWA is query-grouped and therefore, so is the output of MergeBamAlignment.
# While query-grouped isn't actually query-sorted, it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname"
...
...
@@ -234,15 +244,15 @@ task MarkDuplicates {
command {
set -e -o pipefail
$
{preCommand}
mkdir -p $(dirname
$
{output_bam_path})
$
{toolCommand} \
~
{preCommand}
mkdir -p $(dirname
~
{output_bam_path})
~
{toolCommand} \
MarkDuplicates \
INPUT=
$
{sep=' INPUT=' input_bams} \
OUTPUT=
$
{output_bam_path} \
METRICS_FILE=
$
{metrics_path} \
INPUT=
~
{sep=' INPUT=' input_bams} \
OUTPUT=
~
{output_bam_path} \
METRICS_FILE=
~
{metrics_path} \
VALIDATION_STRINGENCY=SILENT \
$
{"READ_NAME_REGEX=" + read_name_regex} \
~
{"READ_NAME_REGEX=" + read_name_regex} \
OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \
CLEAR_DT="false" \
CREATE_INDEX=true \
...
...
@@ -262,16 +272,17 @@ task MarkDuplicates {
# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs
task MergeVCFs {
String? preCommand
Array[File] inputVCFs
Array[File] inputVCFsIndexes
String outputVCFpath
Int? compressionLevel
String? picardJar
Float? memory
Float? memoryMultiplier
input {
String? preCommand
Array[File] inputVCFs
Array[File] inputVCFsIndexes
String outputVCFpath
Int? compressionLevel
String? picardJar
Float? memory
Float? memoryMultiplier
}
# Using MergeVcfs instead of GatherVcfs so we can create indices
# See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket
Int mem = ceil(select_first([memory, 4.0]))
...
...
@@ -282,11 +293,11 @@ task MergeVCFs {
command {
set -e -o pipefail
$
{preCommand}
$
{toolCommand} \
~
{preCommand}
~
{toolCommand} \
MergeVcfs \
INPUT=
$
{sep=' INPUT=' inputVCFs} \
OUTPUT=
$
{outputVCFpath}
INPUT=
~
{sep=' INPUT=' inputVCFs} \
OUTPUT=
~
{outputVCFpath}
}
output {
...
...
@@ -300,14 +311,16 @@ task MergeVCFs {
}
task SamToFastq {
String? preCommand
File inputBam
String outputRead1
String? outputRead2
String? outputUnpaired
String? picardJar
Float? memory
Float? memoryMultiplier
input {
String? preCommand
File inputBam
String outputRead1
String? outputRead2
String? outputUnpaired
String? picardJar
Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 16.0])) # High memory default to avoid crashes.
String toolCommand = if defined(picardJar)
...
...
@@ -316,13 +329,13 @@ task SamToFastq {
command {
set -e -o pipefail
$
{preCommand}
$
{toolCommand} \
~
{preCommand}
~
{toolCommand} \
SamToFastq \
I=
$
{inputBam} \
$
{"FASTQ=" + outputRead1} \
$
{"SECOND_END_FASTQ=" + outputRead2} \
$
{"UNPAIRED_FASTQ=" + outputUnpaired}
I=
~
{inputBam} \
~
{"FASTQ=" + outputRead1} \
~
{"SECOND_END_FASTQ=" + outputRead2} \
~
{"UNPAIRED_FASTQ=" + outputUnpaired}
}
output {
...
...
@@ -337,14 +350,15 @@ task SamToFastq {
}
task ScatterIntervalList {
String? preCommand
File interval_list
Int scatter_count
String? picardJar
Float? memory
Float? memoryMultiplier
input {
String? preCommand
File interval_list
Int scatter_count
String? picardJar
Float? memory
Float? memoryMultiplier
}
Int mem = ceil(select_first([memory, 4.0]))
String toolCommand = if defined(picardJar)
...
...
@@ -353,15 +367,15 @@ task ScatterIntervalList {
command {
set -e -o pipefail
$
{preCommand}
~
{preCommand}
mkdir scatter_list
$
{toolCommand} \
~
{toolCommand} \
IntervalListTools \
SCATTER_COUNT=
$
{scatter_count} \
SCATTER_COUNT=
~
{scatter_count} \
SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \
UNIQUE=true \
SORT=true \
INPUT=
$
{interval_list} \
INPUT=
~
{interval_list} \
OUTPUT=scatter_list
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment