Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
tasks
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
biowdl
tasks
Commits
c5eb0a76
Unverified
Commit
c5eb0a76
authored
6 years ago
by
Peter van 't Hof
Committed by
GitHub
6 years ago
Browse files
Options
Downloads
Plain Diff
Merge pull request #17 from biowdl/BIOWDL-25
Caching related changes
parents
e75a3008
4177a251
No related branches found
No related tags found
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
biopet.wdl
+11
-12
11 additions, 12 deletions
biopet.wdl
common.wdl
+3
-1
3 additions, 1 deletion
common.wdl
gatk.wdl
+137
-136
137 additions, 136 deletions
gatk.wdl
picard.wdl
+11
-11
11 additions, 11 deletions
picard.wdl
with
162 additions
and
160 deletions
biopet.wdl
+
11
−
12
View file @
c5eb0a76
...
@@ -117,25 +117,24 @@ task extractAdaptersFastqc {
...
@@ -117,25 +117,24 @@ task extractAdaptersFastqc {
task FastqSplitter {
task FastqSplitter {
String? preCommand
String? preCommand
File inputFastq
File inputFastq
String outputPath
Array[String] outputPaths
Int numberChunks
String toolJar
File toolJar
Array[Int] chunks = range(numberChunks)
command {
command {
set -e -o pipefail
set -e -o pipefail
${preCommand}
${preCommand}
mkdir -p ${sep=' ' prefix(outputPath + "/chunk_", chunks)}
mkdir -p $(dirname ${sep=') $(dirname ' outputPaths})
if [ ${numberChunks} -gt 1 ]; then
if [ ${length(outputPaths)} -gt 1 ]; then
SEP="/${basename(inputFastq)} -o "
java -jar ${toolJar} \
java -jar ${toolJar} -I ${inputFastq} -o ${sep='$SEP' prefix(outputPath + "/chunk_", chunks)}/${basename(inputFastq)}
-I ${inputFastq} \
else
-o ${sep=' -o ' outputPaths}
ln -sf ${inputFastq} ${outputPath}/chunk_0/${basename(inputFastq)}
else
fi
ln -sf ${inputFastq} ${outputPaths[0]}
fi
}
}
output {
output {
Array[File]
outputFastqFiles = glob(outputPath + "/chunk_*/" + basename(inputFastq))
Array[File]
chunks = outputPaths
}
}
}
}
...
...
This diff is collapsed.
Click to expand it.
common.wdl
+
3
−
1
View file @
c5eb0a76
...
@@ -106,7 +106,9 @@ task appendToStringArray {
...
@@ -106,7 +106,9 @@ task appendToStringArray {
}
}
task createLink {
task createLink {
File inputFile
# Making this of type File will create a link to the copy of the file in the execution
# folder, instead of the actual file.
String inputFile
String outputPath
String outputPath
command {
command {
...
...
This diff is collapsed.
Click to expand it.
gatk.wdl
+
137
−
136
View file @
c5eb0a76
# Generate Base Quality Score Recalibration (BQSR) model
# Apply Base Quality Score Recalibration (BQSR) model
task
BaseRecalibrator
{
task
ApplyBQSR
{
String? preCommand
String? preCommand
String
gatk
_j
ar
File
gatk
J
ar
String
input
_b
am
File
input
B
am
String input_bam_i
ndex
File inputBamI
ndex
String
recalibration_report_filename
String
outputBamPath
Array[File]+ sequence_group_interval
File recalibrationReport
Array[File]+
known_indels_sites_VCFs
Array[File]+
sequenceGroupInterval
Array[File]+ known_indels_sites_indices
File refDict
File ref
_dict
File ref
Fasta
File ref
_f
asta
File ref
F
asta
Index
File ref_fasta_index
Int? compressionLevel
Float? memory
Float? memory
Float? memoryMultiplier
Float? memoryMultiplier
...
@@ -19,18 +19,23 @@ task BaseRecalibrator {
...
@@ -19,18 +19,23 @@ task BaseRecalibrator {
command {
command {
set -e -o pipefail
set -e -o pipefail
${preCommand}
${preCommand}
java -Xms${mem}G -jar ${gatk_jar} \
java ${"-Dsamjdk.compression_level=" + compressionLevel} \
BaseRecalibrator \
-Xms${mem}G -jar ${gatkJar} \
-R ${ref_fasta} \
ApplyBQSR \
-I ${input_bam} \
--create-output-bam-md5 \
--add-output-sam-program-record \
-R ${refFasta} \
-I ${inputBam} \
--use-original-qualities \
--use-original-qualities \
-O ${recalibration_report_filename} \
-O ${outputBamPath} \
--known-sites ${sep=" --known-sites " known_indels_sites_VCFs} \
-bqsr ${recalibrationReport} \
-L ${sep=" -L " sequence_group_interval}
--static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \
-L ${sep=" -L " sequenceGroupInterval}
}
}
output {
output {
File recalibration_report = "${recalibration_report_filename}"
File recalibrated_bam = outputBamPath
File recalibrated_bam_checksum = outputBamPath + ".md5"
}
}
runtime {
runtime {
...
@@ -38,18 +43,19 @@ task BaseRecalibrator {
...
@@ -38,18 +43,19 @@ task BaseRecalibrator {
}
}
}
}
# Apply Base Quality Score Recalibration (BQSR) model
# Generate Base Quality Score Recalibration (BQSR) model
task
ApplyBQSR
{
task
BaseRecalibrator
{
String? preCommand
String? preCommand
String gatk_jar
File gatkJar
String input_bam
File inputBam
String output_bam_path
File inputBamIndex
File recalibration_report
String recalibrationReportPath
Array[String] sequence_group_interval
Array[File]+ sequenceGroupInterval
File ref_dict
Array[File]+ knownIndelsSitesVCFs
File ref_fasta
Array[File]+ knownIndelsSitesIndices
File ref_fasta_index
File refDict
Int? compression_level
File refFasta
File refFastaIndex
Float? memory
Float? memory
Float? memoryMultiplier
Float? memoryMultiplier
...
@@ -58,23 +64,18 @@ task ApplyBQSR {
...
@@ -58,23 +64,18 @@ task ApplyBQSR {
command {
command {
set -e -o pipefail
set -e -o pipefail
${preCommand}
${preCommand}
java ${"-Dsamjdk.compression_level=" + compression_level} \
java -Xms${mem}G -jar ${gatkJar} \
-Xms${mem}G -jar ${gatk_jar} \
BaseRecalibrator \
ApplyBQSR \
-R ${refFasta} \
--create-output-bam-md5 \
-I ${inputBam} \
--add-output-sam-program-record \
-R ${ref_fasta} \
-I ${input_bam} \
--use-original-qualities \
--use-original-qualities \
-O ${output_bam_path} \
-O ${recalibrationReportPath} \
-bqsr ${recalibration_report} \
--known-sites ${sep=" --known-sites " knownIndelsSitesVCFs} \
--static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \
-L ${sep=" -L " sequenceGroupInterval}
-L ${sep=" -L " sequence_group_interval}
}
}
output {
output {
File recalibrated_bam = "${output_bam_path}"
File recalibrationReport = recalibrationReportPath
File recalibrated_bam_checksum = "${output_bam_path}.md5"
}
}
runtime {
runtime {
...
@@ -82,13 +83,21 @@ task ApplyBQSR {
...
@@ -82,13 +83,21 @@ task ApplyBQSR {
}
}
}
}
# Combine multiple recalibration tables from scattered BaseRecalibrator runs
task CombineGVCFs {
task GatherBqsrReports {
String? preCommand
String? preCommand
String gatk_jar
Array[File]+ gvcfFiles
Array[File] input_bqsr_reports
Array[File]+ gvcfFileIndexes
String output_report_filepath
Array[File]+ intervals
String outputPath
String gatkJar
File refFasta
File refFastaIndex
File refDict
Int? compressionLevel
Float? memory
Float? memory
Float? memoryMultiplier
Float? memoryMultiplier
...
@@ -96,14 +105,24 @@ task GatherBqsrReports {
...
@@ -96,14 +105,24 @@ task GatherBqsrReports {
command {
command {
set -e -o pipefail
set -e -o pipefail
${preCommand}
${preCommand}
java -Xms${mem}G -jar ${gatk_jar} \
GatherBQSRReports \
if [ ${length(gvcfFiles)} -gt 1 ]; then
-I ${sep=' -I ' input_bqsr_reports} \
java ${"-Dsamjdk.compression_level=" + compressionLevel} \
-O ${output_report_filepath}
-Xmx${mem}G -jar ${gatkJar} \
CombineGVCFs \
-R ${refFasta} \
-O ${outputPath} \
-V ${sep=' -V ' gvcfFiles} \
-L ${sep=' -L ' intervals}
else # TODO this should be handled in WDL
ln -sf ${select_first(gvcfFiles)} ${outputPath}
ln -sf ${select_first(gvcfFileIndexes)} ${outputPath}.tbi
fi
}
}
output {
output {
File output_bqsr_report = "${output_report_filepath}"
File outputGVCF = outputPath
File outputGVCFindex = outputPath + ".tbi"
}
}
runtime {
runtime {
...
@@ -111,19 +130,12 @@ task GatherBqsrReports {
...
@@ -111,19 +130,12 @@ task GatherBqsrReports {
}
}
}
}
# Call variants on a single sample with HaplotypeCaller to produce a GVCF
# Combine multiple recalibration tables from scattered BaseRecalibrator runs
task
HaplotypeCallerGvcf
{
task
GatherBqsrReports
{
String? preCommand
String? preCommand
Array[File]+ input_bams
String gatkJar
Array[File]+ input_bams_index
Array[File] inputBQSRreports
Array[File]+ interval_list
String outputReportPath
String gvcf_basename
File ref_dict
File ref_fasta
File ref_fasta_index
Float? contamination
Int? compression_level
String gatk_jar
Float? memory
Float? memory
Float? memoryMultiplier
Float? memoryMultiplier
...
@@ -132,20 +144,14 @@ task HaplotypeCallerGvcf {
...
@@ -132,20 +144,14 @@ task HaplotypeCallerGvcf {
command {
command {
set -e -o pipefail
set -e -o pipefail
${preCommand}
${preCommand}
java ${"-Dsamjdk.compression_level=" + compression_level} \
java -Xms${mem}G -jar ${gatkJar} \
-Xmx${mem}G -jar ${gatk_jar} \
GatherBQSRReports \
HaplotypeCaller \
-I ${sep=' -I ' inputBQSRreports} \
-R ${ref_fasta} \
-O ${outputReportPath}
-O ${gvcf_basename}.vcf.gz \
-I ${sep=" -I " input_bams} \
-L ${sep=' -L ' interval_list} \
-contamination ${default=0 contamination} \
-ERC GVCF
}
}
output {
output {
File output_gvcf = "${gvcf_basename}.vcf.gz"
File outputBQSRreport = outputReportPath
File output_gvcf_index = "${gvcf_basename}.vcf.gz.tbi"
}
}
runtime {
runtime {
...
@@ -155,22 +161,22 @@ task HaplotypeCallerGvcf {
...
@@ -155,22 +161,22 @@ task HaplotypeCallerGvcf {
task GenotypeGVCFs {
task GenotypeGVCFs {
String? preCommand
String? preCommand
File gvcf
_f
iles
File gvcf
F
iles
File gvcf
_f
ile
_i
ndexes
File gvcf
F
ile
I
ndexes
Array[File]+ intervals
Array[File]+ intervals
String output
_basename
String output
Path
String gatk
_j
ar
String gatk
J
ar
File ref
_f
asta
File ref
F
asta
File ref
_f
asta
_i
ndex
File ref
F
asta
I
ndex
File ref
_d
ict
File ref
D
ict
File dbsnp
_vcf
File dbsnp
VCF
File dbsnp
_vcf_
index
File dbsnp
VCF
index
Int? compression
_l
evel
Int? compression
L
evel
Float? memory
Float? memory
Float? memoryMultiplier
Float? memoryMultiplier
...
@@ -179,22 +185,22 @@ task GenotypeGVCFs {
...
@@ -179,22 +185,22 @@ task GenotypeGVCFs {
set -e -o pipefail
set -e -o pipefail
${preCommand}
${preCommand}
java ${"-Dsamjdk.compression_level=" + compression
_l
evel} \
java ${"-Dsamjdk.compression_level=" + compression
L
evel} \
-Xmx${mem}G -jar ${gatk
_j
ar} \
-Xmx${mem}G -jar ${gatk
J
ar} \
GenotypeGVCFs \
GenotypeGVCFs \
-R ${ref
_f
asta} \
-R ${ref
F
asta} \
-O ${output
_basename + ".vcf.gz"
} \
-O ${output
Path
} \
-D ${dbsnp
_vcf
} \
-D ${dbsnp
VCF
} \
-G StandardAnnotation \
-G StandardAnnotation \
--only-output-calls-starting-in-intervals \
--only-output-calls-starting-in-intervals \
-new-qual \
-new-qual \
-V ${gvcf
_f
iles} \
-V ${gvcf
F
iles} \
-L ${sep=' -L ' intervals}
-L ${sep=' -L ' intervals}
}
}
output {
output {
File output
_vcf
= output
_basename + ".vcf.gz"
File output
VCF
= output
Path
File output
_vcf_
index = output
_basename + ".vcf.gz
.tbi"
File output
VCF
index = output
Path + "
.tbi"
}
}
runtime{
runtime{
...
@@ -202,21 +208,20 @@ task GenotypeGVCFs {
...
@@ -202,21 +208,20 @@ task GenotypeGVCFs {
}
}
}
}
task CombineGVCFs {
# Call variants on a single sample with HaplotypeCaller to produce a GVCF
task HaplotypeCallerGvcf {
String? preCommand
String? preCommand
Array[File]+ gvcf_files
Array[File]+ inputBams
Array[File]+ gvcf_file_indexes
Array[File]+ inputBamsIndex
Array[File]+ intervals
Array[File]+ intervalList
String gvcfPath
String output_basename
File refDict
File refFasta
String gatk_jar
File refFastaIndex
Float? contamination
File ref_fasta
Int? compressionLevel
File ref_fasta_index
String gatkJar
File ref_dict
Int? compression_level
Float? memory
Float? memory
Float? memoryMultiplier
Float? memoryMultiplier
...
@@ -224,24 +229,20 @@ task CombineGVCFs {
...
@@ -224,24 +229,20 @@ task CombineGVCFs {
command {
command {
set -e -o pipefail
set -e -o pipefail
${preCommand}
${preCommand}
java ${"-Dsamjdk.compression_level=" + compressionLevel} \
if [ ${length(gvcf_files)} -gt 1 ]; then
-Xmx${mem}G -jar ${gatkJar} \
java ${"-Dsamjdk.compression_level=" + compression_level} \
HaplotypeCaller \
-Xmx${mem}G -jar ${gatk_jar} \
-R ${refFasta} \
CombineGVCFs \
-O ${gvcfPath} \
-R ${ref_fasta} \
-I ${sep=" -I " inputBams} \
-O ${output_basename + ".vcf.gz"} \
-L ${sep=' -L ' intervalList} \
-V ${sep=' -V ' gvcf_files} \
-contamination ${default=0 contamination} \
-L ${sep=' -L ' intervals}
-ERC GVCF
else
ln -sf ${select_first(gvcf_files)} ${output_basename + ".vcf.gz"}
ln -sf ${select_first(gvcf_files)}.tbi ${output_basename + ".vcf.gz.tbi"}
fi
}
}
output {
output {
File output
_gvcf = output_basename + ".vcf.gz"
File output
GVCF = gvcfPath
File output
_gvcf_
index =
output_basename + ".vcf.gz
.tbi"
File output
GVCF
index =
gvcfPath + "
.tbi"
}
}
runtime {
runtime {
...
@@ -252,13 +253,13 @@ task CombineGVCFs {
...
@@ -252,13 +253,13 @@ task CombineGVCFs {
task SplitNCigarReads {
task SplitNCigarReads {
String? preCommand
String? preCommand
File input
_b
am
File input
B
am
File input
_bam_i
ndex
File input
BamI
ndex
File ref
_f
asta
File ref
F
asta
File ref
_f
asta
_i
ndex
File ref
F
asta
I
ndex
File ref
_d
ict
File ref
D
ict
String output
_b
am
String output
B
am
String gatk
_j
ar
String gatk
J
ar
Array[File]+ intervals
Array[File]+ intervals
Float? memory
Float? memory
...
@@ -268,17 +269,17 @@ task SplitNCigarReads {
...
@@ -268,17 +269,17 @@ task SplitNCigarReads {
command {
command {
set -e -o pipefail
set -e -o pipefail
${preCommand}
${preCommand}
java -Xms${mem}G -jar ${gatk
_j
ar} \
java -Xms${mem}G -jar ${gatk
J
ar} \
SplitNCigarReads \
SplitNCigarReads \
-I ${input
_b
am} \
-I ${input
B
am} \
-R ${ref
_f
asta} \
-R ${ref
F
asta} \
-O ${output
_b
am} \
-O ${output
B
am} \
-L ${sep=' -L ' intervals}
-L ${sep=' -L ' intervals}
}
}
output {
output {
File bam = output
_b
am
File bam = output
B
am
File bam
_i
ndex = sub(output
_b
am, "\\.bam$", ".bai")
File bam
I
ndex = sub(output
B
am, "\\.bam$", ".bai")
}
}
runtime {
runtime {
...
...
This diff is collapsed.
Click to expand it.
picard.wdl
+
11
−
11
View file @
c5eb0a76
...
@@ -120,11 +120,11 @@ task MarkDuplicates {
...
@@ -120,11 +120,11 @@ task MarkDuplicates {
# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs
# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs
task MergeVCFs {
task MergeVCFs {
String? preCommand
String? preCommand
Array[File] input
_vcf
s
Array[File] input
VCF
s
Array[File] input
_vcfs_i
ndexes
Array[File] input
VCFsI
ndexes
String output
_vcf_
path
String output
VCF
path
Int? compression
_l
evel
Int? compression
L
evel
String picard
_j
ar
String picard
J
ar
Float? memory
Float? memory
Float? memoryMultiplier
Float? memoryMultiplier
...
@@ -135,16 +135,16 @@ task MergeVCFs {
...
@@ -135,16 +135,16 @@ task MergeVCFs {
command {
command {
set -e -o pipefail
set -e -o pipefail
${preCommand}
${preCommand}
java ${"-Dsamjdk.compression_level=" + compression
_l
evel} \
java ${"-Dsamjdk.compression_level=" + compression
L
evel} \
-Xmx${mem}G -jar ${picard
_j
ar} \
-Xmx${mem}G -jar ${picard
J
ar} \
MergeVcfs \
MergeVcfs \
INPUT=${sep=' INPUT=' input
_vcf
s} \
INPUT=${sep=' INPUT=' input
VCF
s} \
OUTPUT=${output
_vcf_
path}
OUTPUT=${output
VCF
path}
}
}
output {
output {
File output
_vcf
= output
_vcf_
path
File output
VCF
= output
VCF
path
File output
_vcf_
index = output
_vcf_
path + ".tbi"
File output
VCF
index = output
VCF
path + ".tbi"
}
}
runtime {
runtime {
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment