Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
tasks
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
biowdl
tasks
Commits
c5eb0a76
Unverified
Commit
c5eb0a76
authored
6 years ago
by
Peter van 't Hof
Committed by
GitHub
6 years ago
Browse files
Options
Downloads
Plain Diff
Merge pull request #17 from biowdl/BIOWDL-25
Caching related changes
parents
e75a3008
4177a251
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
biopet.wdl
+11
-12
11 additions, 12 deletions
biopet.wdl
common.wdl
+3
-1
3 additions, 1 deletion
common.wdl
gatk.wdl
+137
-136
137 additions, 136 deletions
gatk.wdl
picard.wdl
+11
-11
11 additions, 11 deletions
picard.wdl
with
162 additions
and
160 deletions
biopet.wdl
+
11
−
12
View file @
c5eb0a76
...
...
@@ -117,25 +117,24 @@ task extractAdaptersFastqc {
task FastqSplitter {
String? preCommand
File inputFastq
String outputPath
Int numberChunks
File toolJar
Array[Int] chunks = range(numberChunks)
Array[String] outputPaths
String toolJar
command {
set -e -o pipefail
${preCommand}
mkdir -p ${sep=' ' prefix(outputPath + "/chunk_", chunks)}
if [ ${numberChunks} -gt 1 ]; then
SEP="/${basename(inputFastq)} -o "
java -jar ${toolJar} -I ${inputFastq} -o ${sep='$SEP' prefix(outputPath + "/chunk_", chunks)}/${basename(inputFastq)}
else
ln -sf ${inputFastq} ${outputPath}/chunk_0/${basename(inputFastq)}
fi
mkdir -p $(dirname ${sep=') $(dirname ' outputPaths})
if [ ${length(outputPaths)} -gt 1 ]; then
java -jar ${toolJar} \
-I ${inputFastq} \
-o ${sep=' -o ' outputPaths}
else
ln -sf ${inputFastq} ${outputPaths[0]}
fi
}
output {
Array[File]
outputFastqFiles = glob(outputPath + "/chunk_*/" + basename(inputFastq))
Array[File]
chunks = outputPaths
}
}
...
...
This diff is collapsed.
Click to expand it.
common.wdl
+
3
−
1
View file @
c5eb0a76
...
...
@@ -106,7 +106,9 @@ task appendToStringArray {
}
task createLink {
File inputFile
# Making this of type File will create a link to the copy of the file in the execution
# folder, instead of the actual file.
String inputFile
String outputPath
command {
...
...
This diff is collapsed.
Click to expand it.
gatk.wdl
+
137
−
136
View file @
c5eb0a76
# Generate Base Quality Score Recalibration (BQSR) model
task
BaseRecalibrator
{
# Apply Base Quality Score Recalibration (BQSR) model
task
ApplyBQSR
{
String? preCommand
String
gatk
_j
ar
String
input
_b
am
String input_bam_i
ndex
String
recalibration_report_filename
Array[File]+ sequence_group_interval
Array[File]+
known_indels_sites_VCFs
Array[File]+ known_indels_sites_indices
File ref
_dict
File ref
_f
asta
File ref_fasta_index
File
gatk
J
ar
File
input
B
am
File inputBamI
ndex
String
outputBamPath
File recalibrationReport
Array[File]+
sequenceGroupInterval
File refDict
File ref
Fasta
File ref
F
asta
Index
Int? compressionLevel
Float? memory
Float? memoryMultiplier
...
...
@@ -19,18 +19,23 @@ task BaseRecalibrator {
command {
set -e -o pipefail
${preCommand}
java -Xms${mem}G -jar ${gatk_jar} \
BaseRecalibrator \
-R ${ref_fasta} \
-I ${input_bam} \
java ${"-Dsamjdk.compression_level=" + compressionLevel} \
-Xms${mem}G -jar ${gatkJar} \
ApplyBQSR \
--create-output-bam-md5 \
--add-output-sam-program-record \
-R ${refFasta} \
-I ${inputBam} \
--use-original-qualities \
-O ${recalibration_report_filename} \
--known-sites ${sep=" --known-sites " known_indels_sites_VCFs} \
-L ${sep=" -L " sequence_group_interval}
-O ${outputBamPath} \
-bqsr ${recalibrationReport} \
--static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \
-L ${sep=" -L " sequenceGroupInterval}
}
output {
File recalibration_report = "${recalibration_report_filename}"
File recalibrated_bam = outputBamPath
File recalibrated_bam_checksum = outputBamPath + ".md5"
}
runtime {
...
...
@@ -38,18 +43,19 @@ task BaseRecalibrator {
}
}
# Apply Base Quality Score Recalibration (BQSR) model
task
ApplyBQSR
{
# Generate Base Quality Score Recalibration (BQSR) model
task
BaseRecalibrator
{
String? preCommand
String gatk_jar
String input_bam
String output_bam_path
File recalibration_report
Array[String] sequence_group_interval
File ref_dict
File ref_fasta
File ref_fasta_index
Int? compression_level
File gatkJar
File inputBam
File inputBamIndex
String recalibrationReportPath
Array[File]+ sequenceGroupInterval
Array[File]+ knownIndelsSitesVCFs
Array[File]+ knownIndelsSitesIndices
File refDict
File refFasta
File refFastaIndex
Float? memory
Float? memoryMultiplier
...
...
@@ -58,23 +64,18 @@ task ApplyBQSR {
command {
set -e -o pipefail
${preCommand}
java ${"-Dsamjdk.compression_level=" + compression_level} \
-Xms${mem}G -jar ${gatk_jar} \
ApplyBQSR \
--create-output-bam-md5 \
--add-output-sam-program-record \
-R ${ref_fasta} \
-I ${input_bam} \
java -Xms${mem}G -jar ${gatkJar} \
BaseRecalibrator \
-R ${refFasta} \
-I ${inputBam} \
--use-original-qualities \
-O ${output_bam_path} \
-bqsr ${recalibration_report} \
--static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \
-L ${sep=" -L " sequence_group_interval}
-O ${recalibrationReportPath} \
--known-sites ${sep=" --known-sites " knownIndelsSitesVCFs} \
-L ${sep=" -L " sequenceGroupInterval}
}
output {
File recalibrated_bam = "${output_bam_path}"
File recalibrated_bam_checksum = "${output_bam_path}.md5"
File recalibrationReport = recalibrationReportPath
}
runtime {
...
...
@@ -82,13 +83,21 @@ task ApplyBQSR {
}
}
# Combine multiple recalibration tables from scattered BaseRecalibrator runs
task GatherBqsrReports {
task CombineGVCFs {
String? preCommand
String gatk_jar
Array[File] input_bqsr_reports
String output_report_filepath
Array[File]+ gvcfFiles
Array[File]+ gvcfFileIndexes
Array[File]+ intervals
String outputPath
String gatkJar
File refFasta
File refFastaIndex
File refDict
Int? compressionLevel
Float? memory
Float? memoryMultiplier
...
...
@@ -96,14 +105,24 @@ task GatherBqsrReports {
command {
set -e -o pipefail
${preCommand}
java -Xms${mem}G -jar ${gatk_jar} \
GatherBQSRReports \
-I ${sep=' -I ' input_bqsr_reports} \
-O ${output_report_filepath}
if [ ${length(gvcfFiles)} -gt 1 ]; then
java ${"-Dsamjdk.compression_level=" + compressionLevel} \
-Xmx${mem}G -jar ${gatkJar} \
CombineGVCFs \
-R ${refFasta} \
-O ${outputPath} \
-V ${sep=' -V ' gvcfFiles} \
-L ${sep=' -L ' intervals}
else # TODO this should be handled in wdl
ln -sf ${select_first(gvcfFiles)} ${outputPath}
ln -sf ${select_first(gvcfFileIndexes)} ${outputPath}.tbi
fi
}
output {
File output_bqsr_report = "${output_report_filepath}"
File outputGVCF = outputPath
File outputGVCFindex = outputPath + ".tbi"
}
runtime {
...
...
@@ -111,19 +130,12 @@ task GatherBqsrReports {
}
}
# Call variants on a single sample with HaplotypeCaller to produce a GVCF
task
HaplotypeCallerGvcf
{
# Combine multiple recalibration tables from scattered BaseRecalibrator runs
task
GatherBqsrReports
{
String? preCommand
Array[File]+ input_bams
Array[File]+ input_bams_index
Array[File]+ interval_list
String gvcf_basename
File ref_dict
File ref_fasta
File ref_fasta_index
Float? contamination
Int? compression_level
String gatk_jar
String gatkJar
Array[File] inputBQSRreports
String outputReportPath
Float? memory
Float? memoryMultiplier
...
...
@@ -132,20 +144,14 @@ task HaplotypeCallerGvcf {
command {
set -e -o pipefail
${preCommand}
java ${"-Dsamjdk.compression_level=" + compression_level} \
-Xmx${mem}G -jar ${gatk_jar} \
HaplotypeCaller \
-R ${ref_fasta} \
-O ${gvcf_basename}.vcf.gz \
-I ${sep=" -I " input_bams} \
-L ${sep=' -L ' interval_list} \
-contamination ${default=0 contamination} \
-ERC GVCF
java -Xms${mem}G -jar ${gatkJar} \
GatherBQSRReports \
-I ${sep=' -I ' inputBQSRreports} \
-O ${outputReportPath}
}
output {
File output_gvcf = "${gvcf_basename}.vcf.gz"
File output_gvcf_index = "${gvcf_basename}.vcf.gz.tbi"
File outputBQSRreport = outputReportPath
}
runtime {
...
...
@@ -155,22 +161,22 @@ task HaplotypeCallerGvcf {
task GenotypeGVCFs {
String? preCommand
File gvcf
_f
iles
File gvcf
_f
ile
_i
ndexes
File gvcf
F
iles
File gvcf
F
ile
I
ndexes
Array[File]+ intervals
String output
_basename
String output
Path
String gatk
_j
ar
String gatk
J
ar
File ref
_f
asta
File ref
_f
asta
_i
ndex
File ref
_d
ict
File ref
F
asta
File ref
F
asta
I
ndex
File ref
D
ict
File dbsnp
_vcf
File dbsnp
_vcf_
index
File dbsnp
VCF
File dbsnp
VCF
index
Int? compression
_l
evel
Int? compression
L
evel
Float? memory
Float? memoryMultiplier
...
...
@@ -179,22 +185,22 @@ task GenotypeGVCFs {
set -e -o pipefail
${preCommand}
java ${"-Dsamjdk.compression_level=" + compression
_l
evel} \
-Xmx${mem}G -jar ${gatk
_j
ar} \
java ${"-Dsamjdk.compression_level=" + compression
L
evel} \
-Xmx${mem}G -jar ${gatk
J
ar} \
GenotypeGVCFs \
-R ${ref
_f
asta} \
-O ${output
_basename + ".vcf.gz"
} \
-D ${dbsnp
_vcf
} \
-R ${ref
F
asta} \
-O ${output
Path
} \
-D ${dbsnp
VCF
} \
-G StandardAnnotation \
--only-output-calls-starting-in-intervals \
-new-qual \
-V ${gvcf
_f
iles} \
-V ${gvcf
F
iles} \
-L ${sep=' -L ' intervals}
}
output {
File output
_vcf
= output
_basename + ".vcf.gz"
File output
_vcf_
index = output
_basename + ".vcf.gz
.tbi"
File output
VCF
= output
Path
File output
VCF
index = output
Path + "
.tbi"
}
runtime{
...
...
@@ -202,21 +208,20 @@ task GenotypeGVCFs {
}
}
task CombineGVCFs {
# Call variants on a single sample with HaplotypeCaller to produce a GVCF
task HaplotypeCallerGvcf {
String? preCommand
Array[File]+ gvcf_files
Array[File]+ gvcf_file_indexes
Array[File]+ intervals
String output_basename
String gatk_jar
File ref_fasta
File ref_fasta_index
File ref_dict
Array[File]+ inputBams
Array[File]+ inputBamsIndex
Array[File]+ intervalList
String gvcfPath
File refDict
File refFasta
File refFastaIndex
Float? contamination
Int? compressionLevel
String gatkJar
Int? compression_level
Float? memory
Float? memoryMultiplier
...
...
@@ -224,24 +229,20 @@ task CombineGVCFs {
command {
set -e -o pipefail
${preCommand}
if [ ${length(gvcf_files)} -gt 1 ]; then
java ${"-Dsamjdk.compression_level=" + compression_level} \
-Xmx${mem}G -jar ${gatk_jar} \
CombineGVCFs \
-R ${ref_fasta} \
-O ${output_basename + ".vcf.gz"} \
-V ${sep=' -V ' gvcf_files} \
-L ${sep=' -L ' intervals}
else
ln -sf ${select_first(gvcf_files)} ${output_basename + ".vcf.gz"}
ln -sf ${select_first(gvcf_files)}.tbi ${output_basename + ".vcf.gz.tbi"}
fi
java ${"-Dsamjdk.compression_level=" + compressionLevel} \
-Xmx${mem}G -jar ${gatkJar} \
HaplotypeCaller \
-R ${refFasta} \
-O ${gvcfPath} \
-I ${sep=" -I " inputBams} \
-L ${sep=' -L ' intervalList} \
-contamination ${default=0 contamination} \
-ERC GVCF
}
output {
File output
_gvcf = output_basename + ".vcf.gz"
File output
_gvcf_
index =
output_basename + ".vcf.gz
.tbi"
File output
GVCF = gvcfPath
File output
GVCF
index =
gvcfPath + "
.tbi"
}
runtime {
...
...
@@ -252,13 +253,13 @@ task CombineGVCFs {
task SplitNCigarReads {
String? preCommand
File input
_b
am
File input
_bam_i
ndex
File ref
_f
asta
File ref
_f
asta
_i
ndex
File ref
_d
ict
String output
_b
am
String gatk
_j
ar
File input
B
am
File input
BamI
ndex
File ref
F
asta
File ref
F
asta
I
ndex
File ref
D
ict
String output
B
am
String gatk
J
ar
Array[File]+ intervals
Float? memory
...
...
@@ -268,17 +269,17 @@ task SplitNCigarReads {
command {
set -e -o pipefail
${preCommand}
java -Xms${mem}G -jar ${gatk
_j
ar} \
java -Xms${mem}G -jar ${gatk
J
ar} \
SplitNCigarReads \
-I ${input
_b
am} \
-R ${ref
_f
asta} \
-O ${output
_b
am} \
-I ${input
B
am} \
-R ${ref
F
asta} \
-O ${output
B
am} \
-L ${sep=' -L ' intervals}
}
output {
File bam = output
_b
am
File bam
_i
ndex = sub(output
_b
am, "\\.bam$", ".bai")
File bam = output
B
am
File bam
I
ndex = sub(output
B
am, "\\.bam$", ".bai")
}
runtime {
...
...
This diff is collapsed.
Click to expand it.
picard.wdl
+
11
−
11
View file @
c5eb0a76
...
...
@@ -120,11 +120,11 @@ task MarkDuplicates {
# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs
task MergeVCFs {
String? preCommand
Array[File] input
_vcf
s
Array[File] input
_vcfs_i
ndexes
String output
_vcf_
path
Int? compression
_l
evel
String picard
_j
ar
Array[File] input
VCF
s
Array[File] input
VCFsI
ndexes
String output
VCF
path
Int? compression
L
evel
String picard
J
ar
Float? memory
Float? memoryMultiplier
...
...
@@ -135,16 +135,16 @@ task MergeVCFs {
command {
set -e -o pipefail
${preCommand}
java ${"-Dsamjdk.compression_level=" + compression
_l
evel} \
-Xmx${mem}G -jar ${picard
_j
ar} \
java ${"-Dsamjdk.compression_level=" + compression
L
evel} \
-Xmx${mem}G -jar ${picard
J
ar} \
MergeVcfs \
INPUT=${sep=' INPUT=' input
_vcf
s} \
OUTPUT=${output
_vcf_
path}
INPUT=${sep=' INPUT=' input
VCF
s} \
OUTPUT=${output
VCF
path}
}
output {
File output
_vcf
= output
_vcf_
path
File output
_vcf_
index = output
_vcf_
path + ".tbi"
File output
VCF
= output
VCF
path
File output
VCF
index = output
VCF
path + ".tbi"
}
runtime {
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment