Skip to content
Snippets Groups Projects
Unverified Commit c5eb0a76 authored by Peter van 't Hof's avatar Peter van 't Hof Committed by GitHub
Browse files

Merge pull request #17 from biowdl/BIOWDL-25

Caching related changes
parents e75a3008 4177a251
No related branches found
No related tags found
No related merge requests found
...@@ -117,25 +117,24 @@ task extractAdaptersFastqc { ...@@ -117,25 +117,24 @@ task extractAdaptersFastqc {
task FastqSplitter { task FastqSplitter {
String? preCommand String? preCommand
File inputFastq File inputFastq
String outputPath Array[String] outputPaths
Int numberChunks String toolJar
File toolJar
Array[Int] chunks = range(numberChunks)
command { command {
set -e -o pipefail set -e -o pipefail
${preCommand} ${preCommand}
mkdir -p ${sep=' ' prefix(outputPath + "/chunk_", chunks)} mkdir -p $(dirname ${sep=') $(dirname ' outputPaths})
if [ ${numberChunks} -gt 1 ]; then if [ ${length(outputPaths)} -gt 1 ]; then
SEP="/${basename(inputFastq)} -o " java -jar ${toolJar} \
java -jar ${toolJar} -I ${inputFastq} -o ${sep='$SEP' prefix(outputPath + "/chunk_", chunks)}/${basename(inputFastq)} -I ${inputFastq} \
else -o ${sep=' -o ' outputPaths}
ln -sf ${inputFastq} ${outputPath}/chunk_0/${basename(inputFastq)} else
fi ln -sf ${inputFastq} ${outputPaths[0]}
fi
} }
output { output {
Array[File] outputFastqFiles = glob(outputPath + "/chunk_*/" + basename(inputFastq)) Array[File] chunks = outputPaths
} }
} }
......
...@@ -106,7 +106,9 @@ task appendToStringArray { ...@@ -106,7 +106,9 @@ task appendToStringArray {
} }
task createLink { task createLink {
File inputFile # Making this of type File will create a link to the copy of the file in the execution
# folder, instead of the actual file.
String inputFile
String outputPath String outputPath
command { command {
......
# Generate Base Quality Score Recalibration (BQSR) model # Apply Base Quality Score Recalibration (BQSR) model
task BaseRecalibrator { task ApplyBQSR {
String? preCommand String? preCommand
String gatk_jar File gatkJar
String input_bam File inputBam
String input_bam_index File inputBamIndex
String recalibration_report_filename String outputBamPath
Array[File]+ sequence_group_interval File recalibrationReport
Array[File]+ known_indels_sites_VCFs Array[File]+ sequenceGroupInterval
Array[File]+ known_indels_sites_indices File refDict
File ref_dict File refFasta
File ref_fasta File refFastaIndex
File ref_fasta_index Int? compressionLevel
Float? memory Float? memory
Float? memoryMultiplier Float? memoryMultiplier
...@@ -19,18 +19,23 @@ task BaseRecalibrator { ...@@ -19,18 +19,23 @@ task BaseRecalibrator {
command { command {
set -e -o pipefail set -e -o pipefail
${preCommand} ${preCommand}
java -Xms${mem}G -jar ${gatk_jar} \ java ${"-Dsamjdk.compression_level=" + compressionLevel} \
BaseRecalibrator \ -Xms${mem}G -jar ${gatkJar} \
-R ${ref_fasta} \ ApplyBQSR \
-I ${input_bam} \ --create-output-bam-md5 \
--add-output-sam-program-record \
-R ${refFasta} \
-I ${inputBam} \
--use-original-qualities \ --use-original-qualities \
-O ${recalibration_report_filename} \ -O ${outputBamPath} \
--known-sites ${sep=" --known-sites " known_indels_sites_VCFs} \ -bqsr ${recalibrationReport} \
-L ${sep=" -L " sequence_group_interval} --static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \
-L ${sep=" -L " sequenceGroupInterval}
} }
output { output {
File recalibration_report = "${recalibration_report_filename}" File recalibrated_bam = outputBamPath
File recalibrated_bam_checksum = outputBamPath + ".md5"
} }
runtime { runtime {
...@@ -38,18 +43,19 @@ task BaseRecalibrator { ...@@ -38,18 +43,19 @@ task BaseRecalibrator {
} }
} }
# Apply Base Quality Score Recalibration (BQSR) model # Generate Base Quality Score Recalibration (BQSR) model
task ApplyBQSR { task BaseRecalibrator {
String? preCommand String? preCommand
String gatk_jar File gatkJar
String input_bam File inputBam
String output_bam_path File inputBamIndex
File recalibration_report String recalibrationReportPath
Array[String] sequence_group_interval Array[File]+ sequenceGroupInterval
File ref_dict Array[File]+ knownIndelsSitesVCFs
File ref_fasta Array[File]+ knownIndelsSitesIndices
File ref_fasta_index File refDict
Int? compression_level File refFasta
File refFastaIndex
Float? memory Float? memory
Float? memoryMultiplier Float? memoryMultiplier
...@@ -58,23 +64,18 @@ task ApplyBQSR { ...@@ -58,23 +64,18 @@ task ApplyBQSR {
command { command {
set -e -o pipefail set -e -o pipefail
${preCommand} ${preCommand}
java ${"-Dsamjdk.compression_level=" + compression_level} \ java -Xms${mem}G -jar ${gatkJar} \
-Xms${mem}G -jar ${gatk_jar} \ BaseRecalibrator \
ApplyBQSR \ -R ${refFasta} \
--create-output-bam-md5 \ -I ${inputBam} \
--add-output-sam-program-record \
-R ${ref_fasta} \
-I ${input_bam} \
--use-original-qualities \ --use-original-qualities \
-O ${output_bam_path} \ -O ${recalibrationReportPath} \
-bqsr ${recalibration_report} \ --known-sites ${sep=" --known-sites " knownIndelsSitesVCFs} \
--static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \ -L ${sep=" -L " sequenceGroupInterval}
-L ${sep=" -L " sequence_group_interval}
} }
output { output {
File recalibrated_bam = "${output_bam_path}" File recalibrationReport = recalibrationReportPath
File recalibrated_bam_checksum = "${output_bam_path}.md5"
} }
runtime { runtime {
...@@ -82,13 +83,21 @@ task ApplyBQSR { ...@@ -82,13 +83,21 @@ task ApplyBQSR {
} }
} }
# Combine multiple recalibration tables from scattered BaseRecalibrator runs task CombineGVCFs {
task GatherBqsrReports {
String? preCommand String? preCommand
String gatk_jar Array[File]+ gvcfFiles
Array[File] input_bqsr_reports Array[File]+ gvcfFileIndexes
String output_report_filepath Array[File]+ intervals
String outputPath
String gatkJar
File refFasta
File refFastaIndex
File refDict
Int? compressionLevel
Float? memory Float? memory
Float? memoryMultiplier Float? memoryMultiplier
...@@ -96,14 +105,24 @@ task GatherBqsrReports { ...@@ -96,14 +105,24 @@ task GatherBqsrReports {
command { command {
set -e -o pipefail set -e -o pipefail
${preCommand} ${preCommand}
java -Xms${mem}G -jar ${gatk_jar} \
GatherBQSRReports \ if [ ${length(gvcfFiles)} -gt 1 ]; then
-I ${sep=' -I ' input_bqsr_reports} \ java ${"-Dsamjdk.compression_level=" + compressionLevel} \
-O ${output_report_filepath} -Xmx${mem}G -jar ${gatkJar} \
CombineGVCFs \
-R ${refFasta} \
-O ${outputPath} \
-V ${sep=' -V ' gvcfFiles} \
-L ${sep=' -L ' intervals}
else # TODO this should be handled in wdl
ln -sf ${select_first(gvcfFiles)} ${outputPath}
ln -sf ${select_first(gvcfFileIndexes)} ${outputPath}.tbi
fi
} }
output { output {
File output_bqsr_report = "${output_report_filepath}" File outputGVCF = outputPath
File outputGVCFindex = outputPath + ".tbi"
} }
runtime { runtime {
...@@ -111,19 +130,12 @@ task GatherBqsrReports { ...@@ -111,19 +130,12 @@ task GatherBqsrReports {
} }
} }
# Call variants on a single sample with HaplotypeCaller to produce a GVCF # Combine multiple recalibration tables from scattered BaseRecalibrator runs
task HaplotypeCallerGvcf { task GatherBqsrReports {
String? preCommand String? preCommand
Array[File]+ input_bams String gatkJar
Array[File]+ input_bams_index Array[File] inputBQSRreports
Array[File]+ interval_list String outputReportPath
String gvcf_basename
File ref_dict
File ref_fasta
File ref_fasta_index
Float? contamination
Int? compression_level
String gatk_jar
Float? memory Float? memory
Float? memoryMultiplier Float? memoryMultiplier
...@@ -132,20 +144,14 @@ task HaplotypeCallerGvcf { ...@@ -132,20 +144,14 @@ task HaplotypeCallerGvcf {
command { command {
set -e -o pipefail set -e -o pipefail
${preCommand} ${preCommand}
java ${"-Dsamjdk.compression_level=" + compression_level} \ java -Xms${mem}G -jar ${gatkJar} \
-Xmx${mem}G -jar ${gatk_jar} \ GatherBQSRReports \
HaplotypeCaller \ -I ${sep=' -I ' inputBQSRreports} \
-R ${ref_fasta} \ -O ${outputReportPath}
-O ${gvcf_basename}.vcf.gz \
-I ${sep=" -I " input_bams} \
-L ${sep=' -L ' interval_list} \
-contamination ${default=0 contamination} \
-ERC GVCF
} }
output { output {
File output_gvcf = "${gvcf_basename}.vcf.gz" File outputBQSRreport = outputReportPath
File output_gvcf_index = "${gvcf_basename}.vcf.gz.tbi"
} }
runtime { runtime {
...@@ -155,22 +161,22 @@ task HaplotypeCallerGvcf { ...@@ -155,22 +161,22 @@ task HaplotypeCallerGvcf {
task GenotypeGVCFs { task GenotypeGVCFs {
String? preCommand String? preCommand
File gvcf_files File gvcfFiles
File gvcf_file_indexes File gvcfFileIndexes
Array[File]+ intervals Array[File]+ intervals
String output_basename String outputPath
String gatk_jar String gatkJar
File ref_fasta File refFasta
File ref_fasta_index File refFastaIndex
File ref_dict File refDict
File dbsnp_vcf File dbsnpVCF
File dbsnp_vcf_index File dbsnpVCFindex
Int? compression_level Int? compressionLevel
Float? memory Float? memory
Float? memoryMultiplier Float? memoryMultiplier
...@@ -179,22 +185,22 @@ task GenotypeGVCFs { ...@@ -179,22 +185,22 @@ task GenotypeGVCFs {
set -e -o pipefail set -e -o pipefail
${preCommand} ${preCommand}
java ${"-Dsamjdk.compression_level=" + compression_level} \ java ${"-Dsamjdk.compression_level=" + compressionLevel} \
-Xmx${mem}G -jar ${gatk_jar} \ -Xmx${mem}G -jar ${gatkJar} \
GenotypeGVCFs \ GenotypeGVCFs \
-R ${ref_fasta} \ -R ${refFasta} \
-O ${output_basename + ".vcf.gz"} \ -O ${outputPath} \
-D ${dbsnp_vcf} \ -D ${dbsnpVCF} \
-G StandardAnnotation \ -G StandardAnnotation \
--only-output-calls-starting-in-intervals \ --only-output-calls-starting-in-intervals \
-new-qual \ -new-qual \
-V ${gvcf_files} \ -V ${gvcfFiles} \
-L ${sep=' -L ' intervals} -L ${sep=' -L ' intervals}
} }
output { output {
File output_vcf = output_basename + ".vcf.gz" File outputVCF = outputPath
File output_vcf_index = output_basename + ".vcf.gz.tbi" File outputVCFindex = outputPath + ".tbi"
} }
runtime{ runtime{
...@@ -202,21 +208,20 @@ task GenotypeGVCFs { ...@@ -202,21 +208,20 @@ task GenotypeGVCFs {
} }
} }
task CombineGVCFs { # Call variants on a single sample with HaplotypeCaller to produce a GVCF
task HaplotypeCallerGvcf {
String? preCommand String? preCommand
Array[File]+ gvcf_files Array[File]+ inputBams
Array[File]+ gvcf_file_indexes Array[File]+ inputBamsIndex
Array[File]+ intervals Array[File]+ intervalList
String gvcfPath
String output_basename File refDict
File refFasta
String gatk_jar File refFastaIndex
Float? contamination
File ref_fasta Int? compressionLevel
File ref_fasta_index String gatkJar
File ref_dict
Int? compression_level
Float? memory Float? memory
Float? memoryMultiplier Float? memoryMultiplier
...@@ -224,24 +229,20 @@ task CombineGVCFs { ...@@ -224,24 +229,20 @@ task CombineGVCFs {
command { command {
set -e -o pipefail set -e -o pipefail
${preCommand} ${preCommand}
java ${"-Dsamjdk.compression_level=" + compressionLevel} \
if [ ${length(gvcf_files)} -gt 1 ]; then -Xmx${mem}G -jar ${gatkJar} \
java ${"-Dsamjdk.compression_level=" + compression_level} \ HaplotypeCaller \
-Xmx${mem}G -jar ${gatk_jar} \ -R ${refFasta} \
CombineGVCFs \ -O ${gvcfPath} \
-R ${ref_fasta} \ -I ${sep=" -I " inputBams} \
-O ${output_basename + ".vcf.gz"} \ -L ${sep=' -L ' intervalList} \
-V ${sep=' -V ' gvcf_files} \ -contamination ${default=0 contamination} \
-L ${sep=' -L ' intervals} -ERC GVCF
else
ln -sf ${select_first(gvcf_files)} ${output_basename + ".vcf.gz"}
ln -sf ${select_first(gvcf_files)}.tbi ${output_basename + ".vcf.gz.tbi"}
fi
} }
output { output {
File output_gvcf = output_basename + ".vcf.gz" File outputGVCF = gvcfPath
File output_gvcf_index = output_basename + ".vcf.gz.tbi" File outputGVCFindex = gvcfPath + ".tbi"
} }
runtime { runtime {
...@@ -252,13 +253,13 @@ task CombineGVCFs { ...@@ -252,13 +253,13 @@ task CombineGVCFs {
task SplitNCigarReads { task SplitNCigarReads {
String? preCommand String? preCommand
File input_bam File inputBam
File input_bam_index File inputBamIndex
File ref_fasta File refFasta
File ref_fasta_index File refFastaIndex
File ref_dict File refDict
String output_bam String outputBam
String gatk_jar String gatkJar
Array[File]+ intervals Array[File]+ intervals
Float? memory Float? memory
...@@ -268,17 +269,17 @@ task SplitNCigarReads { ...@@ -268,17 +269,17 @@ task SplitNCigarReads {
command { command {
set -e -o pipefail set -e -o pipefail
${preCommand} ${preCommand}
java -Xms${mem}G -jar ${gatk_jar} \ java -Xms${mem}G -jar ${gatkJar} \
SplitNCigarReads \ SplitNCigarReads \
-I ${input_bam} \ -I ${inputBam} \
-R ${ref_fasta} \ -R ${refFasta} \
-O ${output_bam} \ -O ${outputBam} \
-L ${sep=' -L ' intervals} -L ${sep=' -L ' intervals}
} }
output { output {
File bam = output_bam File bam = outputBam
File bam_index = sub(output_bam, "\\.bam$", ".bai") File bamIndex = sub(outputBam, "\\.bam$", ".bai")
} }
runtime { runtime {
......
...@@ -120,11 +120,11 @@ task MarkDuplicates { ...@@ -120,11 +120,11 @@ task MarkDuplicates {
# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs # Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs
task MergeVCFs { task MergeVCFs {
String? preCommand String? preCommand
Array[File] input_vcfs Array[File] inputVCFs
Array[File] input_vcfs_indexes Array[File] inputVCFsIndexes
String output_vcf_path String outputVCFpath
Int? compression_level Int? compressionLevel
String picard_jar String picardJar
Float? memory Float? memory
Float? memoryMultiplier Float? memoryMultiplier
...@@ -135,16 +135,16 @@ task MergeVCFs { ...@@ -135,16 +135,16 @@ task MergeVCFs {
command { command {
set -e -o pipefail set -e -o pipefail
${preCommand} ${preCommand}
java ${"-Dsamjdk.compression_level=" + compression_level} \ java ${"-Dsamjdk.compression_level=" + compressionLevel} \
-Xmx${mem}G -jar ${picard_jar} \ -Xmx${mem}G -jar ${picardJar} \
MergeVcfs \ MergeVcfs \
INPUT=${sep=' INPUT=' input_vcfs} \ INPUT=${sep=' INPUT=' inputVCFs} \
OUTPUT=${output_vcf_path} OUTPUT=${outputVCFpath}
} }
output { output {
File output_vcf = output_vcf_path File outputVCF = outputVCFpath
File output_vcf_index = output_vcf_path + ".tbi" File outputVCFindex = outputVCFpath + ".tbi"
} }
runtime { runtime {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment