Skip to content
Snippets Groups Projects
Unverified commit c5eb0a76, authored by Peter van 't Hof, committed by GitHub
Browse files

Merge pull request #17 from biowdl/BIOWDL-25

Caching related changes
parents e75a3008 4177a251
No related branches found
No related tags found
No related merge requests found
......@@ -117,25 +117,24 @@ task extractAdaptersFastqc {
task FastqSplitter {
String? preCommand
File inputFastq
String outputPath
Int numberChunks
File toolJar
Array[Int] chunks = range(numberChunks)
Array[String] outputPaths
String toolJar
command {
set -e -o pipefail
${preCommand}
mkdir -p ${sep=' ' prefix(outputPath + "/chunk_", chunks)}
if [ ${numberChunks} -gt 1 ]; then
SEP="/${basename(inputFastq)} -o "
java -jar ${toolJar} -I ${inputFastq} -o ${sep='$SEP' prefix(outputPath + "/chunk_", chunks)}/${basename(inputFastq)}
else
ln -sf ${inputFastq} ${outputPath}/chunk_0/${basename(inputFastq)}
fi
mkdir -p $(dirname ${sep=') $(dirname ' outputPaths})
if [ ${length(outputPaths)} -gt 1 ]; then
java -jar ${toolJar} \
-I ${inputFastq} \
-o ${sep=' -o ' outputPaths}
else
ln -sf ${inputFastq} ${outputPaths[0]}
fi
}
output {
Array[File] outputFastqFiles = glob(outputPath + "/chunk_*/" + basename(inputFastq))
Array[File] chunks = outputPaths
}
}
......
......@@ -106,7 +106,9 @@ task appendToStringArray {
}
task createLink {
File inputFile
# Making this of type File will create a link to the copy of the file in the execution
# folder, instead of the actual file.
String inputFile
String outputPath
command {
......
# Generate Base Quality Score Recalibration (BQSR) model
task BaseRecalibrator {
# Apply Base Quality Score Recalibration (BQSR) model
task ApplyBQSR {
String? preCommand
String gatk_jar
String input_bam
String input_bam_index
String recalibration_report_filename
Array[File]+ sequence_group_interval
Array[File]+ known_indels_sites_VCFs
Array[File]+ known_indels_sites_indices
File ref_dict
File ref_fasta
File ref_fasta_index
File gatkJar
File inputBam
File inputBamIndex
String outputBamPath
File recalibrationReport
Array[File]+ sequenceGroupInterval
File refDict
File refFasta
File refFastaIndex
Int? compressionLevel
Float? memory
Float? memoryMultiplier
......@@ -19,18 +19,23 @@ task BaseRecalibrator {
command {
set -e -o pipefail
${preCommand}
java -Xms${mem}G -jar ${gatk_jar} \
BaseRecalibrator \
-R ${ref_fasta} \
-I ${input_bam} \
java ${"-Dsamjdk.compression_level=" + compressionLevel} \
-Xms${mem}G -jar ${gatkJar} \
ApplyBQSR \
--create-output-bam-md5 \
--add-output-sam-program-record \
-R ${refFasta} \
-I ${inputBam} \
--use-original-qualities \
-O ${recalibration_report_filename} \
--known-sites ${sep=" --known-sites " known_indels_sites_VCFs} \
-L ${sep=" -L " sequence_group_interval}
-O ${outputBamPath} \
-bqsr ${recalibrationReport} \
--static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \
-L ${sep=" -L " sequenceGroupInterval}
}
output {
File recalibration_report = "${recalibration_report_filename}"
File recalibrated_bam = outputBamPath
File recalibrated_bam_checksum = outputBamPath + ".md5"
}
runtime {
......@@ -38,18 +43,19 @@ task BaseRecalibrator {
}
}
# Apply Base Quality Score Recalibration (BQSR) model
task ApplyBQSR {
# Generate Base Quality Score Recalibration (BQSR) model
task BaseRecalibrator {
String? preCommand
String gatk_jar
String input_bam
String output_bam_path
File recalibration_report
Array[String] sequence_group_interval
File ref_dict
File ref_fasta
File ref_fasta_index
Int? compression_level
File gatkJar
File inputBam
File inputBamIndex
String recalibrationReportPath
Array[File]+ sequenceGroupInterval
Array[File]+ knownIndelsSitesVCFs
Array[File]+ knownIndelsSitesIndices
File refDict
File refFasta
File refFastaIndex
Float? memory
Float? memoryMultiplier
......@@ -58,23 +64,18 @@ task ApplyBQSR {
command {
set -e -o pipefail
${preCommand}
java ${"-Dsamjdk.compression_level=" + compression_level} \
-Xms${mem}G -jar ${gatk_jar} \
ApplyBQSR \
--create-output-bam-md5 \
--add-output-sam-program-record \
-R ${ref_fasta} \
-I ${input_bam} \
java -Xms${mem}G -jar ${gatkJar} \
BaseRecalibrator \
-R ${refFasta} \
-I ${inputBam} \
--use-original-qualities \
-O ${output_bam_path} \
-bqsr ${recalibration_report} \
--static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \
-L ${sep=" -L " sequence_group_interval}
-O ${recalibrationReportPath} \
--known-sites ${sep=" --known-sites " knownIndelsSitesVCFs} \
-L ${sep=" -L " sequenceGroupInterval}
}
output {
File recalibrated_bam = "${output_bam_path}"
File recalibrated_bam_checksum = "${output_bam_path}.md5"
File recalibrationReport = recalibrationReportPath
}
runtime {
......@@ -82,13 +83,21 @@ task ApplyBQSR {
}
}
# Combine multiple recalibration tables from scattered BaseRecalibrator runs
task GatherBqsrReports {
task CombineGVCFs {
String? preCommand
String gatk_jar
Array[File] input_bqsr_reports
String output_report_filepath
Array[File]+ gvcfFiles
Array[File]+ gvcfFileIndexes
Array[File]+ intervals
String outputPath
String gatkJar
File refFasta
File refFastaIndex
File refDict
Int? compressionLevel
Float? memory
Float? memoryMultiplier
......@@ -96,14 +105,24 @@ task GatherBqsrReports {
command {
set -e -o pipefail
${preCommand}
java -Xms${mem}G -jar ${gatk_jar} \
GatherBQSRReports \
-I ${sep=' -I ' input_bqsr_reports} \
-O ${output_report_filepath}
if [ ${length(gvcfFiles)} -gt 1 ]; then
java ${"-Dsamjdk.compression_level=" + compressionLevel} \
-Xmx${mem}G -jar ${gatkJar} \
CombineGVCFs \
-R ${refFasta} \
-O ${outputPath} \
-V ${sep=' -V ' gvcfFiles} \
-L ${sep=' -L ' intervals}
else # TODO this should be handled in WDL
ln -sf ${select_first(gvcfFiles)} ${outputPath}
ln -sf ${select_first(gvcfFileIndexes)} ${outputPath}.tbi
fi
}
output {
File output_bqsr_report = "${output_report_filepath}"
File outputGVCF = outputPath
File outputGVCFindex = outputPath + ".tbi"
}
runtime {
......@@ -111,19 +130,12 @@ task GatherBqsrReports {
}
}
# Call variants on a single sample with HaplotypeCaller to produce a GVCF
task HaplotypeCallerGvcf {
# Combine multiple recalibration tables from scattered BaseRecalibrator runs
task GatherBqsrReports {
String? preCommand
Array[File]+ input_bams
Array[File]+ input_bams_index
Array[File]+ interval_list
String gvcf_basename
File ref_dict
File ref_fasta
File ref_fasta_index
Float? contamination
Int? compression_level
String gatk_jar
String gatkJar
Array[File] inputBQSRreports
String outputReportPath
Float? memory
Float? memoryMultiplier
......@@ -132,20 +144,14 @@ task HaplotypeCallerGvcf {
command {
set -e -o pipefail
${preCommand}
java ${"-Dsamjdk.compression_level=" + compression_level} \
-Xmx${mem}G -jar ${gatk_jar} \
HaplotypeCaller \
-R ${ref_fasta} \
-O ${gvcf_basename}.vcf.gz \
-I ${sep=" -I " input_bams} \
-L ${sep=' -L ' interval_list} \
-contamination ${default=0 contamination} \
-ERC GVCF
java -Xms${mem}G -jar ${gatkJar} \
GatherBQSRReports \
-I ${sep=' -I ' inputBQSRreports} \
-O ${outputReportPath}
}
output {
File output_gvcf = "${gvcf_basename}.vcf.gz"
File output_gvcf_index = "${gvcf_basename}.vcf.gz.tbi"
File outputBQSRreport = outputReportPath
}
runtime {
......@@ -155,22 +161,22 @@ task HaplotypeCallerGvcf {
task GenotypeGVCFs {
String? preCommand
File gvcf_files
File gvcf_file_indexes
File gvcfFiles
File gvcfFileIndexes
Array[File]+ intervals
String output_basename
String outputPath
String gatk_jar
String gatkJar
File ref_fasta
File ref_fasta_index
File ref_dict
File refFasta
File refFastaIndex
File refDict
File dbsnp_vcf
File dbsnp_vcf_index
File dbsnpVCF
File dbsnpVCFindex
Int? compression_level
Int? compressionLevel
Float? memory
Float? memoryMultiplier
......@@ -179,22 +185,22 @@ task GenotypeGVCFs {
set -e -o pipefail
${preCommand}
java ${"-Dsamjdk.compression_level=" + compression_level} \
-Xmx${mem}G -jar ${gatk_jar} \
java ${"-Dsamjdk.compression_level=" + compressionLevel} \
-Xmx${mem}G -jar ${gatkJar} \
GenotypeGVCFs \
-R ${ref_fasta} \
-O ${output_basename + ".vcf.gz"} \
-D ${dbsnp_vcf} \
-R ${refFasta} \
-O ${outputPath} \
-D ${dbsnpVCF} \
-G StandardAnnotation \
--only-output-calls-starting-in-intervals \
-new-qual \
-V ${gvcf_files} \
-V ${gvcfFiles} \
-L ${sep=' -L ' intervals}
}
output {
File output_vcf = output_basename + ".vcf.gz"
File output_vcf_index = output_basename + ".vcf.gz.tbi"
File outputVCF = outputPath
File outputVCFindex = outputPath + ".tbi"
}
runtime{
......@@ -202,21 +208,20 @@ task GenotypeGVCFs {
}
}
task CombineGVCFs {
# Call variants on a single sample with HaplotypeCaller to produce a GVCF
task HaplotypeCallerGvcf {
String? preCommand
Array[File]+ gvcf_files
Array[File]+ gvcf_file_indexes
Array[File]+ intervals
String output_basename
String gatk_jar
File ref_fasta
File ref_fasta_index
File ref_dict
Array[File]+ inputBams
Array[File]+ inputBamsIndex
Array[File]+ intervalList
String gvcfPath
File refDict
File refFasta
File refFastaIndex
Float? contamination
Int? compressionLevel
String gatkJar
Int? compression_level
Float? memory
Float? memoryMultiplier
......@@ -224,24 +229,20 @@ task CombineGVCFs {
command {
set -e -o pipefail
${preCommand}
if [ ${length(gvcf_files)} -gt 1 ]; then
java ${"-Dsamjdk.compression_level=" + compression_level} \
-Xmx${mem}G -jar ${gatk_jar} \
CombineGVCFs \
-R ${ref_fasta} \
-O ${output_basename + ".vcf.gz"} \
-V ${sep=' -V ' gvcf_files} \
-L ${sep=' -L ' intervals}
else
ln -sf ${select_first(gvcf_files)} ${output_basename + ".vcf.gz"}
ln -sf ${select_first(gvcf_files)}.tbi ${output_basename + ".vcf.gz.tbi"}
fi
java ${"-Dsamjdk.compression_level=" + compressionLevel} \
-Xmx${mem}G -jar ${gatkJar} \
HaplotypeCaller \
-R ${refFasta} \
-O ${gvcfPath} \
-I ${sep=" -I " inputBams} \
-L ${sep=' -L ' intervalList} \
-contamination ${default=0 contamination} \
-ERC GVCF
}
output {
File output_gvcf = output_basename + ".vcf.gz"
File output_gvcf_index = output_basename + ".vcf.gz.tbi"
File outputGVCF = gvcfPath
File outputGVCFindex = gvcfPath + ".tbi"
}
runtime {
......@@ -252,13 +253,13 @@ task CombineGVCFs {
task SplitNCigarReads {
String? preCommand
File input_bam
File input_bam_index
File ref_fasta
File ref_fasta_index
File ref_dict
String output_bam
String gatk_jar
File inputBam
File inputBamIndex
File refFasta
File refFastaIndex
File refDict
String outputBam
String gatkJar
Array[File]+ intervals
Float? memory
......@@ -268,17 +269,17 @@ task SplitNCigarReads {
command {
set -e -o pipefail
${preCommand}
java -Xms${mem}G -jar ${gatk_jar} \
java -Xms${mem}G -jar ${gatkJar} \
SplitNCigarReads \
-I ${input_bam} \
-R ${ref_fasta} \
-O ${output_bam} \
-I ${inputBam} \
-R ${refFasta} \
-O ${outputBam} \
-L ${sep=' -L ' intervals}
}
output {
File bam = output_bam
File bam_index = sub(output_bam, "\\.bam$", ".bai")
File bam = outputBam
File bamIndex = sub(outputBam, "\\.bam$", ".bai")
}
runtime {
......
......@@ -120,11 +120,11 @@ task MarkDuplicates {
# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs
task MergeVCFs {
String? preCommand
Array[File] input_vcfs
Array[File] input_vcfs_indexes
String output_vcf_path
Int? compression_level
String picard_jar
Array[File] inputVCFs
Array[File] inputVCFsIndexes
String outputVCFpath
Int? compressionLevel
String picardJar
Float? memory
Float? memoryMultiplier
......@@ -135,16 +135,16 @@ task MergeVCFs {
command {
set -e -o pipefail
${preCommand}
java ${"-Dsamjdk.compression_level=" + compression_level} \
-Xmx${mem}G -jar ${picard_jar} \
java ${"-Dsamjdk.compression_level=" + compressionLevel} \
-Xmx${mem}G -jar ${picardJar} \
MergeVcfs \
INPUT=${sep=' INPUT=' input_vcfs} \
OUTPUT=${output_vcf_path}
INPUT=${sep=' INPUT=' inputVCFs} \
OUTPUT=${outputVCFpath}
}
output {
File output_vcf = output_vcf_path
File output_vcf_index = output_vcf_path + ".tbi"
File outputVCF = outputVCFpath
File outputVCFindex = outputVCFpath + ".tbi"
}
runtime {
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment