diff --git a/biopet.wdl b/biopet.wdl index ea8d2ba4233d8a1373c701c33b3b052388d825dc..b3234e4d16543afcbdc8bce39616010ce5463fde 100644 --- a/biopet.wdl +++ b/biopet.wdl @@ -117,25 +117,24 @@ task extractAdaptersFastqc { task FastqSplitter { String? preCommand File inputFastq - String outputPath - Int numberChunks - File toolJar - Array[Int] chunks = range(numberChunks) + Array[String] outputPaths + String toolJar command { set -e -o pipefail ${preCommand} - mkdir -p ${sep=' ' prefix(outputPath + "/chunk_", chunks)} - if [ ${numberChunks} -gt 1 ]; then - SEP="/${basename(inputFastq)} -o " - java -jar ${toolJar} -I ${inputFastq} -o ${sep='$SEP' prefix(outputPath + "/chunk_", chunks)}/${basename(inputFastq)} - else - ln -sf ${inputFastq} ${outputPath}/chunk_0/${basename(inputFastq)} - fi + mkdir -p $(dirname ${sep=') $(dirname ' outputPaths}) + if [ ${length(outputPaths)} -gt 1 ]; then + java -jar ${toolJar} \ + -I ${inputFastq} \ + -o ${sep=' -o ' outputPaths} + else + ln -sf ${inputFastq} ${outputPaths[0]} + fi } output { - Array[File] outputFastqFiles = glob(outputPath + "/chunk_*/" + basename(inputFastq)) + Array[File] chunks = outputPaths } } diff --git a/common.wdl b/common.wdl index 2ac9cb99366ab27e768c29a947bf239064193186..d80d47e6d5a964b4ba2637741eeb62b51cecc2f2 100644 --- a/common.wdl +++ b/common.wdl @@ -106,7 +106,9 @@ task appendToStringArray { } task createLink { - File inputFile + # Making this of type File will create a link to the copy of the file in the execution + # folder, instead of the actual file. + String inputFile String outputPath command { diff --git a/gatk.wdl b/gatk.wdl index 12b03d9820f79aa8cc420e073b8a36a355ddaa71..b5fd4a8d78ab641fd99abf6da0ae1a26f4212c7f 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1,16 +1,16 @@ -# Generate Base Quality Score Recalibration (BQSR) model -task BaseRecalibrator { +# Apply Base Quality Score Recalibration (BQSR) model +task ApplyBQSR { String? 
preCommand - String gatk_jar - String input_bam - String input_bam_index - String recalibration_report_filename - Array[File]+ sequence_group_interval - Array[File]+ known_indels_sites_VCFs - Array[File]+ known_indels_sites_indices - File ref_dict - File ref_fasta - File ref_fasta_index + File gatkJar + File inputBam + File inputBamIndex + String outputBamPath + File recalibrationReport + Array[File]+ sequenceGroupInterval + File refDict + File refFasta + File refFastaIndex + Int? compressionLevel Float? memory Float? memoryMultiplier @@ -19,18 +19,23 @@ task BaseRecalibrator { command { set -e -o pipefail ${preCommand} - java -Xms${mem}G -jar ${gatk_jar} \ - BaseRecalibrator \ - -R ${ref_fasta} \ - -I ${input_bam} \ + java ${"-Dsamjdk.compression_level=" + compressionLevel} \ + -Xms${mem}G -jar ${gatkJar} \ + ApplyBQSR \ + --create-output-bam-md5 \ + --add-output-sam-program-record \ + -R ${refFasta} \ + -I ${inputBam} \ --use-original-qualities \ - -O ${recalibration_report_filename} \ - --known-sites ${sep=" --known-sites " known_indels_sites_VCFs} \ - -L ${sep=" -L " sequence_group_interval} + -O ${outputBamPath} \ + -bqsr ${recalibrationReport} \ + --static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \ + -L ${sep=" -L " sequenceGroupInterval} } output { - File recalibration_report = "${recalibration_report_filename}" + File recalibrated_bam = outputBamPath + File recalibrated_bam_checksum = outputBamPath + ".md5" } runtime { @@ -38,18 +43,19 @@ task BaseRecalibrator { } } -# Apply Base Quality Score Recalibration (BQSR) model -task ApplyBQSR { +# Generate Base Quality Score Recalibration (BQSR) model +task BaseRecalibrator { String? preCommand - String gatk_jar - String input_bam - String output_bam_path - File recalibration_report - Array[String] sequence_group_interval - File ref_dict - File ref_fasta - File ref_fasta_index - Int? 
compression_level + File gatkJar + File inputBam + File inputBamIndex + String recalibrationReportPath + Array[File]+ sequenceGroupInterval + Array[File]+ knownIndelsSitesVCFs + Array[File]+ knownIndelsSitesIndices + File refDict + File refFasta + File refFastaIndex Float? memory Float? memoryMultiplier @@ -58,23 +64,18 @@ task ApplyBQSR { command { set -e -o pipefail ${preCommand} - java ${"-Dsamjdk.compression_level=" + compression_level} \ - -Xms${mem}G -jar ${gatk_jar} \ - ApplyBQSR \ - --create-output-bam-md5 \ - --add-output-sam-program-record \ - -R ${ref_fasta} \ - -I ${input_bam} \ + java -Xms${mem}G -jar ${gatkJar} \ + BaseRecalibrator \ + -R ${refFasta} \ + -I ${inputBam} \ --use-original-qualities \ - -O ${output_bam_path} \ - -bqsr ${recalibration_report} \ - --static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \ - -L ${sep=" -L " sequence_group_interval} + -O ${recalibrationReportPath} \ + --known-sites ${sep=" --known-sites " knownIndelsSitesVCFs} \ + -L ${sep=" -L " sequenceGroupInterval} } output { - File recalibrated_bam = "${output_bam_path}" - File recalibrated_bam_checksum = "${output_bam_path}.md5" + File recalibrationReport = recalibrationReportPath } runtime { @@ -82,13 +83,21 @@ task ApplyBQSR { } } -# Combine multiple recalibration tables from scattered BaseRecalibrator runs -task GatherBqsrReports { +task CombineGVCFs { String? preCommand - String gatk_jar - Array[File] input_bqsr_reports - String output_report_filepath + Array[File]+ gvcfFiles + Array[File]+ gvcfFileIndexes + Array[File]+ intervals + + String outputPath + + String gatkJar + File refFasta + File refFastaIndex + File refDict + + Int? compressionLevel Float? memory Float? 
memoryMultiplier @@ -96,14 +105,24 @@ task GatherBqsrReports { command { set -e -o pipefail ${preCommand} - java -Xms${mem}G -jar ${gatk_jar} \ - GatherBQSRReports \ - -I ${sep=' -I ' input_bqsr_reports} \ - -O ${output_report_filepath} + + if [ ${length(gvcfFiles)} -gt 1 ]; then + java ${"-Dsamjdk.compression_level=" + compressionLevel} \ + -Xmx${mem}G -jar ${gatkJar} \ + CombineGVCFs \ + -R ${refFasta} \ + -O ${outputPath} \ + -V ${sep=' -V ' gvcfFiles} \ + -L ${sep=' -L ' intervals} + else # TODO this should be handled in wdl + ln -sf ${select_first(gvcfFiles)} ${outputPath} + ln -sf ${select_first(gvcfFileIndexes)} ${outputPath}.tbi + fi } output { - File output_bqsr_report = "${output_report_filepath}" + File outputGVCF = outputPath + File outputGVCFindex = outputPath + ".tbi" } runtime { @@ -111,19 +130,12 @@ task GatherBqsrReports { } } -# Call variants on a single sample with HaplotypeCaller to produce a GVCF -task HaplotypeCallerGvcf { +# Combine multiple recalibration tables from scattered BaseRecalibrator runs +task GatherBqsrReports { String? preCommand - Array[File]+ input_bams - Array[File]+ input_bams_index - Array[File]+ interval_list - String gvcf_basename - File ref_dict - File ref_fasta - File ref_fasta_index - Float? contamination - Int? compression_level - String gatk_jar + String gatkJar + Array[File] inputBQSRreports + String outputReportPath Float? memory Float? 
memoryMultiplier @@ -132,20 +144,14 @@ task HaplotypeCallerGvcf { command { set -e -o pipefail ${preCommand} - java ${"-Dsamjdk.compression_level=" + compression_level} \ - -Xmx${mem}G -jar ${gatk_jar} \ - HaplotypeCaller \ - -R ${ref_fasta} \ - -O ${gvcf_basename}.vcf.gz \ - -I ${sep=" -I " input_bams} \ - -L ${sep=' -L ' interval_list} \ - -contamination ${default=0 contamination} \ - -ERC GVCF + java -Xms${mem}G -jar ${gatkJar} \ + GatherBQSRReports \ + -I ${sep=' -I ' inputBQSRreports} \ + -O ${outputReportPath} } output { - File output_gvcf = "${gvcf_basename}.vcf.gz" - File output_gvcf_index = "${gvcf_basename}.vcf.gz.tbi" + File outputBQSRreport = outputReportPath } runtime { @@ -155,22 +161,22 @@ task HaplotypeCallerGvcf { task GenotypeGVCFs { String? preCommand - File gvcf_files - File gvcf_file_indexes + File gvcfFiles + File gvcfFileIndexes Array[File]+ intervals - String output_basename + String outputPath - String gatk_jar + String gatkJar - File ref_fasta - File ref_fasta_index - File ref_dict + File refFasta + File refFastaIndex + File refDict - File dbsnp_vcf - File dbsnp_vcf_index + File dbsnpVCF + File dbsnpVCFindex - Int? compression_level + Int? compressionLevel Float? memory Float? 
memoryMultiplier @@ -179,22 +185,22 @@ task GenotypeGVCFs { set -e -o pipefail ${preCommand} - java ${"-Dsamjdk.compression_level=" + compression_level} \ - -Xmx${mem}G -jar ${gatk_jar} \ + java ${"-Dsamjdk.compression_level=" + compressionLevel} \ + -Xmx${mem}G -jar ${gatkJar} \ GenotypeGVCFs \ - -R ${ref_fasta} \ - -O ${output_basename + ".vcf.gz"} \ - -D ${dbsnp_vcf} \ + -R ${refFasta} \ + -O ${outputPath} \ + -D ${dbsnpVCF} \ -G StandardAnnotation \ --only-output-calls-starting-in-intervals \ -new-qual \ - -V ${gvcf_files} \ + -V ${gvcfFiles} \ -L ${sep=' -L ' intervals} } output { - File output_vcf = output_basename + ".vcf.gz" - File output_vcf_index = output_basename + ".vcf.gz.tbi" + File outputVCF = outputPath + File outputVCFindex = outputPath + ".tbi" } runtime{ @@ -202,21 +208,20 @@ task GenotypeGVCFs { } } -task CombineGVCFs { +# Call variants on a single sample with HaplotypeCaller to produce a GVCF +task HaplotypeCallerGvcf { String? preCommand - Array[File]+ gvcf_files - Array[File]+ gvcf_file_indexes - Array[File]+ intervals - - String output_basename - - String gatk_jar - - File ref_fasta - File ref_fasta_index - File ref_dict + Array[File]+ inputBams + Array[File]+ inputBamsIndex + Array[File]+ intervalList + String gvcfPath + File refDict + File refFasta + File refFastaIndex + Float? contamination + Int? compressionLevel + String gatkJar - Int? compression_level Float? memory Float? 
memoryMultiplier @@ -224,24 +229,20 @@ task CombineGVCFs { command { set -e -o pipefail ${preCommand} - - if [ ${length(gvcf_files)} -gt 1 ]; then - java ${"-Dsamjdk.compression_level=" + compression_level} \ - -Xmx${mem}G -jar ${gatk_jar} \ - CombineGVCFs \ - -R ${ref_fasta} \ - -O ${output_basename + ".vcf.gz"} \ - -V ${sep=' -V ' gvcf_files} \ - -L ${sep=' -L ' intervals} - else - ln -sf ${select_first(gvcf_files)} ${output_basename + ".vcf.gz"} - ln -sf ${select_first(gvcf_files)}.tbi ${output_basename + ".vcf.gz.tbi"} - fi + java ${"-Dsamjdk.compression_level=" + compressionLevel} \ + -Xmx${mem}G -jar ${gatkJar} \ + HaplotypeCaller \ + -R ${refFasta} \ + -O ${gvcfPath} \ + -I ${sep=" -I " inputBams} \ + -L ${sep=' -L ' intervalList} \ + -contamination ${default=0 contamination} \ + -ERC GVCF } output { - File output_gvcf = output_basename + ".vcf.gz" - File output_gvcf_index = output_basename + ".vcf.gz.tbi" + File outputGVCF = gvcfPath + File outputGVCFindex = gvcfPath + ".tbi" } runtime { @@ -252,13 +253,13 @@ task CombineGVCFs { task SplitNCigarReads { String? preCommand - File input_bam - File input_bam_index - File ref_fasta - File ref_fasta_index - File ref_dict - String output_bam - String gatk_jar + File inputBam + File inputBamIndex + File refFasta + File refFastaIndex + File refDict + String outputBam + String gatkJar Array[File]+ intervals Float? 
memory @@ -268,17 +269,17 @@ task SplitNCigarReads { command { set -e -o pipefail ${preCommand} - java -Xms${mem}G -jar ${gatk_jar} \ + java -Xms${mem}G -jar ${gatkJar} \ SplitNCigarReads \ - -I ${input_bam} \ - -R ${ref_fasta} \ - -O ${output_bam} \ + -I ${inputBam} \ + -R ${refFasta} \ + -O ${outputBam} \ -L ${sep=' -L ' intervals} } output { - File bam = output_bam - File bam_index = sub(output_bam, "\\.bam$", ".bai") + File bam = outputBam + File bamIndex = sub(outputBam, "\\.bam$", ".bai") } runtime { diff --git a/picard.wdl b/picard.wdl index e3bcbc1320c1ef0ce4256f7bcd19400da3273235..5f35122130ede6f6ec93aeb84f060f3f982f6264 100644 --- a/picard.wdl +++ b/picard.wdl @@ -120,11 +120,11 @@ task MarkDuplicates { # Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs task MergeVCFs { String? preCommand - Array[File] input_vcfs - Array[File] input_vcfs_indexes - String output_vcf_path - Int? compression_level - String picard_jar + Array[File] inputVCFs + Array[File] inputVCFsIndexes + String outputVCFpath + Int? compressionLevel + String picardJar Float? memory Float? memoryMultiplier @@ -135,16 +135,16 @@ task MergeVCFs { command { set -e -o pipefail ${preCommand} - java ${"-Dsamjdk.compression_level=" + compression_level} \ - -Xmx${mem}G -jar ${picard_jar} \ + java ${"-Dsamjdk.compression_level=" + compressionLevel} \ + -Xmx${mem}G -jar ${picardJar} \ MergeVcfs \ - INPUT=${sep=' INPUT=' input_vcfs} \ - OUTPUT=${output_vcf_path} + INPUT=${sep=' INPUT=' inputVCFs} \ + OUTPUT=${outputVCFpath} } output { - File output_vcf = output_vcf_path - File output_vcf_index = output_vcf_path + ".tbi" + File outputVCF = outputVCFpath + File outputVCFindex = outputVCFpath + ".tbi" } runtime {