From 325811625e2466362d2a292c4c143f93c6180682 Mon Sep 17 00:00:00 2001 From: ffinfo <pjrvanthof@gmail.com> Date: Wed, 22 Aug 2018 13:33:14 +0200 Subject: [PATCH] Switch to structs --- gatk.wdl | 141 +++++++++++++++++++++++++------------------------------ 1 file changed, 64 insertions(+), 77 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 1f76099..265d284 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1,18 +1,17 @@ version 1.0 +import "common.wdl" + # Apply Base Quality Score Recalibration (BQSR) model task ApplyBQSR { input { String? preCommand File? gatkJar - File inputBam - File inputBamIndex + IndexedBamFile inputBam String outputBamPath File recalibrationReport Array[File]+ sequenceGroupInterval - File refDict - File refFasta - File refFastaIndex + Reference reference Int? compressionLevel Int memory = 4 @@ -30,8 +29,8 @@ task ApplyBQSR { ApplyBQSR \ --create-output-bam-md5 \ --add-output-sam-program-record \ - -R ~{refFasta} \ - -I ~{inputBam} \ + -R ~{reference.fasta} \ + -I ~{inputBam.file} \ --use-original-qualities \ -O ~{outputBamPath} \ -bqsr ~{recalibrationReport} \ @@ -42,7 +41,11 @@ task ApplyBQSR { } output { - File recalibrated_bam = outputBamPath + IndexedBamFile recalibrated_bam = { + "file": outputBamPath, + "index": sub(outputBamPath, "\.bam$", ".bai") + } + File recalibrated_bam_checksum = outputBamPath + ".md5" } @@ -56,28 +59,19 @@ task BaseRecalibrator { input { String? preCommand File? gatkJar - File inputBam - File inputBamIndex + IndexedBamFile inputBam String recalibrationReportPath Array[File]+ sequenceGroupInterval - Array[File]? knownIndelsSitesVCFs - Array[File]? knownIndelsSitesIndices - File? dbsnpVCF - File? dbsnpVCFindex - File refDict - File refFasta - File refFastaIndex + Array[IndexedVcfFile]? knownIndelsSitesVCFs + IndexedVcfFile? dbsnpVCF + Reference reference Int memory = 4 Float memoryMultiplier = 3.0 } Array[File]+ knownIndelsSitesVCFsArg = flatten([ - select_first([knownIndelsSitesVCFs, []]), - select_all([dbsnpVCF]) - ]) - Array[File]+ knownIndelsSitesIndicesArg = flatten([ - select_first([knownIndelsSitesIndices, []]), - select_all([dbsnpVCFindex]) + select_first([knownIndelsSitesVCFs.file, []]), + select_all([dbsnpVCF.file]) ]) String toolCommand = if defined(gatkJar) @@ -89,8 +83,8 @@ task BaseRecalibrator { ~{preCommand} ~{toolCommand} \ BaseRecalibrator \ - -R ~{refFasta} \ - -I ~{inputBam} \ + -R ~{reference.fasta} \ + -I ~{inputBam.file} \ --use-original-qualities \ -O ~{recalibrationReportPath} \ --known-sites ~{sep=" --known-sites " knownIndelsSitesVCFsArg} \ @@ -109,17 +103,14 @@ task BaseRecalibrator { task CombineGVCFs { input { String? preCommand - Array[File]+ gvcfFiles - Array[File]+ gvcfFileIndexes + Array[IndexedVcfFile]+ gvcfFiles Array[File]+ intervals String outputPath String? gatkJar - File refFasta - File refFastaIndex - File refDict + Reference reference Int? compressionLevel #TODO This isn't being used? Int memory = 4 @@ -137,19 +128,21 @@ task CombineGVCFs { if [ ~{length(gvcfFiles)} -gt 1 ]; then ~{toolCommand} \ CombineGVCFs \ - -R ~{refFasta} \ + -R ~{reference.fasta} \ -O ~{outputPath} \ -V ~{sep=' -V ' gvcfFiles} \ -L ~{sep=' -L ' intervals} else # TODO this should be handeled in wdl - ln -sf ~{gvcfFiles[0]} ~{outputPath} - ln -sf ~{gvcfFileIndexes[0]} ~{outputPath}.tbi + ln -sf ~{gvcfFiles[0].file} ~{outputPath} + ln -sf ~{gvcfFiles[0].index} ~{outputPath}.tbi fi } output { - File outputGVCF = outputPath - File outputGVCFindex = outputPath + ".tbi" + IndexedVcfFile outputVCF = { + "file": outputPath, + "index": outputPath + ".tbi" + } } runtime { @@ -194,20 +187,16 @@ task GatherBqsrReports { task GenotypeGVCFs { input { String? preCommand - File gvcfFiles - File gvcfFileIndexes + Array[IndexedVcfFile] gvcfFiles Array[File]+ intervals String outputPath String? gatkJar - File refFasta - File refFastaIndex - File refDict + Reference reference - File? dbsnpVCF - File? dbsnpVCFindex + IndexedVcfFile? dbsnpVCF Int? compressionLevel Int memory = 4 @@ -223,19 +212,21 @@ task GenotypeGVCFs { ~{preCommand} ~{toolCommand} \ GenotypeGVCFs \ - -R ~{refFasta} \ + -R ~{reference.fasta} \ -O ~{outputPath} \ - ~{"-D " + dbsnpVCF} \ + ~{"-D " + dbsnpVCF.file} \ -G StandardAnnotation \ --only-output-calls-starting-in-intervals \ -new-qual \ - -V ~{gvcfFiles} \ + -V ~{sep=' -V ' gvcfFiles.file} \ -L ~{sep=' -L ' intervals} } output { - File outputVCF = outputPath - File outputVCFindex = outputPath + ".tbi" + IndexedVcfFile outputVCF = { + "file": outputPath, + "index": outputPath + ".tbi" + } } runtime{ @@ -247,19 +238,15 @@ task GenotypeGVCFs { task HaplotypeCallerGvcf { input { String? preCommand - Array[File]+ inputBams - Array[File]+ inputBamsIndex + Array[IndexedBamFile]+ inputBams Array[File]+ intervalList String gvcfPath - File refDict - File refFasta - File refFastaIndex + Reference reference Float contamination = 0.0 Int? compressionLevel String? gatkJar - File? dbsnpVCF - File? dbsnpVCFindex + IndexedVcfFile? dbsnpVCF Int memory = 4 Float memoryMultiplier = 3 @@ -274,18 +261,20 @@ task HaplotypeCallerGvcf { ~{preCommand} ~{toolCommand} \ HaplotypeCaller \ - -R ~{refFasta} \ + -R ~{reference.fasta} \ -O ~{gvcfPath} \ - -I ~{sep=" -I " inputBams} \ + -I ~{sep=" -I " inputBams.file} \ -L ~{sep=' -L ' intervalList} \ - ~{"-D " + dbsnpVCF} \ + ~{"-D " + dbsnpVCF.file} \ -contamination ~{contamination} \ -ERC GVCF } output { - File outputGVCF = gvcfPath - File outputGVCFindex = gvcfPath + ".tbi" + IndexedVcfFile outputGVCF = { + "file": gvcfPath, + "index": gvcfPath + ".tbi" + } } runtime { @@ -297,11 +286,8 @@ task MuTect2 { input { String? preCommand - Array[File]+ inputBams - Array[File]+ inputBamIndex - File refFasta - File refFastaIndex - File refDict + Array[IndexedBamFile]+ inputBams + Reference reference String outputVcf String tumorSample String? normalSample @@ -321,8 +307,8 @@ task MuTect2 { ~{preCommand} ~{toolCommand} \ Mutect2 \ - -R ~{refFasta} \ - -I ~{sep=" -I " inputBams} \ + -R ~{reference.fasta} \ + -I ~{sep=" -I " inputBams.file} \ -tumor ~{tumorSample} \ ~{"-normal " + normalSample} \ -O ~{outputVcf} \ @@ -330,8 +316,10 @@ task MuTect2 { } output { - File vcfFile = outputVcf - File vcfIndex = outputVcf + ".tbi" + IndexedVcfFile vcfFile = { + "file": outputVcf, + "index": outputVcf + ".tbi" + } } runtime { @@ -343,11 +331,8 @@ task SplitNCigarReads { input { String? preCommand - File inputBam - File inputBamIndex - File refFasta - File refFastaIndex - File refDict + IndexedBamFile inputBam + Reference reference String outputBam String? gatkJar Array[File]+ intervals @@ -365,15 +350,17 @@ task SplitNCigarReads { ~{preCommand} ~{toolCommand} \ SplitNCigarReads \ - -I ~{inputBam} \ - -R ~{refFasta} \ + -I ~{inputBam.file} \ + -R ~{reference.fasta} \ -O ~{outputBam} \ -L ~{sep=' -L ' intervals} } output { - File bam = outputBam - File bamIndex = sub(outputBam, "\.bam$", ".bai") + IndexedBamFile bam = { + "file": outputBam, + "index": sub(outputBam, "\.bam$", ".bai") + } } runtime { -- GitLab