diff --git a/.travis.yml b/.travis.yml index cb8b70de06b9c35f8d7759b565eb621c435549ca..4065d1cb15dd8c5d625c7ed43043955d1b7bd8a8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,9 @@ language: java script: - - set -e - - export CROMWELL_VERSION=34 - - wget https://github.com/broadinstitute/cromwell/releases/download/$CROMWELL_VERSION/womtool-$CROMWELL_VERSION.jar - - for F in $(git ls-files *.wdl); do echo $F; java -jar womtool-$CROMWELL_VERSION.jar validate $F; done +- set -e +- export CROMWELL_VERSION=34 +# - wget https://github.com/broadinstitute/cromwell/releases/download/$CROMWELL_VERSION/womtool-$CROMWELL_VERSION.jar +- wget https://barmsijs.lumc.nl/womtool-35-a7ae2d8-SNAP.jar +- for F in `find -name "*.wdl"`; do echo $F; java -jar womtool-*.jar validate $F; done +- 'if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then git submodule foreach --recursive git checkout $TRAVIS_BRANCH && git submodule foreach --recursive git pull; fi' +- "git diff --exit-code || (echo ERROR: Git changes detected. Please update submodules && exit 1)" diff --git a/biopet/bamstats.wdl b/biopet/bamstats.wdl index 8459ca4f26e344764f7ea04b0d32f846829b7262..df919ef82352df4c73b4ec6efe290c2d5b34784b 100644 --- a/biopet/bamstats.wdl +++ b/biopet/bamstats.wdl @@ -2,19 +2,19 @@ version 1.0 # Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2018 +import "../common.wdl" as common + task Generate { input { String? preCommand File? toolJar - File bam - File bamIndex + IndexedBamFile bam File? bedFile Boolean scatterMode = false Boolean onlyUnmapped = false Boolean tsvOutputs = false String outputDir - File? reference - File? referenceDict + Reference? 
reference Int memory = 4 Float memoryMultiplier = 2.0 } @@ -23,14 +23,16 @@ task Generate { then "java -Xmx" + memory + "G -jar " + toolJar else "biopet-bamstats -Xmx" + memory + "G" + String refArg = if (defined(reference)) then "--reference " + select_first([reference]).fasta else "" + command { set -e -o pipefail ~{preCommand} mkdir -p ~{outputDir} ~{toolCommand} Generate \ - --bam ~{bam} \ + --bam ~{bam.file} \ ~{"--bedFile " + bedFile} \ - ~{"--reference " + reference} \ + ~{refArg} \ ~{true="--onlyUnmapped" false="" onlyUnmapped} \ ~{true="--scatterMode" false="" scatterMode} \ ~{true="--tsvOutputs" false="" tsvOutputs} \ diff --git a/biopet.wdl b/biopet/biopet.wdl similarity index 81% rename from biopet.wdl rename to biopet/biopet.wdl index feb963fc2745f7d523025a3258a4a9f27a0280f7..daa5371764393b99253c16945549eb1ab9c1fcf6 100644 --- a/biopet.wdl +++ b/biopet/biopet.wdl @@ -1,11 +1,12 @@ version 1.0 +import "../common.wdl" + task BaseCounter { input { String? preCommand File? toolJar - File bam - File bamIndex + IndexedBamFile bam File refFlat String outputDir String prefix @@ -23,7 +24,7 @@ task BaseCounter { mkdir -p ~{outputDir} ~{preCommand} ~{toolCommand} \ - -b ~{bam} \ + -b ~{bam.file} \ -r ~{refFlat} \ -o ~{outputDir} \ -p ~{prefix} @@ -160,10 +161,8 @@ task FastqSplitter { task FastqSync { input { String? preCommand - File ref1 - File ref2 - File in1 - File in2 + FastqPair refFastq + FastqPair inputFastq String out1path String out2path File? 
toolJar @@ -181,17 +180,19 @@ task FastqSync { ~{preCommand} mkdir -p $(dirname ~{out1path}) $(dirname ~{out2path}) ~{toolCommand} \ - --in1 ~{in1} \ - --in2 ~{in2} \ - --ref1 ~{ref1} \ - --ref2 ~{ref2} \ + --in1 ~{inputFastq.R1} \ + --in2 ~{inputFastq.R2} \ + --ref1 ~{refFastq.R1} \ + --ref2 ~{refFastq.R2} \ --out1 ~{out1path} \ --out2 ~{out2path} } output { - File out1 = out1path - File out2 = out2path + FastqPair out1 = object { + R1: out1path, + R2: out2path + } } runtime { @@ -199,89 +200,10 @@ task FastqSync { } } -task SampleConfig { - input { - File? toolJar - String? preCommand - Array[File]+ inputFiles - String keyFilePath - String? sample - String? library - String? readgroup - String? jsonOutputPath - String? tsvOutputPath - - Int memory = 4 - Float memoryMultiplier = 2.0 - } - - String toolCommand = if defined(toolJar) - then "java -Xmx" + memory + "G -jar " +toolJar - else "biopet-sampleconfig -Xmx" + memory + "G" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p . ~{"$(dirname " + jsonOutputPath + ")"} ~{"$(dirname " + tsvOutputPath + ")"} - ~{toolCommand} \ - -i ~{sep="-i " inputFiles} \ - ~{"--sample " + sample} \ - ~{"--library " + library} \ - ~{"--readgroup " + readgroup} \ - ~{"--jsonOutput " + jsonOutputPath} \ - ~{"--tsvOutput " + tsvOutputPath} \ - > ~{keyFilePath} - } - - output { - File keysFile = keyFilePath - File? jsonOutput = jsonOutputPath - File? tsvOutput = tsvOutputPath - } - - runtime { - memory: ceil(memory * memoryMultiplier) - } -} - -task SampleConfigCromwellArrays { - input { - File? toolJar - String? 
preCommand - Array[File]+ inputFiles - String outputPath - - Int memory = 4 - Float memoryMultiplier = 2.0 - } - - String toolCommand = if defined(toolJar) - then "java -Xmx" + memory + "G -jar " + toolJar - else "biopet-sampleconfig -Xmx" + memory + "G" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p $(dirname ~{outputPath}) - ~{toolCommand} CromwellArrays \ - -i ~{sep="-i " inputFiles} \ - ~{"-o " + outputPath} - } - - output { - File outputFile = outputPath - } - - runtime { - memory: ceil(memory * memoryMultiplier) - } -} - task ScatterRegions { input { String? preCommand - File refFasta - File refDict + Reference reference String outputDirPath File? toolJar Int? scatterSize @@ -300,7 +222,7 @@ task ScatterRegions { ~{preCommand} mkdir -p ~{outputDirPath} ~{toolCommand} \ - -R ~{refFasta} \ + -R ~{reference.fasta} \ -o ~{outputDirPath} \ ~{"-s " + scatterSize} \ ~{"-L " + regions} @@ -315,48 +237,13 @@ task ScatterRegions { } } -task Seqstat { - input { - String? preCommand - File? toolJar - File fastq - String outputFile - - Int memory = 4 - Float memoryMultiplier = 2.0 - } - - String toolCommand = if defined(toolJar) - then "java -Xmx" + memory + "G -jar " + toolJar - else "biopet-seqstat -Xmx" + memory + "G" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p $(dirname ~{outputFile}) - ~{toolCommand} \ - --fastq ~{fastq} \ - --output ~{outputFile} - } - - output { - File json = outputFile - } - - runtime { - memory: ceil(memory * memoryMultiplier) - } -} - task ValidateAnnotation { input { String? preCommand File? toolJar File? refRefflat File? gtfFile - File refFasta - File refFastaIndex - File refDict + Reference reference Int memory = 4 Float memoryMultiplier = 2.0 @@ -372,7 +259,7 @@ task ValidateAnnotation { ~{toolCommand} \ ~{"-r " + refRefflat} \ ~{"-g " + gtfFile} \ - -R ~{refFasta} + -R ~{reference.fasta} } output { @@ -388,8 +275,7 @@ task ValidateFastq { input { String? preCommand File? toolJar - File fastq1 - File? 
fastq2 + FastqPair inputFastq Int memory = 4 Float memoryMultiplier = 2.0 @@ -403,14 +289,13 @@ task ValidateFastq { set -e -o pipefail ~{preCommand} ~{toolCommand} \ - --fastq1 ~{fastq1} \ - ~{"--fastq2 " + fastq2} + --fastq1 ~{inputFastq.R1} \ + ~{"--fastq2 " + inputFastq.R2} } output { File stderr = stderr() - File validatedFastq1 = fastq1 - File? validatedFastq2 = fastq2 + FastqPair validatedFastq = inputFastq } runtime { @@ -422,11 +307,8 @@ task ValidateVcf { input { String? preCommand File? toolJar - File vcfFile - File vcfIndex - File refFasta - File refFastaIndex - File refDict + IndexedVcfFile vcf + Reference reference Int memory = 4 Float memoryMultiplier = 2.0 @@ -440,8 +322,8 @@ task ValidateVcf { set -e -o pipefail ~{preCommand} ~{toolCommand} \ - -i ~{vcfFile} \ - -R ~{refFasta} + -i ~{vcf.file} \ + -R ~{reference.fasta} } output { @@ -455,11 +337,8 @@ task ValidateVcf { task VcfStats { input { - File vcfFile - File vcfIndex - File refFasta - File refFastaIndex - File refDict + IndexedVcfFile vcf + Reference reference String outputDir File? intervals Array[String]+? infoTags @@ -493,8 +372,8 @@ task VcfStats { mkdir -p ~{outputDir} ~{preCommand} ~{toolCommand} \ - -I ~{vcfFile} \ - -R ~{refFasta} \ + -I ~{vcf.file} \ + -R ~{reference.fasta} \ -o ~{outputDir} \ -t ~{localThreads} \ ~{"--intervals " + intervals} \ diff --git a/biopet/sampleconfig.wdl b/biopet/sampleconfig.wdl new file mode 100644 index 0000000000000000000000000000000000000000..61defae84ff0583b3515f16a8e6948bbc564c84a --- /dev/null +++ b/biopet/sampleconfig.wdl @@ -0,0 +1,79 @@ +version 1.0 + +task SampleConfig { + input { + File? toolJar + String? preCommand + Array[File]+ inputFiles + String keyFilePath + String? sample + String? library + String? readgroup + String? jsonOutputPath + String? 
tsvOutputPath + + Int memory = 4 + Float memoryMultiplier = 2.0 + } + + String toolCommand = if defined(toolJar) + then "java -Xmx" + memory + "G -jar " +toolJar + else "biopet-sampleconfig -Xmx" + memory + "G" + + command { + set -e -o pipefail + ~{preCommand} + mkdir -p . ~{"$(dirname " + jsonOutputPath + ")"} ~{"$(dirname " + tsvOutputPath + ")"} + ~{toolCommand} \ + -i ~{sep="-i " inputFiles} \ + ~{"--sample " + sample} \ + ~{"--library " + library} \ + ~{"--readgroup " + readgroup} \ + ~{"--jsonOutput " + jsonOutputPath} \ + ~{"--tsvOutput " + tsvOutputPath} \ + > ~{keyFilePath} + } + + output { + File keysFile = keyFilePath + File? jsonOutput = jsonOutputPath + File? tsvOutput = tsvOutputPath + } + + runtime { + memory: ceil(memory * memoryMultiplier) + } +} + +task SampleConfigCromwellArrays { + input { + File? toolJar + String? preCommand + Array[File]+ inputFiles + String outputPath + + Int memory = 4 + Float memoryMultiplier = 2.0 + } + + String toolCommand = if defined(toolJar) + then "java -Xmx" + memory + "G -jar " + toolJar + else "biopet-sampleconfig -Xmx" + memory + "G" + + command { + set -e -o pipefail + ~{preCommand} + mkdir -p $(dirname ~{outputPath}) + ~{toolCommand} CromwellArrays \ + -i ~{sep="-i " inputFiles} \ + ~{"-o " + outputPath} + } + + output { + File outputFile = outputPath + } + + runtime { + memory: ceil(memory * memoryMultiplier) + } +} diff --git a/biopet/seqstat.wdl b/biopet/seqstat.wdl index c1e83a2f86919ad181dfaa4498c0d94b96752e15..a9c24dc688f03a1fdf00eebfab7c1be779b5c326 100644 --- a/biopet/seqstat.wdl +++ b/biopet/seqstat.wdl @@ -2,12 +2,13 @@ version 1.0 # Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2018 +import "../common.wdl" as common + task Generate { input { String? preCommand File? toolJar - File fastqR1 - File? 
fastqR2 + FastqPair fastq String outputFile String sample String library @@ -26,8 +27,8 @@ task Generate { ~{preCommand} mkdir -p $(dirname ~{outputFile}) ~{toolCommand} Generate \ - --fastqR1 ~{fastqR1} \ - ~{"--fastqR2 " + fastqR2} \ + --fastqR1 ~{fastq.R1} \ + ~{"--fastqR2 " + fastq.R2} \ --output ~{outputFile} \ ~{"--sample " + sample} \ ~{"--library " + library } \ diff --git a/bwa.wdl b/bwa.wdl index d8ce3e3239a202f93de5ce25fbacd3e75bde6371..9c717ccd789bdde15d9a5f033f7efa82d49386be 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -5,15 +5,14 @@ import "common.wdl" as common task Mem { input { String? preCommand - File inputR1 - File? inputR2 + FastqPair inputFastq BwaIndex bwaIndex String outputPath String? readgroup String? picardJar - Int threads = 1 + Int threads = 2 Int memory = 8 Int picardMemory = 4 } @@ -48,8 +47,8 @@ task Mem { bwa mem ~{"-t " + threads} \ ~{readgroupArg} \ ~{bwaIndex.fastaFile} \ - ~{inputR1} \ - ~{inputR2} \ + ~{inputFastq.R1} \ + ~{inputFastq.R2} \ ~{altCommand} \ | ~{picardCommand} } diff --git a/common.wdl b/common.wdl index 7bd1da697e678744c521a0432337a4e86714af7e..af3b66bf470be5da4cf72ca1bcb8d5cc77cacb88 100644 --- a/common.wdl +++ b/common.wdl @@ -24,13 +24,14 @@ task AppendToStringArray { task CheckFileMD5 { input { File file - String MD5sum + File md5 } command { set -e -o pipefail MD5SUM=$(md5sum ~{file} | cut -d ' ' -f 1) - [ $MD5SUM = ~{MD5sum} ] + MD5SUM_CORRECT=$(cat ~{md5} | grep ~{basename(file)} | cut -d ' ' -f 1) + [ $MD5SUM = $MD5SUM_CORRECT ] } } @@ -147,16 +148,18 @@ struct Reference { struct IndexedVcfFile { File file File index + File? md5sum } struct IndexedBamFile { File file File index + File? md5sum } struct FastqPair { File R1 - String? R1_md5 + File? R1_md5 File? R2 - String? R2_md5 + File? 
R2_md5 } diff --git a/cutadapt.wdl b/cutadapt.wdl index 0df1ced44f253b904586c7a89b7dc61436181551..a8160aa2df7039c55f0f10375698a14164553cc9 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -1,9 +1,10 @@ version 1.0 +import "common.wdl" + task Cutadapt { input { - File read1 - File? read2 + FastqPair inputFastq String read1output String? read2output String? format @@ -119,15 +120,17 @@ task Cutadapt { ~{true="--bwa" false="" bwa} \ ~{true="--zero-cap" false="" zeroCap} \ ~{true="--no-zero-cap" false="" noZeroCap} \ - ~{read1} \ - ~{read2} \ + ~{inputFastq.R1} \ + ~{inputFastq.R2} \ ~{"> " + reportPath} } output{ + FastqPair cutOutput = object { + R1: read1output, + R2: read2output + } File report = if defined(reportPath) then select_first([reportPath]) else stdout() - File cutRead1 = read1output - File? cutRead2 = read2output File? tooLongOutput=tooLongOutputPath File? tooShortOutput=tooShortOutputPath File? untrimmedOutput=untrimmedOutputPath diff --git a/flash.wdl b/flash.wdl index 3d3eecdf3820be7c841ca44f0f4de580df5e61d1..728a237cc6c3d6eba8b19008d1404df97746913b 100644 --- a/flash.wdl +++ b/flash.wdl @@ -1,10 +1,11 @@ version 1.0 +import "common.wdl" as common + task Flash { input { String? preCommand - File inputR1 - File inputR2 + FastqPair inputFastq String outdirPath String outPrefix = "flash" Int? 
minOverlap @@ -25,13 +26,17 @@ task Flash { ~{true="--compress " false="" compress} \ ~{"--min-overlap=" + minOverlap} \ ~{"--max-overlap=" + maxOverlap} \ - ~{inputR1} ~{inputR2} + ~{inputFastq.R1} ~{inputFastq.R2} } output { File extendedFrags = outdirPath + "/" + outPrefix + ".extendedFrags.fastq.gz" File notCombined1 = outdirPath + "/" + outPrefix + ".notCombined_1.fastq.gz" File notCombined2 = outdirPath + "/" + outPrefix + ".notCombined_2.fastq.gz" + FastqPair notCombined = object { + R1: notCombined1, + R2: notCombined2 + } File hist = outdirPath + "/" + outPrefix + ".hist" File histogram = outdirPath + "/" + outPrefix + ".histogram" } diff --git a/gatk.wdl b/gatk.wdl index f252c7eca36c4676dfbe383d549936494c6183cb..99a5b2c472b2b812cdcdae9a0b6cfa5ff454599d 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1,18 +1,17 @@ version 1.0 +import "common.wdl" + # Apply Base Quality Score Recalibration (BQSR) model task ApplyBQSR { input { String? preCommand File? gatkJar - File inputBam - File inputBamIndex + IndexedBamFile inputBam String outputBamPath File recalibrationReport Array[File]+ sequenceGroupInterval - File refDict - File refFasta - File refFastaIndex + Reference reference Int memory = 4 Float memoryMultiplier = 3.0 @@ -29,8 +28,8 @@ task ApplyBQSR { ApplyBQSR \ --create-output-bam-md5 \ --add-output-sam-program-record \ - -R ~{refFasta} \ - -I ~{inputBam} \ + -R ~{reference.fasta} \ + -I ~{inputBam.file} \ --use-original-qualities \ -O ~{outputBamPath} \ -bqsr ~{recalibrationReport} \ @@ -41,8 +40,11 @@ task ApplyBQSR { } output { - File recalibrated_bam = outputBamPath - File recalibrated_bam_checksum = outputBamPath + ".md5" + IndexedBamFile recalibratedBam = { + "file": outputBamPath, + "index": sub(outputBamPath, "\.bam$", ".bai"), + "md5sum": outputBamPath + ".md5" + } } runtime { @@ -55,28 +57,20 @@ task BaseRecalibrator { input { String? preCommand File? 
gatkJar - File inputBam - File inputBamIndex + IndexedBamFile inputBam String recalibrationReportPath Array[File]+ sequenceGroupInterval Array[File]? knownIndelsSitesVCFs - Array[File]? knownIndelsSitesIndices - File? dbsnpVCF - File? dbsnpVCFindex - File refDict - File refFasta - File refFastaIndex + Array[File]? knownIndelsSitesVCFIndexes + IndexedVcfFile? dbsnpVCF + Reference reference Int memory = 4 Float memoryMultiplier = 3.0 } Array[File]+ knownIndelsSitesVCFsArg = flatten([ select_first([knownIndelsSitesVCFs, []]), - select_all([dbsnpVCF]) - ]) - Array[File]+ knownIndelsSitesIndicesArg = flatten([ - select_first([knownIndelsSitesIndices, []]), - select_all([dbsnpVCFindex]) + if defined(dbsnpVCF) then [select_first([dbsnpVCF]).file] else [] ]) String toolCommand = if defined(gatkJar) @@ -88,8 +82,8 @@ task BaseRecalibrator { ~{preCommand} ~{toolCommand} \ BaseRecalibrator \ - -R ~{refFasta} \ - -I ~{inputBam} \ + -R ~{reference.fasta} \ + -I ~{inputBam.file} \ --use-original-qualities \ -O ~{recalibrationReportPath} \ --known-sites ~{sep=" --known-sites " knownIndelsSitesVCFsArg} \ @@ -109,16 +103,14 @@ task CombineGVCFs { input { String? preCommand Array[File]+ gvcfFiles - Array[File]+ gvcfFileIndexes + Array[File]+ gvcfFilesIndex Array[File]+ intervals String outputPath String? 
gatkJar - File refFasta - File refFastaIndex - File refDict + Reference reference Int memory = 4 Float memoryMultiplier = 3.0 @@ -135,19 +127,21 @@ task CombineGVCFs { if [ ~{length(gvcfFiles)} -gt 1 ]; then ~{toolCommand} \ CombineGVCFs \ - -R ~{refFasta} \ + -R ~{reference.fasta} \ -O ~{outputPath} \ -V ~{sep=' -V ' gvcfFiles} \ -L ~{sep=' -L ' intervals} else # TODO this should be handeled in wdl ln -sf ~{gvcfFiles[0]} ~{outputPath} - ln -sf ~{gvcfFileIndexes[0]} ~{outputPath}.tbi + ln -sf ~{gvcfFilesIndex[0]} ~{outputPath}.tbi fi } output { - File outputGVCF = outputPath - File outputGVCFindex = outputPath + ".tbi" + IndexedVcfFile outputVCF = { + "file": outputPath, + "index": outputPath + ".tbi" + } } runtime { @@ -192,25 +186,24 @@ task GatherBqsrReports { task GenotypeGVCFs { input { String? preCommand - File gvcfFiles - File gvcfFileIndexes + Array[File]+ gvcfFiles + Array[File]+ gvcfFilesIndex Array[File]+ intervals String outputPath String? gatkJar - File refFasta - File refFastaIndex - File refDict + Reference reference - File? dbsnpVCF - File? dbsnpVCFindex + IndexedVcfFile? 
dbsnpVCF - Int memory = 4 - Float memoryMultiplier =3.0 + Int memory = 6 + Float memoryMultiplier = 2.0 } + String dbsnpArg = if defined(dbsnpVCF) then "-D " + select_first([dbsnpVCF]).file else "" + String toolCommand = if defined(gatkJar) then "java -Xmx" + memory + "G -jar " + gatkJar else "gatk --java-options -Xmx" + memory + "G" @@ -220,19 +213,21 @@ task GenotypeGVCFs { ~{preCommand} ~{toolCommand} \ GenotypeGVCFs \ - -R ~{refFasta} \ + -R ~{reference.fasta} \ -O ~{outputPath} \ - ~{"-D " + dbsnpVCF} \ + ~{dbsnpArg} \ -G StandardAnnotation \ --only-output-calls-starting-in-intervals \ -new-qual \ - -V ~{gvcfFiles} \ + -V ~{sep=' -V ' gvcfFiles} \ -L ~{sep=' -L ' intervals} } output { - File outputVCF = outputPath - File outputVCFindex = outputPath + ".tbi" + IndexedVcfFile outputVCF = { + "file": outputPath, + "index": outputPath + ".tbi" + } } runtime{ @@ -248,19 +243,18 @@ task HaplotypeCallerGvcf { Array[File]+ inputBamsIndex Array[File]+ intervalList String gvcfPath - File refDict - File refFasta - File refFastaIndex + Reference reference Float contamination = 0.0 String? gatkJar - File? dbsnpVCF - File? dbsnpVCFindex + IndexedVcfFile? dbsnpVCF Int memory = 4 Float memoryMultiplier = 3 } + String dbsnpArg = if (defined(dbsnpVCF)) then "-D " + select_first([dbsnpVCF]).file else "" + String toolCommand = if defined(gatkJar) then "java -Xmx" + memory + "G -jar " + gatkJar else "gatk --java-options -Xmx" + memory + "G" @@ -270,18 +264,20 @@ task HaplotypeCallerGvcf { ~{preCommand} ~{toolCommand} \ HaplotypeCaller \ - -R ~{refFasta} \ + -R ~{reference.fasta} \ -O ~{gvcfPath} \ -I ~{sep=" -I " inputBams} \ -L ~{sep=' -L ' intervalList} \ - ~{"-D " + dbsnpVCF} \ + ~{dbsnpArg} \ -contamination ~{contamination} \ -ERC GVCF } output { - File outputGVCF = gvcfPath - File outputGVCFindex = gvcfPath + ".tbi" + IndexedVcfFile outputGVCF = { + "file": gvcfPath, + "index": gvcfPath + ".tbi" + } } runtime { @@ -294,10 +290,8 @@ task MuTect2 { String? 
preCommand Array[File]+ inputBams - Array[File]+ inputBamIndex - File refFasta - File refFastaIndex - File refDict + Array[File]+ inputBamsIndex + Reference reference String outputVcf String tumorSample String? normalSample @@ -317,7 +311,7 @@ task MuTect2 { ~{preCommand} ~{toolCommand} \ Mutect2 \ - -R ~{refFasta} \ + -R ~{reference.fasta} \ -I ~{sep=" -I " inputBams} \ -tumor ~{tumorSample} \ ~{"-normal " + normalSample} \ @@ -326,8 +320,10 @@ task MuTect2 { } output { - File vcfFile = outputVcf - File vcfIndex = outputVcf + ".tbi" + IndexedVcfFile vcfFile = { + "file": outputVcf, + "index": outputVcf + ".tbi" + } } runtime { @@ -339,11 +335,8 @@ task SplitNCigarReads { input { String? preCommand - File inputBam - File inputBamIndex - File refFasta - File refFastaIndex - File refDict + IndexedBamFile inputBam + Reference reference String outputBam String? gatkJar Array[File]+ intervals @@ -361,15 +354,17 @@ task SplitNCigarReads { ~{preCommand} ~{toolCommand} \ SplitNCigarReads \ - -I ~{inputBam} \ - -R ~{refFasta} \ + -I ~{inputBam.file} \ + -R ~{reference.fasta} \ -O ~{outputBam} \ -L ~{sep=' -L ' intervals} } output { - File bam = outputBam - File bamIndex = sub(outputBam, "\.bam$", ".bai") + IndexedBamFile bam = { + "file": outputBam, + "index": sub(outputBam, "\.bam$", ".bai") + } } runtime { diff --git a/htseq.wdl b/htseq.wdl index db399cc2fb35061e69d280eb2efbcf44e1ccfb6d..cdfc98421feb745340bb5dabe6908c670ef3c9d0 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -1,9 +1,12 @@ version 1.0 +import "common.wdl" + task HTSeqCount { input { String? 
preCommand - Array[File] alignmentFiles + Array[File]+ inputBams + Array[File]+ inputBamsIndex File gtfFile String outputTable String format = "bam" @@ -21,7 +24,7 @@ task HTSeqCount { -f ~{format} \ -r ~{order} \ -s ~{stranded} \ - ~{sep=" " alignmentFiles} \ + ~{sep=" " inputBams} \ ~{gtfFile} \ > ~{outputTable} } diff --git a/macs2.wdl b/macs2.wdl index 9364d9dc4f318bc657d42163925c758b592b2cfd..982802f6d33fde3c6eb5024a1262fced21f74382 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -1,9 +1,12 @@ version 1.0 +import "common.wdl" + task PeakCalling { input { String? preCommand - Array[File] bamFiles + Array[File]+ inputBams + Array[File]+ inputBamsIndex String outDir String sampleName Int threads = 1 @@ -15,7 +18,7 @@ task PeakCalling { set -e -o pipefail ~{preCommand} macs2 callpeak \ - --treatment ~{sep = ' ' bamFiles} \ + --treatment ~{sep = ' ' inputBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ ~{true='--nomodel' false='' nomodel} diff --git a/manta.wdl b/manta.wdl index a5b024c55f8f864ea97774a82081c4e35e089df6..dea9937c1a2466e03be8dee2b333787f6f8ac63a 100644 --- a/manta.wdl +++ b/manta.wdl @@ -1,13 +1,12 @@ version 1.0 +import "common.wdl" + task ConfigureSomatic { input { - File tumorBam - File tumorIndex - File? normalBam - File? normalIndex - File refFasta - File refFastaIndex + IndexedBamFile tumorBam + IndexedBamFile? normalBam + Reference reference String runDir File? callRegions File? 
callRegionsIndex @@ -20,13 +19,15 @@ task ConfigureSomatic { then installDir + "bin/configMata.py" else "configManta.py" + String normalArg = if (defined(normalBam)) then "--normalBam " + select_first([normalBam]).file else "" + command { set -e -o pipefail ~{preCommand} ~{toolCommand} \ - ~{"--normalBam " + normalBam} \ - ~{"--tumorBam " + tumorBam} \ - --referenceFasta ~{refFasta} \ + ~{normalArg} \ + ~{"--tumorBam " + tumorBam.file} \ + --referenceFasta ~{reference.fasta} \ ~{"--callRegions " + callRegions} \ --runDir ~{runDir} \ ~{true="--exome" false="" exome} @@ -53,19 +54,27 @@ task RunSomatic { } output { - File condidateSmallIndels = runDir + "/results/variants/candidateSmallIndels.vcf.gz" - File condidateSmallIndelsIndex = runDir + - "/results/variants/candidateSmallIndels.vcf.gz.tbi" - File candidateSV = runDir + "/results/variants/candidateSV.vcf.gz" - File candidateSVindex = runDir + "/results/variants/candidateSV.vcf.gz.tbi" - File tumorSV = if paired - then runDir + "/results/variants/somaticSV.vcf.gz" - else runDir + "/results/variants/tumorSV.vcf.gz" - File tumorSVindex = if paired - then runDir + "/results/variants/somaticSV.vcf.gz.tbi" - else runDir + "/results/variants/tumorSV.vcf.gz.tbi" - File? diploidSV = "/results/variants/diploidSV.vcf.gz" - File? 
diploidSVindex = "/results/variants/diploidSV.vcf.gz.tbi" + IndexedVcfFile condidateSmallIndels = object { + file: runDir + "/results/variants/candidateSmallIndels.vcf.gz", + index: runDir + "/results/variants/candidateSmallIndels.vcf.gz.tbi" + } + IndexedVcfFile candidateSV = object { + file: runDir + "/results/variants/candidateSV.vcf.gz", + index: runDir + "/results/variants/candidateSV.vcf.gz.tbi" + } + IndexedVcfFile tumorSV = if (paired) + then object { + file: runDir + "/results/variants/somaticSV.vcf.gz", + index: runDir + "/results/variants/somaticSV.vcf.gz.tbi" + } + else object { + file: runDir + "/results/variants/tumorSV.vcf.gz", + index: runDir + "/results/variants/tumorSV.vcf.gz.tbi" + } + + #FIXME: workaround for https://github.com/broadinstitute/cromwell/issues/4111 + File? diploidSV = runDir + "/results/variants/diploidSV.vcf.gz" + File? diploidSVindex = runDir + "/results/variants/diploidSV.vcf.gz.tbi" } runtime { diff --git a/picard.wdl b/picard.wdl index f0175ac9839dae4d22b957e2e29312d131ffaccd..6a52ee4c53a62a5b3b48a5cd1ed41838c2dd4e28 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1,5 +1,7 @@ version 1.0 +import "common.wdl" + task BedToIntervalList { input { String? preCommand @@ -40,11 +42,8 @@ task BedToIntervalList { task CollectMultipleMetrics { input { String? 
preCommand - File bamFile - File bamIndex - File refFasta - File refDict - File refFastaIndex + IndexedBamFile bamFile + Reference reference String basename Boolean collectAlignmentSummaryMetrics = true @@ -53,7 +52,7 @@ task CollectMultipleMetrics { Boolean meanQualityByCycle = true Boolean collectBaseDistributionByCycle = true Boolean collectGcBiasMetrics = true - #Boolean rnaSeqMetrics = false # There is a bug in picard https://github.com/broadinstitute/picard/issues/999 + #FIXME: Boolean rnaSeqMetrics = false # There is a bug in picard https://github.com/broadinstitute/picard/issues/999 Boolean collectSequencingArtifactMetrics = true Boolean collectQualityYieldMetrics = true @@ -73,8 +72,8 @@ task CollectMultipleMetrics { ~{preCommand} ~{toolCommand} \ CollectMultipleMetrics \ - I=~{bamFile} \ - R=~{refFasta} \ + I=~{bamFile.file} \ + R=~{reference.fasta} \ O=~{basename} \ PROGRAM=null \ ~{true="PROGRAM=CollectAlignmentSummaryMetrics" false="" collectAlignmentSummaryMetrics} \ @@ -117,8 +116,7 @@ task CollectMultipleMetrics { task CollectRnaSeqMetrics { input { String? preCommand - File bamFile - File bamIndex + IndexedBamFile bamFile File refRefflat String basename String strandSpecificity = "NONE" @@ -139,7 +137,7 @@ task CollectRnaSeqMetrics { ~{preCommand} ~{toolCommand} \ CollectRnaSeqMetrics \ - I=~{bamFile} \ + I=~{bamFile.file} \ O=~{basename}.RNA_Metrics \ CHART_OUTPUT=~{basename}.RNA_Metrics.pdf \ STRAND_SPECIFICITY=~{strandSpecificity} \ @@ -159,11 +157,8 @@ task CollectRnaSeqMetrics { task CollectTargetedPcrMetrics { input { String? 
preCommand - File bamFile - File bamIndex - File refFasta - File refDict - File refFastaIndex + IndexedBamFile bamFile + Reference reference File ampliconIntervals Array[File]+ targetIntervals String basename @@ -184,8 +179,8 @@ task CollectTargetedPcrMetrics { ~{preCommand} ~{toolCommand} \ CollectTargetedPcrMetrics \ - I=~{bamFile} \ - R=~{refFasta} \ + I=~{bamFile.file} \ + R=~{reference.fasta} \ AMPLICON_INTERVALS=~{ampliconIntervals} \ TARGET_INTERVALS=~{sep=" TARGET_INTERVALS=" targetIntervals} \ O=~{basename}.targetPcrMetrics \ @@ -208,9 +203,9 @@ task CollectTargetedPcrMetrics { task GatherBamFiles { input { String? preCommand - Array[File]+ input_bams - String output_bam_path - Int? compression_level + Array[File]+ inputBams + Array[File]+ inputBamsIndex + String outputBamPath String? picardJar Int memory = 4 @@ -226,16 +221,18 @@ task GatherBamFiles { ~{preCommand} ~{toolCommand} \ GatherBamFiles \ - INPUT=~{sep=' INPUT=' input_bams} \ - OUTPUT=~{output_bam_path} \ + INPUT=~{sep=' INPUT=' inputBams} \ + OUTPUT=~{outputBamPath} \ CREATE_INDEX=true \ CREATE_MD5_FILE=true } output { - File output_bam = "~{output_bam_path}" - File output_bam_index = sub(output_bam_path, ".bam$", ".bai") - File output_bam_md5 = "~{output_bam_path}.md5" + IndexedBamFile outputBam = object { + file: outputBamPath, + index: sub(outputBamPath, ".bam$", ".bai"), + md5sum: outputBamPath + ".md5" + } } runtime { @@ -247,10 +244,10 @@ task GatherBamFiles { task MarkDuplicates { input { String? preCommand - Array[File]+ input_bams - String output_bam_path - String metrics_path - Int? compression_level + Array[File]+ inputBams + Array[File] inputBamIndexes + String outputBamPath + String metricsPath String? 
picardJar Int memory = 4 @@ -273,24 +270,28 @@ task MarkDuplicates { command { set -e -o pipefail ~{preCommand} - mkdir -p $(dirname ~{output_bam_path}) + mkdir -p $(dirname ~{outputBamPath}) ~{toolCommand} \ MarkDuplicates \ - INPUT=~{sep=' INPUT=' input_bams} \ - OUTPUT=~{output_bam_path} \ - METRICS_FILE=~{metrics_path} \ + INPUT=~{sep=' INPUT=' inputBams} \ + OUTPUT=~{outputBamPath} \ + METRICS_FILE=~{metricsPath} \ VALIDATION_STRINGENCY=SILENT \ ~{"READ_NAME_REGEX=" + read_name_regex} \ OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \ CLEAR_DT="false" \ CREATE_INDEX=true \ - ADD_PG_TAG_TO_READS=false + ADD_PG_TAG_TO_READS=false \ + CREATE_MD5_FILE=true } output { - File output_bam = output_bam_path - File output_bam_index = sub(output_bam_path, ".bam$", ".bai") - File duplicate_metrics = metrics_path + IndexedBamFile outputBam = object { + file: outputBamPath, + index: sub(outputBamPath, ".bam$", ".bai"), + md5sum: outputBamPath + ".md5" + } + File metricsFile = metricsPath } runtime { @@ -304,7 +305,7 @@ task MergeVCFs { String? preCommand Array[File] inputVCFs Array[File] inputVCFsIndexes - String outputVCFpath + String outputVcfPath Int? compressionLevel String? picardJar @@ -325,12 +326,14 @@ task MergeVCFs { ~{toolCommand} \ MergeVcfs \ INPUT=~{sep=' INPUT=' inputVCFs} \ - OUTPUT=~{outputVCFpath} + OUTPUT=~{outputVcfPath} } output { - File outputVCF = outputVCFpath - File outputVCFindex = outputVCFpath + ".tbi" + IndexedVcfFile outputVcf = object { + file: outputVcfPath, + index: outputVcfPath + ".tbi" + } } runtime { @@ -341,7 +344,7 @@ task MergeVCFs { task SamToFastq { input { String? preCommand - File inputBam + IndexedBamFile inputBam String outputRead1 String? outputRead2 String? outputUnpaired @@ -360,7 +363,7 @@ task SamToFastq { ~{preCommand} ~{toolCommand} \ SamToFastq \ - I=~{inputBam} \ + I=~{inputBam.file} \ ~{"FASTQ=" + outputRead1} \ ~{"SECOND_END_FASTQ=" + outputRead2} \ ~{"UNPAIRED_FASTQ=" + outputUnpaired} @@ -422,8 +425,8 @@ task SortVcf { String? 
picardJar Array[File]+ vcfFiles - String outputVcf - File? sequenceDict + String outputVcfPath + File? dict Int memory = 4 Float memoryMultiplier = 3.0 @@ -439,13 +442,15 @@ task SortVcf { ~{toolCommand} \ SortVcf \ I=~{sep=" I=" vcfFiles} \ - ~{"SEQUENCE_DICTIONARY=" + sequenceDict} \ - O=~{outputVcf} + ~{"SEQUENCE_DICTIONARY=" + dict} \ + O=~{outputVcfPath} } output { - File vcfFile = outputVcf - File vcfIndex = outputVcf + ".tbi" + IndexedVcfFile outputVcf = object { + file: outputVcfPath, + index: outputVcfPath + ".tbi" + } } runtime { diff --git a/samtools.wdl b/samtools.wdl index 58bdcfd85b61264990af124e847a76faceefffef..8bdf771209c658f69892d71c294194f1b27064ee 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -1,5 +1,7 @@ version 1.0 +import "common.wdl" + task BgzipAndIndex { input { File inputFile @@ -23,20 +25,21 @@ task BgzipAndIndex { task Index { input { String? preCommand - File bamFilePath - String? bamIndexPath + File bamFile + String bamIndexPath } command { set -e -o pipefail ~{preCommand} - samtools index ~{bamFilePath} ~{bamIndexPath} + samtools index ~{bamFile} ~{bamIndexPath} } output { - File indexFile = if defined(bamIndexPath) - then select_first([bamIndexPath]) - else bamFilePath + ".bai" + IndexedBamFile outputBam = object { + file: bamFile, + index: bamIndexPath + } } } diff --git a/strelka.wdl b/strelka.wdl index 959fabf8f5a4783986c9e9b56a4b20dd7963e4e4..c47345ed6c4b75991121f49155ce3369d99b23e8 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -1,5 +1,7 @@ version 1.0 +import "common.wdl" as common + task ConfigureGermline { input { String? preCommand @@ -7,8 +9,7 @@ task ConfigureGermline { String runDir Array[File]+ bams Array[File]+ indexes - File refFasta - File refFastaIndex + Reference reference File? callRegions File? 
callRegionsIndex Boolean exome = false @@ -24,7 +25,7 @@ task ConfigureGermline { ~{preCommand} ~{toolCommand} \ --bam ~{sep=" --bam " bams} \ - --ref ~{refFasta} \ + --ref ~{reference.fasta} \ --runDir ~{runDir} \ ~{"--callRegions " + callRegions} \ ~{true="--exome" false="" exome} \ @@ -41,16 +42,12 @@ task ConfigureSomatic { String? preCommand String? installDir String runDir - File normalBam - File normalIndex - File tumorBam - File tumorIndex - File refFasta - File refFastaIndex + IndexedBamFile normalBam + IndexedBamFile tumorBam + Reference reference File? callRegions File? callRegionsIndex - File? indelCandidates - File? indelCandidatesIndex + IndexedVcfFile? indelCandidates Boolean exome = false } @@ -58,16 +55,18 @@ task ConfigureSomatic { then installDir + "bin/configureStrelkaSomaticWorkflow.py" else "configureStrelkaSomaticWorkflow.py" + String indelCandidatesArg = if (defined(indelCandidates)) then "--indelCandidates " + select_first([indelCandidates]).file else "" + command { set -e -o pipefail ~{preCommand} ~{toolCommand} \ - --normalBam ~{normalBam} \ - --tumorBam ~{tumorBam} \ - --ref ~{refFasta} \ + --normalBam ~{normalBam.file} \ + --tumorBam ~{tumorBam.file} \ + --ref ~{reference.fasta} \ --runDir ~{runDir} \ ~{"--callRegions " + callRegions} \ - ~{"--indelCandidates " + indelCandidates} \ + ~{indelCandidatesArg} \ ~{true="--exome" false="" exome} \ } @@ -82,6 +81,7 @@ task Run { Int cores = 1 Int memory = 4 Boolean somatic = true + #FIXME: This task does not have input files } command { diff --git a/stringtie.wdl b/stringtie.wdl index 97455da498f722ddf05008fc7102c9eef360335e..b26109d1b18980b902d552baefc06aadc3345b86 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -1,9 +1,11 @@ version 1.0 +import "common.wdl" + task Stringtie { input { String? preCommand - File alignedReads + IndexedBamFile bamFile File? 
referenceGtf Int threads = 1 String assembledTranscriptsFile @@ -23,7 +25,7 @@ task Stringtie { ~{true="fr" false="" secondStranded} \ -o ~{assembledTranscriptsFile} \ ~{"-A " + geneAbundanceFile} \ - ~{alignedReads} + ~{bamFile.file} } output { diff --git a/vardict.wdl b/vardict.wdl index c79a768ffc1fc0a72c9c8506e20338a952a91d4d..989fc25417e2a77601dbeb18a06d6480d6b486b2 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -1,18 +1,17 @@ version 1.0 +import "common.wdl" + task VarDict { input { String? installDir Boolean useJavaVersion = true String tumorSampleName - File tumorBam - File tumorIndex + IndexedBamFile tumorBam String? normalSampleName - File? normalBam - File? normalIndex - File refFasta - File refFastaIndex + IndexedBamFile? normalBam + Reference reference File bedFile String outputVcf @@ -26,6 +25,8 @@ task VarDict { Float memoryMultiplier = 2.0 } + String normalArg = if (defined(normalBam)) then "|" + select_first([normalBam]).file else "" + String toolCommand = if defined(installDir) then installDir + "/VarDict" else if useJavaVersion @@ -37,9 +38,9 @@ task VarDict { export JAVA_OPTS="-Xmx~{memory}G" ~{preCommand} ~{toolCommand} \ - -G ~{refFasta} \ + -G ~{reference.fasta} \ -N ~{tumorSampleName} \ - -b "~{tumorBam}~{"|" + normalBam}" \ + -b "~{tumorBam.file}~{normalArg}" \ ~{true="" false="-z" defined(normalBam)} \ -c ~{chromosomeColumn} \ -S ~{startColumn} \ @@ -52,10 +53,14 @@ task VarDict { -N "~{tumorSampleName}~{"|" + normalSampleName}" \ ~{true="" false="-E" defined(normalBam)} | \ bgzip -c > ~{outputVcf} + tabix -p vcf ~{outputVcf} } output { - File vcfFile = outputVcf + IndexedVcfFile vcfFile = object { + file: outputVcf, + index: outputVcf + ".tbi" + } } runtime {