diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index 5c9242b49e7acaf5b0e052a52fa1fdd84d4a2a1c..149d8bfdf33d75a2b1a86b7d2ab06294df7410ae 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -452,6 +452,45 @@ task VcfStats { "/sample_compare/genotype.non_ref.abs.tsv" File? sampleCompareGenotypeRefAbs = outputDir + "/sample_compare/genotype.ref.abs.tsv" File? sampleCompareGenotypeRel = outputDir + "/sample_compare/genotype.rel.tsv" + # A glob is easier, but duplicates all the outputs + Array[File] allStats = select_all([ + general, + genotype, + sampleDistributionAvailableAggregate, + sampleDistributionAvailable, + sampleDistributionCalledAggregate, + sampleDistributionCalled, + sampleDistributionFilteredAggregate, + sampleDistributionFiltered, + sampleDistributionHetAggregate, + sampleDistributionHetNoNRefAggregate, + sampleDistributionHetNonRef, + sampleDistributionHet, + sampleDistributionHomAggregate, + sampleDistributionHomRefAggregate, + sampleDistributionHomRef, + sampleDistributionHom, + sampleDistributionHomVarAggregate, + sampleDistributionHomVar, + sampleDistributionMixedAggregate, + sampleDistributionMixed, + sampleDistributionNoCallAggregate, + sampleDistributionNoCall, + sampleDistributionNonInformativeAggregate, + sampleDistributionNonInformative, + sampleDistributionToalAggregate, + sampleDistributionTotal, + sampleDistributionVariantAggregate, + sampleDistributionVariant, + sampleCompareAlleleAbs, + sampleCompareAlleleNonRefAbs, + sampleCompareAlleleRefAbs, + sampleCompareAlleleRel, + sampleCompareGenotypeAbs, + sampleCompareGenotypeNonRefAbs, + sampleCompareGenotypeRefAbs, + sampleCompareGenotypeRel + ]) } runtime { diff --git a/bwa.wdl b/bwa.wdl index edc8206f8124fa0506a7142260d155befea47f05..e45c4dbc31de14114c9e3feb84938fa62e3ff3d7 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -1,7 +1,5 @@ version 1.0 -import "common.wdl" as common - task Mem { input { File read1 @@ -35,10 +33,8 @@ task Mem { } output { - IndexedBamFile bamFile = object { - file: outputPath, - index: sub(outputPath, ".bam$", ".bai") - } + File outputBam = outputPath + File outputBamIndex = sub(outputPath, "\.bam$", ".bai") } runtime{ diff --git a/common.wdl b/common.wdl index e684dfbcbd0eaf19992abd544075a6c9d516a850..2d099aaa9d921e453b3b2b500acb9d953bcb40e2 100644 --- a/common.wdl +++ b/common.wdl @@ -25,8 +25,10 @@ task CheckFileMD5 { input { File file String md5 - # Version not that important as long as it is stable. - String dockerTag = "5.0.2" + # By default cromwell expects /bin/bash to be present in the container + # The 'bash' container does not fill this requirement. (It is in /usr/local/bin/bash) + # Use a stable version of debian:stretch-slim for this. (Smaller than ubuntu) + String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } command { @@ -36,8 +38,7 @@ task CheckFileMD5 { } runtime { - # Apparently there is a bash container for this sort of stuff. - docker: "bash:" + dockerTag + docker: dockerImage } } diff --git a/gatk.wdl b/gatk.wdl index a02a2069436e065f6f9282fc2deefe7816a694a9..e76a93e8c979c6ebee0e8857d669da2e13827458 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1,15 +1,16 @@ version 1.0 -import "common.wdl" - # Apply Base Quality Score Recalibration (BQSR) model task ApplyBQSR { input { - IndexedBamFile inputBam + File inputBam + File inputBamIndex String outputBamPath File recalibrationReport Array[File]+ sequenceGroupInterval - Reference reference + File referenceFasta + File referenceFastaDict + File referenceFastaFai Int memory = 4 Float memoryMultiplier = 3.0 @@ -23,8 +24,8 @@ task ApplyBQSR { ApplyBQSR \ --create-output-bam-md5 \ --add-output-sam-program-record \ - -R ~{reference.fasta} \ - -I ~{inputBam.file} \ + -R ~{referenceFasta} \ + -I ~{inputBam} \ --use-original-qualities \ -O ~{outputBamPath} \ -bqsr ~{recalibrationReport} \ @@ -35,11 +36,9 @@ task ApplyBQSR { } output { - IndexedBamFile recalibratedBam = { - "file": outputBamPath, - "index": sub(outputBamPath, "\.bam$", ".bai"), - "md5": outputBamPath + ".md5" - } + File recalibratedBam = outputBamPath + File recalibratedBamIndex = sub(outputBamPath, "\.bam$", ".bai") + File recalibratedBamMd5 = outputBamPath + ".md5" } runtime { @@ -51,13 +50,17 @@ task ApplyBQSR { # Generate Base Quality Score Recalibration (BQSR) model task BaseRecalibrator { input { - IndexedBamFile inputBam + File inputBam + File inputBamIndex String recalibrationReportPath Array[File]+ sequenceGroupInterval Array[File]? knownIndelsSitesVCFs Array[File]? knownIndelsSitesVCFIndexes - IndexedVcfFile? dbsnpVCF - Reference reference + File? dbsnpVCF + File? dbsnpVCFIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai Int memory = 4 Float memoryMultiplier = 3.0 @@ -66,7 +69,7 @@ task BaseRecalibrator { Array[File]+ knownIndelsSitesVCFsArg = flatten([ select_first([knownIndelsSitesVCFs, []]), - [select_first([dbsnpVCF]).file] + [select_first([dbsnpVCF])] ]) command { @@ -74,8 +77,8 @@ task BaseRecalibrator { mkdir -p $(dirname ~{recalibrationReportPath}) gatk --java-options -Xmx~{memory}G \ BaseRecalibrator \ - -R ~{reference.fasta} \ - -I ~{inputBam.file} \ + -R ~{referenceFasta} \ + -I ~{inputBam} \ --use-original-qualities \ -O ~{recalibrationReportPath} \ --known-sites ~{sep=" --known-sites " knownIndelsSitesVCFsArg} \ @@ -98,7 +101,9 @@ task CombineGVCFs { Array[File]+ gvcfFilesIndex Array[File]+ intervals String outputPath - Reference reference + File referenceFasta + File referenceFastaDict + File referenceFastaFai Int memory = 4 Float memoryMultiplier = 3.0 @@ -110,17 +115,15 @@ task CombineGVCFs { mkdir -p $(dirname ~{outputPath}) gatk --java-options -Xmx~{memory}G \ CombineGVCFs \ - -R ~{reference.fasta} \ + -R ~{referenceFasta} \ -O ~{outputPath} \ -V ~{sep=' -V ' gvcfFiles} \ -L ~{sep=' -L ' intervals} } output { - IndexedVcfFile outputVCF = { - "file": outputPath, - "index": outputPath + ".tbi" - } + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" } runtime { @@ -165,26 +168,24 @@ task GenotypeGVCFs { Array[File]+ gvcfFilesIndex Array[File]+ intervals String outputPath - Reference reference - IndexedVcfFile? dbsnpVCF - + File referenceFasta + File referenceFastaDict + File referenceFastaFai + File? dbsnpVCF + File? dbsnpVCFIndex Int memory = 6 Float memoryMultiplier = 2.0 String dockerTag = "4.1.0.0--0" } - File dbsnpFile = if (defined(dbsnpVCF)) - then select_first([dbsnpVCF]).file - else "" - command { set -e mkdir -p $(dirname ~{outputPath}) gatk --java-options -Xmx~{memory}G \ GenotypeGVCFs \ - -R ~{reference.fasta} \ + -R ~{referenceFasta} \ -O ~{outputPath} \ - ~{true="-D" false="" defined(dbsnpVCF)} ~{dbsnpFile} \ + ~{true="-D" false="" defined(dbsnpVCF)} ~{dbsnpVCF} \ -G StandardAnnotation \ --only-output-calls-starting-in-intervals \ -new-qual \ @@ -193,10 +194,9 @@ task GenotypeGVCFs { } output { - IndexedVcfFile outputVCF = { - "file": outputPath, - "index": outputPath + ".tbi" - } + File outputVCF = outputPath + File outputVCFIndex = outputPath + ".tbi" + } runtime { @@ -212,38 +212,34 @@ task HaplotypeCallerGvcf { Array[File]+ inputBamsIndex Array[File]+ intervalList String gvcfPath - Reference reference + File referenceFasta + File referenceFastaIndex + File referenceFastaDict Float contamination = 0.0 - IndexedVcfFile? dbsnpVCF - + File? dbsnpVCF + File? dbsnpVCFIndex Int memory = 4 Float memoryMultiplier = 3 String dockerTag = "4.1.0.0--0" } - File dbsnpFile = if (defined(dbsnpVCF)) - then select_first([dbsnpVCF]).file - else "" - command { set -e mkdir -p $(dirname ~{gvcfPath}) gatk --java-options -Xmx~{memory}G \ HaplotypeCaller \ - -R ~{reference.fasta} \ + -R ~{referenceFasta} \ -O ~{gvcfPath} \ -I ~{sep=" -I " inputBams} \ -L ~{sep=' -L ' intervalList} \ - ~{true="-D" false="" defined(dbsnpVCF)} ~{dbsnpFile} \ + ~{true="-D" false="" defined(dbsnpVCF)} ~{dbsnpVCF} \ -contamination ~{contamination} \ -ERC GVCF } output { - IndexedVcfFile outputGVCF = { - "file": gvcfPath, - "index": gvcfPath + ".tbi" - } + File outputGVCF = gvcfPath + File outputGVCFIndex = gvcfPath + ".tbi" } runtime { @@ -256,7 +252,9 @@ task MuTect2 { input { Array[File]+ inputBams Array[File]+ inputBamsIndex - Reference reference + File referenceFasta + File referenceFastaDict + File referenceFastaFai String outputVcf String tumorSample String? normalSample @@ -272,7 +270,7 @@ task MuTect2 { mkdir -p $(dirname ~{outputVcf}) gatk --java-options -Xmx~{memory}G \ Mutect2 \ - -R ~{reference.fasta} \ + -R ~{referenceFasta} \ -I ~{sep=" -I " inputBams} \ -tumor ~{tumorSample} \ ~{"-normal " + normalSample} \ @@ -281,10 +279,8 @@ task MuTect2 { } output { - IndexedVcfFile vcfFile = { - "file": outputVcf, - "index": outputVcf + ".tbi" - } + File vcfFile = outputVcf + File vcfFileIndex = outputVcf + ".tbi" } runtime { @@ -295,8 +291,11 @@ task MuTect2 { task SplitNCigarReads { input { - IndexedBamFile inputBam - Reference reference + File inputBam + File inputBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai String outputBam Array[File]+ intervals @@ -310,17 +309,15 @@ task SplitNCigarReads { mkdir -p $(dirname ~{outputBam}) gatk --java-options -Xmx~{memory}G \ SplitNCigarReads \ - -I ~{inputBam.file} \ - -R ~{reference.fasta} \ + -I ~{inputBam} \ + -R ~{referenceFasta} \ -O ~{outputBam} \ -L ~{sep=' -L ' intervals} } output { - IndexedBamFile bam = { - "file": outputBam, - "index": sub(outputBam, "\.bam$", ".bai") - } + File bam = outputBam + File bamIndex = sub(outputBam, "\.bam$", ".bai") } runtime { diff --git a/picard.wdl b/picard.wdl index 78c7e8bd597fa31d9f8723f3a4cb0fc2d5711c92..2628ca8125e78087b4fc57e98118e15a9cde47a0 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1,7 +1,5 @@ version 1.0 -import "common.wdl" - task BedToIntervalList { input { File bedFile @@ -35,8 +33,11 @@ task BedToIntervalList { task CollectMultipleMetrics { input { - IndexedBamFile bamFile - Reference reference + File inputBam + File inputBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai String basename Boolean collectAlignmentSummaryMetrics = true @@ -60,8 +61,8 @@ task CollectMultipleMetrics { mkdir -p $(dirname "~{basename}") picard -Xmx~{memory}G \ CollectMultipleMetrics \ - I=~{bamFile.file} \ - R=~{reference.fasta} \ + I=~{inputBam} \ + R=~{referenceFasta} \ O=~{basename} \ PROGRAM=null \ ~{true="PROGRAM=CollectAlignmentSummaryMetrics" false="" collectAlignmentSummaryMetrics} \ @@ -94,6 +95,26 @@ task CollectMultipleMetrics { File qualityDistribution = basename + ".quality_distribution_metrics" File qualityDistributionPdf = basename + ".quality_distribution.pdf" File qualityYield = basename + ".quality_yield_metrics" + # Using a glob is easier. But will lead to very ugly output directories. + Array[File] allStats = select_all([ + alignmentSummary, + baitBiasDetail, + baitBiasSummary, + baseDistributionByCycle, + baseDistributionByCyclePdf, + errorSummary, + gcBiasDetail, + gcBiasPdf, + gcBiasSummary, + insertSizeHistogramPdf, + insertSize, + preAdapterDetail, + qualityByCycle, + qualityByCyclePdf, + qualityDistribution, + qualityDistributionPdf, + qualityYield + ]) } runtime { @@ -106,7 +127,8 @@ task CollectMultipleMetrics { task CollectRnaSeqMetrics { input { - IndexedBamFile bamFile + File inputBam + File inputBamIndex File refRefflat String basename String strandSpecificity = "NONE" @@ -121,7 +143,7 @@ task CollectRnaSeqMetrics { mkdir -p $(dirname "~{basename}") picard -Xmx~{memory}G \ CollectRnaSeqMetrics \ - I=~{bamFile.file} \ + I=~{inputBam} \ O=~{basename}.RNA_Metrics \ CHART_OUTPUT=~{basename}.RNA_Metrics.pdf \ STRAND_SPECIFICITY=~{strandSpecificity} \ @@ -143,8 +165,11 @@ task CollectRnaSeqMetrics { task CollectTargetedPcrMetrics { input { - IndexedBamFile bamFile - Reference reference + File inputBam + File inputBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai File ampliconIntervals Array[File]+ targetIntervals String basename @@ -159,8 +184,8 @@ task CollectTargetedPcrMetrics { mkdir -p $(dirname "~{basename}") picard -Xmx~{memory}G \ CollectTargetedPcrMetrics \ - I=~{bamFile.file} \ - R=~{reference.fasta} \ + I=~{inputBam} \ + R=~{referenceFasta} \ AMPLICON_INTERVALS=~{ampliconIntervals} \ TARGET_INTERVALS=~{sep=" TARGET_INTERVALS=" targetIntervals} \ O=~{basename}.targetPcrMetrics \ @@ -194,6 +219,7 @@ task GatherBamFiles { command { set -e + mkdir -p $(dirname ~{outputBamPath}) picard -Xmx~{memory}G \ GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ @@ -203,11 +229,9 @@ task GatherBamFiles { } output { - IndexedBamFile outputBam = object { - file: outputBamPath, - index: sub(outputBamPath, ".bam$", ".bai"), - md5: outputBamPath + ".md5" - } + File outputBam = outputBamPath + File outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") + File outputBamMd5 = outputBamPath + ".md5" } runtime { @@ -220,7 +244,7 @@ task GatherVcfs { input { Array[File]+ inputVcfs Array[File]+ inputVcfIndexes - String outputVcfPath + String outputVcfPath = "out.vcf.gz" Int memory = 4 Float memoryMultiplier = 3.0 @@ -229,6 +253,7 @@ task GatherVcfs { command { set -e + mkdir -p $(dirname ~{outputVcfPath}) picard -Xmx~{memory}G \ GatherVcfs \ INPUT=~{sep=' INPUT=' inputVcfs} \ @@ -287,11 +312,9 @@ task MarkDuplicates { } output { - IndexedBamFile outputBam = object { - file: outputBamPath, - index: sub(outputBamPath, ".bam$", ".bai"), - md5: outputBamPath + ".md5" - } + File outputBam = outputBamPath + File outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") + File outputBamMd5 = outputBamPath + ".md5" File metricsFile = metricsPath } @@ -326,10 +349,8 @@ task MergeVCFs { } output { - IndexedVcfFile outputVcf = object { - file: outputVcfPath, - index: outputVcfPath + ".tbi" - } + File outputVcf = outputVcfPath + File outputVcfIndex = outputVcfPath + ".tbi" } runtime { @@ -340,7 +361,8 @@ task MergeVCFs { task SamToFastq { input { - IndexedBamFile inputBam + File inputBam + File inputBamIndex String outputRead1 String? outputRead2 String? outputUnpaired @@ -354,7 +376,7 @@ task SamToFastq { set -e picard -Xmx~{memory}G \ SamToFastq \ - I=~{inputBam.file} \ + I=~{inputBam} \ ~{"FASTQ=" + outputRead1} \ ~{"SECOND_END_FASTQ=" + outputRead2} \ ~{"UNPAIRED_FASTQ=" + outputUnpaired} @@ -429,10 +451,8 @@ task SortVcf { } output { - IndexedVcfFile outputVcf = object { - file: outputVcfPath, - index: outputVcfPath + ".tbi" - } + File outputVcf = outputVcfPath + File outputVcfIndex = outputVcfPath + ".tbi" } runtime { diff --git a/samtools.wdl b/samtools.wdl index a14876d05ad814741c752e3af94a2e4d326a549b..a78f55353d5235463b8e3d9f9e255858c6861676 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -1,7 +1,5 @@ version 1.0 -import "common.wdl" - task BgzipAndIndex { input { File inputFile @@ -42,10 +40,8 @@ task Index { } output { - IndexedBamFile outputBam = object { - file: bamFile, - index: bamIndexPath - } + File indexedBam = bamFile + File index = bamIndexPath } runtime { @@ -56,18 +52,23 @@ task Index { task Merge { input { Array[File]+ bamFiles - String outputBamPath + String outputBamPath = "merged.bam" Boolean force = true String dockerTag = "1.8--h46bd0b3_5" } + String indexPath = sub(outputBamPath, "\.bam$",".bai") command { + set -e + mkdir -p $(dirname ~{outputBamPath}) samtools merge ~{true="-f" false="" force} ~{outputBamPath} ~{sep=' ' bamFiles} + samtools index ~{outputBamPath} ~{indexPath} } output { File outputBam = outputBamPath + File outputBamIndex = indexPath } runtime { @@ -177,17 +178,25 @@ task Fastq { task Tabix { input { - String inputFile + File inputFile + String outputFilePath = "indexed.vcf.gz" String type = "vcf" String dockerTag = "0.2.6--ha92aebf_0" } - + # FIXME: It is better to do the indexing on VCF creation. Not in a separate task. With file localization this gets hairy fast. command { - tabix ~{inputFile} -p ~{type} + set -e + mkdir -p $(dirname ~{outputFilePath}) + if [ ! -f ~{outputFilePath} ] + then + ln ~{inputFile} ~{outputFilePath} + fi + tabix ~{outputFilePath} -p ~{type} } output { - File index = inputFile + ".tbi" + File indexedFile = outputFilePath + File index = outputFilePath + ".tbi" } runtime {