diff --git a/biopet.wdl b/biopet.wdl index 93dab22236fd00f9343d353c9048d1c1c6e21706..7d0d0a5fcfe5f1cb9711a3da6b3665f34546e3a8 100644 --- a/biopet.wdl +++ b/biopet.wdl @@ -1,3 +1,70 @@ +# PLEASE ADD TASKS IN ALPHABETIC ORDER. +# This makes searching a lot easier. +task BaseCounter { + String? preCommand + String tool_jar #Should this be of type File? + File bam + File refFlat + String outputDir + String prefix + + Float? memory + Float? memoryMultiplier + + Int mem = ceil(select_first([memory, 12.0])) + command { + set -e -o pipefail + ${preCommand} + mkdir -p ${outputDir} + java -Xmx${mem}G -jar ${tool_jar} \ + -b ${bam} \ + -r ${refFlat} \ + -o ${outputDir} \ + -p ${prefix} + } + + output { + File exonAntisense = outputDir + "/" + prefix + ".base.exon.antisense.counts" + File exon = outputDir + "/" + prefix + ".base.exon.counts" + File exonMergeAntisense = outputDir + "/" + prefix + ".base.exon.merge.antisense.counts" + File exonMerge = outputDir + "/" + prefix + ".base.exon.merge.counts" + File exonMergeSense = outputDir + "/" + prefix + ".base.exon.merge.sense.counts" + File exonSense = outputDir + "/" + prefix + ".base.exon.sense.counts" + File geneAntisense = outputDir + "/" + prefix + ".base.gene.antisense.counts" + File gene = outputDir + "/" + prefix + ".base.gene.counts" + File geneExonicAntisense = outputDir + "/" + prefix + ".base.gene.exonic.antisense.counts" + File geneExonic = outputDir + "/" + prefix + ".base.gene.exonic.counts" + File geneExonicSense = outputDir + "/" + prefix + ".base.gene.exonic.sense.counts" + File geneIntronicAntisense = outputDir + "/" + prefix + ".base.gene.intronic.antisense.counts" + File geneIntronic = outputDir + "/" + prefix + ".base.gene.intronic.counts" + File geneIntronicSense = outputDir + "/" + prefix + ".base.gene.intronic.sense.counts" + File geneSense = outputDir + "/" + prefix + ".base.gene.sense.counts" + File intronAntisense = outputDir + "/" + prefix + ".base.intron.antisense.counts" + File intron = outputDir 
+ "/" + prefix + ".base.intron.counts" + File intronMergeAntisense = outputDir + "/" + prefix + ".base.intron.merge.antisense.counts" + File intronMerge = outputDir + "/" + prefix + ".base.intron.merge.counts" + File intronMergeSense = outputDir + "/" + prefix + ".base.intron.merge.sense.counts" + File intronSense = outputDir + "/" + prefix + ".base.intron.sense.counts" + File metaExonsNonStranded = outputDir + "/" + prefix + ".base.metaexons.non_stranded.counts" + File metaExonsStrandedAntisense = outputDir + "/" + prefix + ".base.metaexons.stranded.antisense.counts" + File metaExonsStranded = outputDir + "/" + prefix + ".base.metaexons.stranded.counts" + File metaExonsStrandedSense = outputDir + "/" + prefix + ".base.metaexons.stranded.sense.counts" + File transcriptAntisense = outputDir + "/" + prefix + ".base.transcript.antisense.counts" + File transcript = outputDir + "/" + prefix + ".base.transcript.counts" + File transcriptExonicAntisense = outputDir + "/" + prefix + ".base.transcript.exonic.antisense.counts" + File transcriptExonic = outputDir + "/" + prefix + ".base.transcript.exonic.counts" + File transcriptExonicSense = outputDir + "/" + prefix + ".base.transcript.exonic.sense.counts" + File transcriptIntronicAntisense = outputDir + "/" + prefix + ".base.transcript.intronic.antisense.counts" + File transcriptIntronic = outputDir + "/" + prefix + ".base.transcript.intronic.counts" + File transcriptIntronicSense = outputDir + "/" + prefix + ".base.transcript.intronic.sense.counts" + File transcriptSense = outputDir + "/" + prefix + ".base.transcript.sense.counts" + } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + } +} + task FastqSplitter { String? preCommand File inputFastq @@ -23,36 +90,30 @@ task FastqSplitter { } } -task ScatterRegions { +task FastqSync { String? preCommand - File ref_fasta - File ref_dict - String outputDirPath - String tool_jar - Int? scatterSize - File? regions - - Float? memory - Float? 
memoryMultiplier - - Int mem = ceil(select_first([memory, 4.0])) + File ref1 + File ref2 + File in1 + File in2 + String out1path + String out2path + File tool_jar command { set -e -o pipefail ${preCommand} - mkdir -p ${outputDirPath} - java -Xmx${mem}G -jar ${tool_jar} \ - -R ${ref_fasta} \ - -o ${outputDirPath} \ - ${"-s " + scatterSize} \ - ${"-L " + regions} + mkdir -p $(dirname ${out1path}) $(dirname ${out2path}) + java -jar ${tool_jar} \ + --in1 ${in1} \ + --in2 ${in2} \ + --ref1 ${ref1} \ + --ref2 ${ref2} \ + --out1 ${out1path} \ + --out2 ${out2path} } - output { - Array[File] scatters = glob(outputDirPath + "/scatter-*.bed") - } - - runtime { - memory: ceil(mem * select_first([memoryMultiplier, 2.0])) + File out1 = out1path + File out2 = out2path } } @@ -94,67 +155,36 @@ task SampleConfig { } } -task BaseCounter { +task ScatterRegions { String? preCommand - String tool_jar #Should this be of type File? - File bam - File refFlat - String outputDir - String prefix + File ref_fasta + File ref_dict + String outputDirPath + String tool_jar + Int? scatterSize + File? regions Float? memory Float? 
memoryMultiplier - Int mem = ceil(select_first([memory, 12.0])) + Int mem = ceil(select_first([memory, 4.0])) command { set -e -o pipefail ${preCommand} - mkdir -p ${outputDir} + mkdir -p ${outputDirPath} java -Xmx${mem}G -jar ${tool_jar} \ - -b ${bam} \ - -r ${refFlat} \ - -o ${outputDir} \ - -p ${prefix} + -R ${ref_fasta} \ + -o ${outputDirPath} \ + ${"-s " + scatterSize} \ + ${"-L " + regions} } output { - File exonAntisense = outputDir + "/" + prefix + ".base.exon.antisense.counts" - File exon = outputDir + "/" + prefix + ".base.exon.counts" - File exonMergeAntisense = outputDir + "/" + prefix + ".base.exon.merge.antisense.counts" - File exonMerge = outputDir + "/" + prefix + ".base.exon.merge.counts" - File exonMergeSense = outputDir + "/" + prefix + ".base.exon.merge.sense.counts" - File exonSense = outputDir + "/" + prefix + ".base.exon.sense.counts" - File geneAntisense = outputDir + "/" + prefix + ".base.gene.antisense.counts" - File gene = outputDir + "/" + prefix + ".base.gene.counts" - File geneExonicAntisense = outputDir + "/" + prefix + ".base.gene.exonic.antisense.counts" - File geneExonic = outputDir + "/" + prefix + ".base.gene.exonic.counts" - File geneExonicSense = outputDir + "/" + prefix + ".base.gene.exonic.sense.counts" - File geneIntronicAntisense = outputDir + "/" + prefix + ".base.gene.intronic.antisense.counts" - File geneIntronic = outputDir + "/" + prefix + ".base.gene.intronic.counts" - File geneIntronicSense = outputDir + "/" + prefix + ".base.gene.intronic.sense.counts" - File geneSense = outputDir + "/" + prefix + ".base.gene.sense.counts" - File intronAntisense = outputDir + "/" + prefix + ".base.intron.antisense.counts" - File intron = outputDir + "/" + prefix + ".base.intron.counts" - File intronMergeAntisense = outputDir + "/" + prefix + ".base.intron.merge.antisense.counts" - File intronMerge = outputDir + "/" + prefix + ".base.intron.merge.counts" - File intronMergeSense = outputDir + "/" + prefix + 
".base.intron.merge.sense.counts" - File intronSense = outputDir + "/" + prefix + ".base.intron.sense.counts" - File metaExonsNonStranded = outputDir + "/" + prefix + ".base.metaexons.non_stranded.counts" - File metaExonsStrandedAntisense = outputDir + "/" + prefix + ".base.metaexons.stranded.antisense.counts" - File metaExonsStranded = outputDir + "/" + prefix + ".base.metaexons.stranded.counts" - File metaExonsStrandedSense = outputDir + "/" + prefix + ".base.metaexons.stranded.sense.counts" - File transcriptAntisense = outputDir + "/" + prefix + ".base.transcript.antisense.counts" - File transcript = outputDir + "/" + prefix + ".base.transcript.counts" - File transcriptExonicAntisense = outputDir + "/" + prefix + ".base.transcript.exonic.antisense.counts" - File transcriptExonic = outputDir + "/" + prefix + ".base.transcript.exonic.counts" - File transcriptExonicSense = outputDir + "/" + prefix + ".base.transcript.exonic.sense.counts" - File transcriptIntronicAntisense = outputDir + "/" + prefix + ".base.transcript.intronic.antisense.counts" - File transcriptIntronic = outputDir + "/" + prefix + ".base.transcript.intronic.counts" - File transcriptIntronicSense = outputDir + "/" + prefix + ".base.transcript.intronic.sense.counts" - File transcriptSense = outputDir + "/" + prefix + ".base.transcript.sense.counts" + Array[File] scatters = glob(outputDirPath + "/scatter-*.bed") } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + memory: ceil(mem * select_first([memoryMultiplier, 2.0])) } } + diff --git a/bwa.wdl b/bwa.wdl index ffa42313dad13f0c13df6f9c77f938567c0027a0..440b6559b2fcceaa0efaebfdeddd35b46da07d5a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -1,8 +1,8 @@ -task BwaMem { +task mem { String? preCommand File inputR1 File? inputR2 - String referenceFasta + File referenceFasta Array[File] indexFiles # These indexFiles need to be added, otherwise cromwell will not find them. String outputPath String? readgroup @@ -10,6 +10,7 @@ task BwaMem { Int? 
threads Int? memory + command { set -e -o pipefail mkdir -p $(dirname ${outputPath}) @@ -23,8 +24,8 @@ task BwaMem { File bamFile = outputPath } runtime{ - cpu: if defined(threads) then threads else 1 - memory: if defined(memory) then memory else 8 + cpu: select_first([threads,1]) + memory: select_first([memory,8]) } } @@ -35,6 +36,7 @@ task index { Int? blockSize String? outputDir String fastaFilename = basename(fasta) + String outputFile = if (defined(outputDir)) then outputDir + "/" + fastaFilename else fasta command { set -e -o pipefail @@ -47,13 +49,12 @@ task index { bwa index \ ${"-a " + constructionAlgorithm} \ ${"-b" + blockSize} \ - ${outputDir + "/"}${fastaFilename} + ${outputFile} } output { - File indexBase = if (defined(outputDir)) then select_first([outputDir]) + "/" + fastaFilename else fastaFilename - File indexedFasta = indexBase - Array[File] indexFiles = [indexBase + ".bwt",indexBase + ".pac",indexBase + ".sa",indexBase + ".amb",indexBase + ".ann"] + File indexedFasta = outputFile + Array[File] indexFiles = [outputFile + ".bwt",outputFile + ".pac",outputFile + ".sa",outputFile + ".amb",outputFile + ".ann"] } parameter_meta { fasta: "Fasta file to be indexed" diff --git a/picard.wdl b/picard.wdl index 104261816f42dea6126dc5c645ab7871e618fe1e..2c638df3bc4ae1d98261393d2f74012c692eb3e0 100644 --- a/picard.wdl +++ b/picard.wdl @@ -147,6 +147,41 @@ task MergeVCFs { File output_vcf_index = output_vcf_path + ".tbi" } + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + } +} + +task SamToFastq { + String? preCommand + File inputBam + String outputRead1 + String? outputRead2 + String? outputUnpaired + String picard_jar + Float? memory + Float? memoryMultiplier + Int mem = ceil(select_first([memory, 16.0])) # High memory default to avoid crashes. 
+ + command { + set -e -o pipefail + ${preCommand} + java \ + -Xmx${mem}G \ + -jar ${picard_jar} \ + SamToFastq \ + I=${inputBam} \ + ${"FASTQ=" + outputRead1} \ + ${"SECOND_END_FASTQ=" + outputRead2} \ + ${"UNPAIRED_FASTQ=" + outputUnpaired} + } + + output { + File read1 = outputRead1 + File? read2 = outputRead2 + File? unpairedRead = outputUnpaired + } + runtime { memory: ceil(mem * select_first([memoryMultiplier, 1.5])) } diff --git a/samtools.wdl b/samtools.wdl index a1192ac3a08419a8584c01a005cd785649a7d60d..59f5b0eadb9575becb1d783c6e2ddec310fbee68 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -117,3 +117,40 @@ task fastq { } } + +task view { + String? preCommand + File inFile + File? referenceFasta + String outputFileName + Boolean? outputBam + Boolean? uncompressedBamOutput + Int? includeFilter + Int? excludeFilter + Int? excludeSpecificFilter + Int? threads + Int? memory + + command { + set -e -o pipefail + ${preCommand} + samtools view \ + ${"-T " + referenceFasta} \ + ${"-o " + outputFileName} \ + ${true="-b " false="" outputBam} \ + ${true="-u " false="" uncompressedBamOutput} \ + ${"-f " + includeFilter} \ + ${"-F " + excludeFilter} \ + ${"-G " + excludeSpecificFilter} \ + ${"--threads " + (threads - 1)} \ + ${inFile} + } + + output { + File outputFile = outputFileName + } + runtime { + cpu: select_first([threads, 1]) + memory: select_first([memory, 1]) + } +} diff --git a/spades.wdl b/spades.wdl index ce9de29cecb0b48d20faff177fc1a007e0e7c312..f0feb573ec4fb71f09ec5cb42f62fe45c4ed7cb8 100644 --- a/spades.wdl +++ b/spades.wdl @@ -22,14 +22,15 @@ task spades { Boolean? disableRepeatResolution File? dataset Int? threads - Int? memoryGb + Float? memoryGb File? tmpDir String? k Float? covCutoff Int? 
phredOffset Int finalThreads = select_first([threads,1]) - Int totalMemory = select_first([memoryGb, finalThreads * 16]) - Int clusterMemory = totalMemory / finalThreads + Float totalMemory = select_first([memoryGb, finalThreads * 16.0]) + Int finalMemory = ceil(totalMemory) + Int clusterMemory = ceil(totalMemory / finalThreads) command { set -e -o pipefail @@ -57,7 +58,7 @@ task spades { ${true="--disable-rr" false="" disableRepeatResolution } \ ${"--dataset " + dataset } \ ${"--threads " + finalThreads} \ - ${"--memory " + totalMemory } \ + ${"--memory " + finalMemory } \ ${"-k " + k } \ ${"--cov-cutoff " + covCutoff } \ ${"--phred-offset " + phredOffset }