diff --git a/.gitignore b/.gitignore index 9f11b755a17d8192c60f61cb17b8902dffbd9f23..d392f0e82c4ac6dd2e7bb17eb9253e1b30d07105 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -.idea/ +*.jar diff --git a/.travis.yml b/.travis.yml index b730aaa8e7c65116da7c03734e802a2d8b455aa9..cb8b70de06b9c35f8d7759b565eb621c435549ca 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,6 @@ language: java script: - set -e - - wget https://github.com/broadinstitute/cromwell/releases/download/31/womtool-31.jar - - for F in `find -name "*.wdl"`; do echo $F; java -jar womtool-31.jar validate $F; done + - export CROMWELL_VERSION=34 + - wget https://github.com/broadinstitute/cromwell/releases/download/$CROMWELL_VERSION/womtool-$CROMWELL_VERSION.jar + - for F in $(git ls-files *.wdl); do echo $F; java -jar womtool-$CROMWELL_VERSION.jar validate $F; done diff --git a/bioconda.wdl b/bioconda.wdl index 0600ff72d79dc27065fbdb7b6e4db1950e37e710..0a513ddc5df99f1354cfd8a4db8aad75d8ef7428 100644 --- a/bioconda.wdl +++ b/bioconda.wdl @@ -1,13 +1,17 @@ +version 1.0 # Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2017 # Bioconda installs task installPrefix { - Array[String] requirements - String prefix - String? condaPath - command { - ${default="conda" condaPath} create \ + input { + Array[String] requirements + String prefix + String condaPath = "conda" + } + + command <<< + ~{condaPath} create \ --json -q \ --yes \ --override-channels \ @@ -16,8 +20,9 @@ task installPrefix { --channel defaults \ --channel r \ - --prefix ${prefix} \ - ${sep=' ' requirements} - } + --prefix ~{prefix} \ + ~{sep=' ' requirements} + >>> + output { File condaEnvPath=prefix File condaJson=stdout() diff --git a/biopet.wdl b/biopet.wdl index d076fc2a1c8c271fa1847ab7f3721b03f2a5ee7f..36d6f2a9cd7b4b4bf633a063ff2220c2cee70995 100644 --- a/biopet.wdl +++ b/biopet.wdl @@ -1,32 +1,32 @@ -# PLEASE ADD TASKS IN ALPHABETIC ORDER. -# This makes searching a lot easier. +version 1.0 task BaseCounter { - String? 
preCommand - File? toolJar - File bam - File bamIndex - File refFlat - String outputDir - String prefix - - Float? memory - Float? memoryMultiplier - Int mem = ceil(select_first([memory, 4.0])) + input { + String? preCommand + File? toolJar + File bam + File bamIndex + File refFlat + String outputDir + String prefix + + Int memory = 4 + Float memoryMultiplier = 3.0 + } String toolCommand = if defined(toolJar) - then "java -Xmx" + mem + "G -jar " +toolJar - else "biopet-basecounter -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " +toolJar + else "biopet-basecounter -Xmx" + memory + "G" command { set -e -o pipefail - mkdir -p ${outputDir} - ${preCommand} - ${toolCommand} \ - -b ${bam} \ - -r ${refFlat} \ - -o ${outputDir} \ - -p ${prefix} + mkdir -p ~{outputDir} + ~{preCommand} + ~{toolCommand} \ + -b ~{bam} \ + -r ~{refFlat} \ + -o ~{outputDir} \ + -p ~{prefix} } output { @@ -67,83 +67,84 @@ task BaseCounter { } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 3.0])) + memory: ceil(memory * memoryMultiplier) } } task ExtractAdaptersFastqc { - File? toolJar - String? preCommand - File inputFile - String outputDir - String? adapterOutputFilePath = outputDir + "/adapter.list" - String? contamsOutputFilePath = outputDir + "/contaminations.list" - Boolean? skipContams - File? knownContamFile - File? knownAdapterFile - Float? adapterCutoff - Boolean? outputAsFasta - - Float? memory - Float? memoryMultiplier - Int mem = ceil(select_first([memory, 4.0])) + input { + File? toolJar + String? preCommand + File inputFile + String outputDir + String adapterOutputFilePath = outputDir + "/adapter.list" + String contamsOutputFilePath = outputDir + "/contaminations.list" + Boolean? skipContams + File? knownContamFile + File? knownAdapterFile + Float? adapterCutoff + Boolean? 
outputAsFasta + + Int memory = 4 + Float memoryMultiplier = 2.5 + } String toolCommand = if defined(toolJar) - then "java -Xmx" + mem + "G -jar " +toolJar - else "biopet-extractadaptersfastqc -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " +toolJar + else "biopet-extractadaptersfastqc -Xmx" + memory + "G" command { - set -e - ${preCommand} - mkdir -p ${outputDir} - ${toolCommand} \ - --inputFile ${inputFile} \ - ${"--adapterOutputFile " + adapterOutputFilePath } \ - ${"--contamsOutputFile " + contamsOutputFilePath } \ - ${"--knownContamFile " + knownContamFile} \ - ${"--knownAdapterFile " + knownAdapterFile} \ - ${"--adapterCutoff " + adapterCutoff} \ - ${true="--skipContams" false="" skipContams} \ - ${true="--outputAsFasta" false="" outputAsFasta} + set -e + ~{preCommand} + mkdir -p ~{outputDir} + ~{toolCommand} \ + --inputFile ~{inputFile} \ + ~{"--adapterOutputFile " + adapterOutputFilePath } \ + ~{"--contamsOutputFile " + contamsOutputFilePath } \ + ~{"--knownContamFile " + knownContamFile} \ + ~{"--knownAdapterFile " + knownAdapterFile} \ + ~{"--adapterCutoff " + adapterCutoff} \ + ~{true="--skipContams" false="" skipContams} \ + ~{true="--outputAsFasta" false="" outputAsFasta} } output { - File adapterOutputFile = select_first([adapterOutputFilePath]) - File contamsOutputFile = select_first([contamsOutputFilePath]) - Array[String] adapterList = read_lines(select_first([adapterOutputFilePath])) - Array[String] contamsList = read_lines(select_first([contamsOutputFilePath])) + File adapterOutputFile = adapterOutputFilePath + File contamsOutputFile = contamsOutputFilePath + Array[String] adapterList = read_lines(adapterOutputFilePath) + Array[String] contamsList = read_lines(contamsOutputFilePath) } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 2.5])) + memory: ceil(memory * memoryMultiplier) } } - task FastqSplitter { - String? preCommand - File inputFastq - Array[String] outputPaths - File? toolJar - - Float? memory - Float? 
memoryMultiplier - Int mem = ceil(select_first([memory, 4.0])) + input { + String? preCommand + File inputFastq + Array[String]+ outputPaths + File? toolJar + + Int memory = 4 + Float memoryMultiplier = 2.5 + } String toolCommand = if defined(toolJar) - then "java -Xmx" + mem + "G -jar " +toolJar - else "biopet-fastqsplitter -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " +toolJar + else "biopet-fastqsplitter -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - mkdir -p $(dirname ${sep=') $(dirname ' outputPaths}) - if [ ${length(outputPaths)} -gt 1 ]; then - ${toolCommand} \ - -I ${inputFastq} \ - -o ${sep=' -o ' outputPaths} + ~{preCommand} + mkdir -p $(dirname ~{sep=') $(dirname ' outputPaths}) + if [ ~{length(outputPaths)} -gt 1 ]; then + ~{toolCommand} \ + -I ~{inputFastq} \ + -o ~{sep=' -o ' outputPaths} else - ln -sf ${inputFastq} ${outputPaths[0]} + ln -sf ~{inputFastq} ~{outputPaths[0]} fi } @@ -152,39 +153,40 @@ task FastqSplitter { } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 2.5])) + memory: ceil(memory * memoryMultiplier) } } task FastqSync { - String? preCommand - File ref1 - File ref2 - File in1 - File in2 - String out1path - String out2path - File? toolJar - - Float? memory - Float? memoryMultiplier - Int mem = ceil(select_first([memory, 4.0])) + input { + String? preCommand + File ref1 + File ref2 + File in1 + File in2 + String out1path + String out2path + File? 
toolJar + + Int memory = 4 + Float memoryMultiplier = 2.5 + } String toolCommand = if defined(toolJar) - then "java -Xmx" + mem + "G -jar " +toolJar - else "biopet-fastqsync -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " +toolJar + else "biopet-fastqsync -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - mkdir -p $(dirname ${out1path}) $(dirname ${out2path}) - ${toolCommand} \ - --in1 ${in1} \ - --in2 ${in2} \ - --ref1 ${ref1} \ - --ref2 ${ref2} \ - --out1 ${out1path} \ - --out2 ${out2path} + ~{preCommand} + mkdir -p $(dirname ~{out1path}) $(dirname ~{out2path}) + ~{toolCommand} \ + --in1 ~{in1} \ + --in2 ~{in2} \ + --ref1 ~{ref1} \ + --ref2 ~{ref2} \ + --out1 ~{out1path} \ + --out2 ~{out2path} } output { @@ -193,41 +195,42 @@ task FastqSync { } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 2.5])) + memory: ceil(memory * memoryMultiplier) } } task SampleConfig { - File? toolJar - String? preCommand - Array[File]+ inputFiles - String keyFilePath - String? sample - String? library - String? readgroup - String? jsonOutputPath - String? tsvOutputPath - - Float? memory - Float? memoryMultiplier - Int mem = ceil(select_first([memory, 4.0])) + input { + File? toolJar + String? preCommand + Array[File]+ inputFiles + String keyFilePath + String? sample + String? library + String? readgroup + String? jsonOutputPath + String? tsvOutputPath + + Int memory = 4 + Float memoryMultiplier = 2.0 + } String toolCommand = if defined(toolJar) - then "java -Xmx" + mem + "G -jar " +toolJar - else "biopet-sampleconfig -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " +toolJar + else "biopet-sampleconfig -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - mkdir -p . 
${"$(dirname " + jsonOutputPath + ")"} ${"$(dirname " + tsvOutputPath + ")"} - ${toolCommand} \ - -i ${sep="-i " inputFiles} \ - ${"--sample " + sample} \ - ${"--library " + library} \ - ${"--readgroup " + readgroup} \ - ${"--jsonOutput " + jsonOutputPath} \ - ${"--tsvOutput " + tsvOutputPath} \ - > ${keyFilePath} + ~{preCommand} + mkdir -p . ~{"$(dirname " + jsonOutputPath + ")"} ~{"$(dirname " + tsvOutputPath + ")"} + ~{toolCommand} \ + -i ~{sep=" -i " inputFiles} \ + ~{"--sample " + sample} \ + ~{"--library " + library} \ + ~{"--readgroup " + readgroup} \ + ~{"--jsonOutput " + jsonOutputPath} \ + ~{"--tsvOutput " + tsvOutputPath} \ + > ~{keyFilePath} } output { @@ -237,36 +240,37 @@ task SampleConfig { } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 2.0])) + memory: ceil(memory * memoryMultiplier) } } task ScatterRegions { - String? preCommand - File refFasta - File refDict - String outputDirPath - File? toolJar - Int? scatterSize - File? 
regions + + Int memory = 4 + Float memoryMultiplier = 3.0 + } String toolCommand = if defined(toolJar) - then "java -Xmx" + mem + "G -jar " +toolJar - else "biopet-scatterregions -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " +toolJar + else "biopet-scatterregions -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - mkdir -p ${outputDirPath} - ${toolCommand} \ - -R ${refFasta} \ - -o ${outputDirPath} \ - ${"-s " + scatterSize} \ - ${"-L " + regions} + ~{preCommand} + mkdir -p ~{outputDirPath} + ~{toolCommand} \ + -R ~{refFasta} \ + -o ~{outputDirPath} \ + ~{"-s " + scatterSize} \ + ~{"-L " + regions} } output { @@ -274,65 +278,142 @@ task ScatterRegions { } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 3.0])) + memory: ceil(memory * memoryMultiplier) } } task Seqstat { - String? preCommand - File? toolJar - File fastq - String outputFile - Float? memory - Float? memoryMultiplier - Int mem = ceil(select_first([memory, 4.0])) + input { + String? preCommand + File? toolJar + File fastq + String outputFile + + Int memory = 4 + Float memoryMultiplier = 2.0 + } String toolCommand = if defined(toolJar) - then "java -Xmx" + mem + "G -jar " + toolJar - else "biopet-seqstat -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " + toolJar + else "biopet-seqstat -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - mkdir -p $(dirname ${outputFile}) - ${toolCommand} \ - --fastq ${fastq} \ - --output ${outputFile} + ~{preCommand} + mkdir -p $(dirname ~{outputFile}) + ~{toolCommand} \ + --fastq ~{fastq} \ + --output ~{outputFile} } + output { File json = outputFile } + runtime { - memory: ceil(mem * select_first([memoryMultiplier, 2.0])) + memory: ceil(memory * memoryMultiplier) } } -task ValidateFastq { - String? preCommand - File? toolJar - File fastq1 - File? fastq2 +task ValidateAnnotation { + input { + String? preCommand + File? toolJar + File? refRefflat + File? 
gtfFile + File refFasta + File refFastaIndex + File refDict + + Int memory = 4 + Float memoryMultiplier = 2.0 + } + + String toolCommand = if defined(toolJar) + then "java -Xmx" + memory + "G -jar " + toolJar + else "biopet-validateannotation -Xmx" + memory + "G" + + command { + set -e -o pipefail + ~{preCommand} + ~{toolCommand} \ + ~{"-r " + refRefflat} \ + ~{"-g " + gtfFile} \ + -R ~{refFasta} + } - Float? memory - Float? memoryMultiplier - Int mem = ceil(select_first([memory, 4.0])) + output { + File stderr = stderr() + } + + runtime { + memory: ceil(memory * memoryMultiplier) + } +} + +task ValidateFastq { + input { + String? preCommand + File? toolJar + File fastq1 + File? fastq2 + + Int memory = 4 + Float memoryMultiplier = 2.0 + } String toolCommand = if defined(toolJar) - then "java -Xmx" + mem + "G -jar " + toolJar - else "biopet-validatefastq -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " + toolJar + else "biopet-validatefastq -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - biopet-validatefastq \ - --fastq1 ${fastq1} \ - ${"--fastq2 " + fastq2} + ~{preCommand} + ~{toolCommand} \ + --fastq1 ~{fastq1} \ + ~{"--fastq2 " + fastq2} } + output { File stderr = stderr() } + runtime { - memory: ceil(mem * select_first([memoryMultiplier, 2.0])) + memory: ceil(memory * memoryMultiplier) } } +task ValidateVcf { + input { + String? preCommand + File? 
toolJar + File vcfFile + File vcfIndex + File refFasta + File refFastaIndex + File refDict + + Int memory = 4 + Float memoryMultiplier = 2.0 + } + + String toolCommand = if defined(toolJar) + then "java -Xmx" + memory + "G -jar " + toolJar + else "biopet-validatevcf -Xmx" + memory + "G" + + command { + set -e -o pipefail + ~{preCommand} + ~{toolCommand} \ + -i ~{vcfFile} \ + -R ~{refFasta} + } + + output { + File stderr = stderr() + } + + runtime { + memory: ceil(memory * memoryMultiplier) + } +} diff --git a/bwa.wdl b/bwa.wdl index 440b6559b2fcceaa0efaebfdeddd35b46da07d5a..01d1b7d08b798083d0bb7dbf6fd74ce4ebe64a14 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -1,61 +1,72 @@ -task mem { - String? preCommand - File inputR1 - File? inputR2 - File referenceFasta - Array[File] indexFiles # These indexFiles need to be added, otherwise cromwell will not find them. - String outputPath - String? readgroup +version 1.0 - Int? threads - Int? memory +task Mem { + input { + String? preCommand + File inputR1 + File? inputR2 + File referenceFasta + Array[File] indexFiles # These indexFiles need to be added, otherwise cromwell will not find them. + String outputPath + String? readgroup + Int threads = 1 + Int memory = 8 + } command { set -e -o pipefail - mkdir -p $(dirname ${outputPath}) - ${preCommand} - bwa mem ${"-t " + threads} \ - ${"-R '" + readgroup + "'"} \ - ${referenceFasta} ${inputR1} ${inputR2} | samtools sort --output-fmt BAM - > ${outputPath} + mkdir -p $(dirname ~{outputPath}) + ~{preCommand} + bwa mem ~{"-t " + threads} \ + ~{"-R '" + readgroup + "'"} \ + ~{referenceFasta} \ + ~{inputR1} \ + ~{inputR2} \ + | samtools sort --output-fmt BAM - > ~{outputPath} } output { File bamFile = outputPath } + runtime{ - cpu: select_first([threads,1]) - memory: select_first([memory,8]) + cpu: threads + memory: memory } } -task index { - File fasta - String? preCommand - String? constructionAlgorithm - Int? blockSize - String? outputDir +task Index { + input { + File fasta + String? 
preCommand + String? constructionAlgorithm + Int? blockSize + String? outputDir + } + String fastaFilename = basename(fasta) String outputFile = if (defined(outputDir)) then outputDir + "/" + fastaFilename else fasta command { set -e -o pipefail - ${"mkdir -p " + outputDir} - ${preCommand} - if [[ ! '${outputDir}' = '' ]] + ~{"mkdir -p " + outputDir} + ~{preCommand} + if [[ ! '~{outputDir}' = '' ]] then - ln -sf ${fasta} ${outputDir + "/"}${fastaFilename} + ln -sf ~{fasta} ~{outputDir + "/"}~{fastaFilename} fi bwa index \ - ${"-a " + constructionAlgorithm} \ - ${"-b" + blockSize} \ - ${outputFile} + ~{"-a " + constructionAlgorithm} \ + ~{"-b" + blockSize} \ + ~{outputFile} } output { File indexedFasta = outputFile Array[File] indexFiles = [outputFile + ".bwt",outputFile + ".pac",outputFile + ".sa",outputFile + ".amb",outputFile + ".ann"] } + parameter_meta { fasta: "Fasta file to be indexed" constructionAlgorithm: "-a STR BWT construction algorithm: bwtsw, is or rb2 [auto]" diff --git a/centrifuge.wdl b/centrifuge.wdl index f466fe3b53c380077fb61cccc12354c2b0cc06a5..4b128c33627eb4eec4facb184c4e49a321d4b4a7 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -1,105 +1,114 @@ +version 1.0 + # Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2018 # # Tasks from centrifuge task Build { - File conversionTable - File taxonomyTree - File inputFasta - String centrifugeIndexBase - String? preCommand - String? centrifugeBuildExecutable = "centrifuge-build" - #Boolean? c = false - Boolean? largeIndex = false - Boolean? noAuto = false - Int? bMax - Int? bMaxDivn - Boolean? noDiffCover = false - Boolean? noRef = false - Boolean? justRef = false - Int? offRate - Int? fTabChars - File? nameTable - File? sizeTable - Int? seed - Int? kmerCount - - Int? threads - Int? memory + input { + File conversionTable + File taxonomyTree + File inputFasta + String centrifugeIndexBase + String? 
preCommand + String centrifugeBuildExecutable = "centrifuge-build" + #Boolean? c = false + Boolean largeIndex = false + Boolean noAuto = false + Int? bMax + Int? bMaxDivn + Boolean noDiffCover = false + Boolean noRef = false + Boolean justRef = false + Int? offRate + Int? fTabChars + File? nameTable + File? sizeTable + Int? seed + Int? kmerCount + + Int threads = 8 + Int memory = 20 + } command { set -e -o pipefail - ${preCommand} - ${"mkdir -p $(dirname " + centrifugeIndexBase + ")"} - ${centrifugeBuildExecutable} \ - ${true='--large-index' false='' largeIndex} \ - ${true='--noauto' false='' noAuto} \ - ${'--bmax ' + bMax} \ - ${'--bmaxdivn ' + bMaxDivn} \ - ${true='--nodc' false='' noDiffCover} \ - ${true='--noref' false='' noRef} \ - ${true='--justref' false='' justRef} \ - ${'--offrate ' + offRate} \ - ${'--ftabchars ' + fTabChars} \ - ${'--name-table ' + nameTable } \ - ${'--size-table ' + sizeTable} \ - ${'--seed ' + seed} \ - ${'--kmer-count' + kmerCount} \ - ${'--threads ' + threads} \ - --conversion-table ${conversionTable} \ - --taxonomy-tree ${taxonomyTree} \ - ${inputFasta} \ - ${centrifugeIndexBase} + ~{preCommand} + ~{"mkdir -p $(dirname " + centrifugeIndexBase + ")"} + ~{centrifugeBuildExecutable} \ + ~{true='--large-index' false='' largeIndex} \ + ~{true='--noauto' false='' noAuto} \ + ~{'--bmax ' + bMax} \ + ~{'--bmaxdivn ' + bMaxDivn} \ + ~{true='--nodc' false='' noDiffCover} \ + ~{true='--noref' false='' noRef} \ + ~{true='--justref' false='' justRef} \ + ~{'--offrate ' + offRate} \ + ~{'--ftabchars ' + fTabChars} \ + ~{'--name-table ' + nameTable } \ + ~{'--size-table ' + sizeTable} \ + ~{'--seed ' + seed} \ + ~{'--kmer-count ' + kmerCount} \ + ~{'--threads ' + threads} \ + --conversion-table ~{conversionTable} \ + --taxonomy-tree ~{taxonomyTree} \ + ~{inputFasta} \ + ~{centrifugeIndexBase} } + runtime { - cpu: select_first([threads, 8]) - memory: select_first([memory, 20]) + cpu: threads + memory: memory } } task Classify { - String outputDir - 
Boolean? compressOutput = true - String? preCommand - String indexPrefix - Array[File]? unpairedReads - Array[File]+ read1 - Array[File]? read2 - Boolean? fastaInput - # Variables for handling output + input { + String outputDir + Boolean compressOutput = true + String? preCommand + String indexPrefix + Array[File]? unpairedReads + Array[File]+ read1 + Array[File]? read2 + Boolean? fastaInput + # Variables for handling output + + String? metFilePath # If this is specified, the report file is empty + Int? assignments + Int? minHitLen + Int? minTotalLen + Array[String]? hostTaxIds + Array[String]? excludeTaxIds + + Int threads = 4 + Int memory = 8 + } + String outputFilePath = outputDir + "/centrifuge.out" String reportFilePath = outputDir + "/centrifuge_report.tsv" String finalOutputPath = if (compressOutput == true) then outputFilePath + ".gz" else outputFilePath - String? metFilePath # If this is specified, the report file is empty - Int? assignments - Int? minHitLen - Int? minTotalLen - Array[String]? hostTaxIds - Array[String]? excludeTaxIds - - Int? threads - Int? 
memory command { set -e -o pipefail - mkdir -p ${outputDir} - ${preCommand} + mkdir -p ~{outputDir} + ~{preCommand} centrifuge \ - ${"-p " + select_first([threads, 4])} \ - ${"-x " + indexPrefix} \ - ${true="-f" false="" fastaInput} \ - ${true="-k" false="" defined(assignments)} ${assignments} \ - ${true="-1" false="-U" defined(read2)} ${sep=',' read1} \ - ${true="-2" false="" defined(read2)} ${sep=',' read2} \ - ${true="-U" false="" defined(unpairedReads)} ${sep=',' unpairedReads} \ - ${"--report-file " + reportFilePath} \ - ${"--min-hitlen " + minHitLen} \ - ${"--min-totallen " + minTotalLen} \ - ${"--met-file " + metFilePath} \ - ${true="--host-taxids " false="" defined(hostTaxIds)} ${sep=',' hostTaxIds} \ - ${true="--exclude-taxids " false="" defined(excludeTaxIds)} ${sep=',' excludeTaxIds} \ - ${true="| gzip -c >" false="-S" compressOutput} ${finalOutputPath} + ~{"-p " + threads} \ + ~{"-x " + indexPrefix} \ + ~{true="-f" false="" fastaInput} \ + ~{true="-k" false="" defined(assignments)} ~{assignments} \ + ~{true="-1" false="-U" defined(read2)} ~{sep=',' read1} \ + ~{true="-2" false="" defined(read2)} ~{sep=',' read2} \ + ~{true="-U" false="" defined(unpairedReads)} ~{sep=',' unpairedReads} \ + ~{"--report-file " + reportFilePath} \ + ~{"--min-hitlen " + minHitLen} \ + ~{"--min-totallen " + minTotalLen} \ + ~{"--met-file " + metFilePath} \ + ~{true="--host-taxids " false="" defined(hostTaxIds)} ~{sep=',' hostTaxIds} \ + ~{true="--exclude-taxids " false="" defined(excludeTaxIds)} ~{sep=',' excludeTaxIds} \ + ~{true="| gzip -c >" false="-S" compressOutput} ~{finalOutputPath} } output { @@ -108,64 +117,68 @@ task Classify { } runtime { - cpu: select_first([threads, 4]) - memory: select_first([memory, 8]) + cpu: threads + memory: memory } } task Download { - String libraryPath - Array[String]? domain - String? executable = "centrifuge-download" - String? preCommand - String? seqTaxMapPath - String? database = "refseq" - String? assemblyLevel - String? 
refseqCategory - Array[String]? taxIds - Boolean? filterUnplaced = false - Boolean? maskLowComplexRegions = false - Boolean? downloadRnaSeqs = false - Boolean? modifyHeader = false - Boolean? downloadGiMap = false + input { + String libraryPath + Array[String]? domain + String executable = "centrifuge-download" + String? preCommand + String? seqTaxMapPath + String database = "refseq" + String? assemblyLevel + String? refseqCategory + Array[String]? taxIds + Boolean filterUnplaced = false + Boolean maskLowComplexRegions = false + Boolean downloadRnaSeqs = false + Boolean modifyHeader = false + Boolean downloadGiMap = false + } # This will use centrifuge-download to download. # The bash statement at the beginning is to make sure # the directory for the SeqTaxMapPath exists. command { set -e -o pipefail - ${preCommand} - ${"mkdir -p $(dirname " + seqTaxMapPath + ")"} - ${executable} \ - -o ${libraryPath} \ - ${true='-d ' false='' defined(domain)}${sep=',' domain} \ - ${'-a "' + assemblyLevel + '"'} \ - ${"-c " + refseqCategory} \ - ${true='-t' false='' defined(taxIds)} '${sep=',' taxIds}' \ - ${true='-r' false='' downloadRnaSeqs} \ - ${true='-u' false='' filterUnplaced} \ - ${true='-m' false='' maskLowComplexRegions} \ - ${true='-l' false='' modifyHeader} \ - ${true='-g' false='' downloadGiMap} \ - ${database} ${">> " + seqTaxMapPath} + ~{preCommand} + ~{"mkdir -p $(dirname " + seqTaxMapPath + ")"} + ~{executable} \ + -o ~{libraryPath} \ + ~{true='-d ' false='' defined(domain)}~{sep=',' domain} \ + ~{'-a "' + assemblyLevel + '"'} \ + ~{"-c " + refseqCategory} \ + ~{true='-t' false='' defined(taxIds)} '~{sep=',' taxIds}' \ + ~{true='-r' false='' downloadRnaSeqs} \ + ~{true='-u' false='' filterUnplaced} \ + ~{true='-m' false='' maskLowComplexRegions} \ + ~{true='-l' false='' modifyHeader} \ + ~{true='-g' false='' downloadGiMap} \ + ~{database} ~{">> " + seqTaxMapPath} } + output { - File seqTaxMap = "${seqTaxMapPath}" + File seqTaxMap = "~{seqTaxMapPath}" File library = 
libraryPath Array[File] fastaFiles = glob(libraryPath + "/*/*.fna") } } task DownloadTaxonomy { - String centrifugeTaxonomyDir - String? executable = "centrifuge-download" - String? preCommand - + input { + String centrifugeTaxonomyDir + String executable = "centrifuge-download" + String? preCommand + } command { set -e -o pipefail - ${preCommand} - ${executable} \ - -o ${centrifugeTaxonomyDir} \ + ~{preCommand} + ~{executable} \ + -o ~{centrifugeTaxonomyDir} \ taxonomy } @@ -176,36 +189,38 @@ task DownloadTaxonomy { } task Kreport { - String? preCommand - File centrifugeOut - Boolean inputIsCompressed - String outputDir - String? suffix = "kreport" - String? prefix = "centrifuge" - String kreportFilePath = outputDir + "/" + prefix + "." + suffix - String indexPrefix - Boolean? onlyUnique - Boolean? showZeros - Boolean? isCountTable - Int? minScore - Int? minLength - - Int? cores - Int? memory + input { + String? preCommand + File centrifugeOut + Boolean inputIsCompressed + String outputDir + String suffix = "kreport" + String prefix = "centrifuge" + String indexPrefix + Boolean? onlyUnique + Boolean? showZeros + Boolean? isCountTable + Int? minScore + Int? minLength + + Int cores = 1 + Int memory = 4 + } + String kreportFilePath = outputDir + "/" + prefix + "." 
+ suffix command { set -e -o pipefail - ${preCommand} + ~{preCommand} centrifuge-kreport \ - -x ${indexPrefix} \ - ${true="--only-unique" false="" onlyUnique} \ - ${true="--show-zeros" false="" showZeros} \ - ${true="--is-count-table" false="" isCountTable} \ - ${"--min-score " + minScore} \ - ${"--min-length " + minLength} \ - ${true="<(zcat" false="" inputIsCompressed} ${centrifugeOut}\ - ${true=")" false="" inputIsCompressed} \ - > ${kreportFilePath} + -x ~{indexPrefix} \ + ~{true="--only-unique" false="" onlyUnique} \ + ~{true="--show-zeros" false="" showZeros} \ + ~{true="--is-count-table" false="" isCountTable} \ + ~{"--min-score " + minScore} \ + ~{"--min-length " + minLength} \ + ~{true="<(zcat" false="" inputIsCompressed} ~{centrifugeOut}\ + ~{true=")" false="" inputIsCompressed} \ + > ~{kreportFilePath} } output { @@ -213,7 +228,7 @@ task Kreport { } runtime { - cpu: select_first([cores, 1]) - memory: select_first([memory, 4]) + cpu: cores + memory: memory } } diff --git a/common.wdl b/common.wdl index d80d47e6d5a964b4ba2637741eeb62b51cecc2f2..15e99f5804089fdd328f263b21804dc18712e752 100644 --- a/common.wdl +++ b/common.wdl @@ -1,12 +1,18 @@ -task objectMd5 { - Object the_object +version 1.0 + +task AppendToStringArray { + input { + Array[String] array + String string + } command { - cat ${write_object(the_object)} | md5sum - | sed -e 's/ -//' + echo "~{sep='\n' array} + ~{string}" } output { - String md5sum = read_string(stdout()) + Array[String] outArray = read_lines(stdout()) } runtime { @@ -14,32 +20,37 @@ task objectMd5 { } } -task mapMd5 { - Map[String,String] map - - command { - cat ${write_map(map)} | md5sum - | sed -e 's/ -//' - } - - output { - String md5sum = read_string(stdout()) +# This task will fail if the MD5sum doesn't match the file. 
+task CheckFileMD5 { + input { + File file + String MD5sum } - runtime { - memory: 1 + command { + set -e -o pipefail + MD5SUM=$(md5sum ~{file} | cut -d ' ' -f 1) + [ $MD5SUM = ~{MD5sum} ] } } -task stringArrayMd5 { - Array[String] stringArray +task ConcatenateTextFiles { + input { + Array[File] fileList + String combinedFilePath + Boolean unzip = false + Boolean zip = false + } command { - set -eu -o pipefail - echo ${sep=',' stringArray} | md5sum - | sed -e 's/ -//' + set -e -o pipefail + ~{"mkdir -p $(dirname " + combinedFilePath + ")"} + ~{true='zcat' false= 'cat' unzip} ~{sep=' ' fileList} \ + ~{true="| gzip -c" false="" zip} > ~{combinedFilePath} } output { - String md5sum = read_string(stdout()) + File combinedFile = combinedFilePath } runtime { @@ -47,39 +58,38 @@ task stringArrayMd5 { } } -task concatenateTextFiles { - Array[File] fileList - String combinedFilePath - Boolean? unzip=false - Boolean? zip=false +task CreateLink { + # Making this of type File will create a link to the copy of the file in the execution + # folder, instead of the actual file. + input { + String inputFile + String outputPath + } command { - set -e -o pipefail - ${"mkdir -p $(dirname " + combinedFilePath + ")"} - ${true='zcat' false= 'cat' unzip} ${sep=' ' fileList} \ - ${true="| gzip -c" false="" zip} > ${combinedFilePath} + ln -sf ~{inputFile} ~{outputPath} } output { - File combinedFile = combinedFilePath - } - - runtime { - memory: 1 + File link = outputPath } } -# inspired by https://gatkforums.broadinstitute.org/wdl/discussion/9616/is-there-a-way-to-flatten-arrays -task flattenStringArray { - Array[Array[String]] arrayList +# DEPRECATED. USE BUILT-IN FLATTEN FUNCTION +# task FlattenStringArray {} +# Commented out to let pipelines that depend on this fail. 
+ +task MapMd5 { + input { + Map[String,String] map + } command { - for line in $(echo ${sep=', ' arrayList}) ; \ - do echo $line | tr -d '"[],' ; done + cat ~{write_map(map)} | md5sum - | sed -e 's/ -//' } output { - Array[String] flattenedArray = read_lines(stdout()) + String md5sum = read_string(stdout()) } runtime { @@ -87,17 +97,18 @@ task flattenStringArray { } } -task appendToStringArray { - Array[String] array - String string + +task ObjectMd5 { + input { + Object the_object + } command { - echo "${sep='\n' array} - ${string}" + cat ~{write_object(the_object)} | md5sum - | sed -e 's/ -//' } output { - Array[String] out_array = read_lines(stdout()) + String md5sum = read_string(stdout()) } runtime { @@ -105,17 +116,21 @@ task appendToStringArray { } } -task createLink { - # Making this of type File will create a link to the copy of the file in the execution - # folder, instead of the actual file. - String inputFile - String outputPath +task StringArrayMd5 { + input { + Array[String] stringArray + } command { - ln -sf ${inputFile} ${outputPath} + set -eu -o pipefail + echo ~{sep=',' stringArray} | md5sum - | sed -e 's/ -//' } output { - File link = outputPath + String md5sum = read_string(stdout()) } -} \ No newline at end of file + + runtime { + memory: 1 + } +} diff --git a/cutadapt.wdl b/cutadapt.wdl index d4255c695c51e5cd6947b7955f8b871746ad8524..8b073442c02a449871f7dc6a320a8eabdf82d8d4 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -1,109 +1,127 @@ -task cutadapt { - File read1 - File? read2 - String read1output - String? read2output - String? format - String? preCommand - Int? cores = 1 - Int? memory = 4 - Array[String]+? adapter - Array[String]+? front - Array[String]+? anywhere - Array[String]+? adapterRead2 - Array[String]+? frontRead2 - Array[String]+? anywhereRead2 - Boolean? interleaved - String? pairFilter - Float? errorRate - Boolean? noIndels - Int? times - Int? overlap - Boolean? matchReadWildcards - Boolean? 
noMatchAdapterWildcards - Boolean? noTrim - Boolean? maskAdapter - Int? cut - String? nextseqTrim - String? qualityCutoff - Int? qualityBase - Int? length - Boolean? trimN - String? lengthTag - String? stripSuffix - String? prefix - String? suffix - Int? minimumLength = 1 # Necessary to prevent creation of empty reads - Int? maximumLength - Int? maxN - Boolean? discardTrimmed - Boolean? discardUntrimmed - String? infoFilePath - String? restFilePath - String? wildcardFilePath - String? tooShortOutputPath - String? tooLongOutputPath - String? untrimmedOutputPath - String? tooShortPairedOutputPath - String? tooLongPairedOutputPath - String? untrimmedPairedOutputPath - Boolean? colorspace - Boolean? doubleEncode - Boolean? trimPrimer - Boolean? stripF3 - Boolean? maq - Boolean? bwa - Boolean? zeroCap - Boolean? noZeroCap - String? reportPath +version 1.0 + +task Cutadapt { + input { + File read1 + File? read2 + String read1output + String? read2output + String? format + String? preCommand + Int cores = 1 + Int memory = 4 + Array[String]+? adapter + Array[String]+? front + Array[String]+? anywhere + Array[String]+? adapterRead2 + Array[String]+? frontRead2 + Array[String]+? anywhereRead2 + Boolean? interleaved + String? pairFilter + Float? errorRate + Boolean? noIndels + Int? times + Int? overlap + Boolean? matchReadWildcards + Boolean? noMatchAdapterWildcards + Boolean? noTrim + Boolean? maskAdapter + Int? cut + String? nextseqTrim + String? qualityCutoff + Int? qualityBase + Int? length + Boolean? trimN + String? lengthTag + String? stripSuffix + String? prefix + String? suffix + Int? minimumLength = 1 # Necessary to prevent creation of empty reads + Int? maximumLength + Int? maxN + Boolean? discardTrimmed + Boolean? discardUntrimmed + String? infoFilePath + String? restFilePath + String? wildcardFilePath + String? tooShortOutputPath + String? tooLongOutputPath + String? untrimmedOutputPath + String? tooShortPairedOutputPath + String? tooLongPairedOutputPath + String? 
untrimmedPairedOutputPath + Boolean? colorspace + Boolean? doubleEncode + Boolean? trimPrimer + Boolean? stripF3 + Boolean? maq + Boolean? bwa + Boolean? zeroCap + Boolean? noZeroCap + String? reportPath + } command { set -e -o pipefail - ${"mkdir -p $(dirname " + read1output + ")"} - ${"mkdir -p $(dirname " + read2output + ")"} - ${"mkdir -p $(dirname " + reportPath + ")"} - ${preCommand} + ~{"mkdir -p $(dirname " + read1output + ")"} + ~{"mkdir -p $(dirname " + read2output + ")"} + ~{"mkdir -p $(dirname " + reportPath + ")"} + ~{preCommand} cutadapt \ - ${"--cores=" + cores} \ - ${true="-a " false="" defined(adapter)} ${sep=" -a " adapter} \ - ${true="-A " false="" defined(adapterRead2)} ${sep=" -A " adapterRead2} \ - ${true="-g " false="" defined(front)} ${sep=" -g " front} \ - ${true="-G " false="" defined(frontRead2)} ${sep=" -G " frontRead2} \ - ${true="-b " false="" defined(anywhere)} ${sep=" -a " anywhere} \ - ${true="-B " false="" defined(anywhereRead2)} ${sep=" -B " anywhereRead2} \ - --output ${read1output} ${"--paired-output " + read2output} \ - ${"--to-short-output " + tooShortOutputPath} ${"--to-short-paired-output " + tooShortPairedOutputPath} \ - ${"--to-long-output " + tooLongOutputPath} ${"--to-long-paired-output " + tooLongPairedOutputPath} \ - ${"--untrimmed-output " + untrimmedOutputPath} ${"--untrimmed-paired-output " + untrimmedPairedOutputPath} \ - ${"--pair-filter " + pairFilter} \ - ${"--error-rate " + errorRate} \ - ${"--times " + times} \ - ${"--overlap " + overlap} \ - ${"--cut " + cut} \ - ${"--nextseq-trim " + nextseqTrim} \ - ${"--quality-cutoff " + qualityCutoff} \ - ${"--quality-base " + qualityBase} \ - ${"--length " + length} \ - ${"--length-tag " + lengthTag} \ - ${"--strip-suffix " + stripSuffix} \ - ${"--prefix " + prefix} \ - ${"--suffix " + suffix} \ - ${"--minimum-length " + minimumLength} \ - ${"--maximum-length " + maximumLength} \ - ${"--max-n " + maxN} \ - ${true="--discard-untrimmed" false="" discardUntrimmed} \ - 
${"--info-file " + infoFilePath } \ - ${"--rest-file " + restFilePath } \ - ${"--wildcard-file " + wildcardFilePath} \ - ${true="--match-read-wildcards" false="" matchReadWildcards} ${true="--no-match-adapter-wildcards" false="" noMatchAdapterWildcards} \ - ${true="--no-trim" false="" noTrim} ${true="--mask-adapter" false="" maskAdapter} \ - ${true="--no-indels" false="" noIndels} ${true="--trim-n" false="" trimN} \ - ${true="--interleaved" false="" interleaved} ${true="--discard-trimmed" false="" discardTrimmed } \ - ${true="--colorspace" false="" colorspace} ${true="--double-encode" false="" doubleEncode} \ - ${true="--strip-f3" false="" stripF3} ${true="--maq" false="" maq} ${true="--bwa" false="" bwa} \ - ${true="--zero-cap" false="" zeroCap} ${true="--no-zero-cap" false="" noZeroCap} \ - ${read1} ${read2} ${"> " + reportPath} + ~{"--cores=" + cores} \ + ~{true="-a" false="" defined(adapter)} ~{sep=" -a " adapter} \ + ~{true="-A" false="" defined(adapterRead2)} ~{sep=" -A " adapterRead2} \ + ~{true="-g" false="" defined(front)} ~{sep=" -g " front} \ + ~{true="-G" false="" defined(frontRead2)} ~{sep=" -G " frontRead2} \ + ~{true="-b" false="" defined(anywhere)} ~{sep=" -b " anywhere} \ + ~{true="-B" false="" defined(anywhereRead2)} ~{sep=" -B " anywhereRead2} \ + --output ~{read1output} ~{"--paired-output " + read2output} \ + ~{"--to-short-output " + tooShortOutputPath} \ + ~{"--to-short-paired-output " + tooShortPairedOutputPath} \ + ~{"--to-long-output " + tooLongOutputPath} \ + ~{"--to-long-paired-output " + tooLongPairedOutputPath} \ + ~{"--untrimmed-output " + untrimmedOutputPath} \ + ~{"--untrimmed-paired-output " + untrimmedPairedOutputPath} \ + ~{"--pair-filter " + pairFilter} \ + ~{"--error-rate " + errorRate} \ + ~{"--times " + times} \ + ~{"--overlap " + overlap} \ + ~{"--cut " + cut} \ + ~{"--nextseq-trim " + nextseqTrim} \ + ~{"--quality-cutoff " + qualityCutoff} \ + ~{"--quality-base " + qualityBase} \ + ~{"--length " + length} \ + ~{"--length-tag 
" + lengthTag} \ + ~{"--strip-suffix " + stripSuffix} \ + ~{"--prefix " + prefix} \ + ~{"--suffix " + suffix} \ + ~{"--minimum-length " + minimumLength} \ + ~{"--maximum-length " + maximumLength} \ + ~{"--max-n " + maxN} \ + ~{true="--discard-untrimmed" false="" discardUntrimmed} \ + ~{"--info-file " + infoFilePath } \ + ~{"--rest-file " + restFilePath } \ + ~{"--wildcard-file " + wildcardFilePath} \ + ~{true="--match-read-wildcards" false="" matchReadWildcards} \ + ~{true="--no-match-adapter-wildcards" false="" noMatchAdapterWildcards} \ + ~{true="--no-trim" false="" noTrim} \ + ~{true="--mask-adapter" false="" maskAdapter} \ + ~{true="--no-indels" false="" noIndels} \ + ~{true="--trim-n" false="" trimN} \ + ~{true="--interleaved" false="" interleaved} \ + ~{true="--discard-trimmed" false="" discardTrimmed } \ + ~{true="--colorspace" false="" colorspace} \ + ~{true="--double-encode" false="" doubleEncode} \ + ~{true="--strip-f3" false="" stripF3} \ + ~{true="--maq" false="" maq} \ + ~{true="--bwa" false="" bwa} \ + ~{true="--zero-cap" false="" zeroCap} \ + ~{true="--no-zero-cap" false="" noZeroCap} \ + ~{read1} \ + ~{read2} \ + ~{"> " + reportPath} } + output{ File report = if defined(reportPath) then select_first([reportPath]) else stdout() File cutRead1 = read1output @@ -118,8 +136,9 @@ task cutadapt { File? restFile=restFilePath File? wildcardFile=wildcardFilePath } + runtime { - cpu: select_first([cores]) - memory: select_first([memory]) + cpu: cores + memory: memory } } diff --git a/fastqc.wdl b/fastqc.wdl index d9135f73197bf1e985f5a096c45f449f37e42d94..6d924cf550a84640fcf3e8f9e1b555d2cf239dd7 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -1,46 +1,51 @@ -task fastqc { - File seqFile - String outdirPath - String? preCommand - Boolean? casava - Boolean? nano - Boolean? noFilter - Boolean? extract = true - Boolean? nogroup - Int? minLength - String? format - Int? threads = 1 - File? contaminants - File? adapters - File? limits - Int? kmers - String? 
dir +version 1.0 + +task Fastqc { + input { + File seqFile + String outdirPath + String? preCommand + Boolean? casava + Boolean? nano + Boolean? noFilter + Boolean extract = true + Boolean? nogroup + Int? minLength + String? format + Int threads = 1 + File? contaminants + File? adapters + File? limits + Int? kmers + String? dir + } + # Chops of the .gz extension if present. String name = sub(seqFile, "\\.gz$","") # This regex chops of the extension and replaces it with _fastqc for the reportdir. # Just as fastqc does it. String reportDir = outdirPath + "/" + sub(basename(name), "\\.[^\\.]*$", "_fastqc") - command { - set -e -o pipefail - ${preCommand} - mkdir -p ${outdirPath} - fastqc \ - ${"--outdir " + outdirPath} \ - ${true="--casava" false="" casava} \ - ${true="--nano" false="" nano} \ - ${true="--nofilter" false="" noFilter} \ - ${true="--extract" false="" extract} \ - ${true="--nogroup" false="" nogroup} \ - ${"--min_length " + minLength } \ - ${"--format " + format} \ - ${"--threads " + threads} \ - ${"--contaminants " + contaminants} \ - ${"--adapters " + adapters} \ - ${"--limits " + limits} \ - ${"--kmers " + kmers} \ - ${"--dir " + dir} \ - ${seqFile} + command { + set -e -o pipefail + ~{preCommand} + mkdir -p ~{outdirPath} + fastqc \ + ~{"--outdir " + outdirPath} \ + ~{true="--casava" false="" casava} \ + ~{true="--nano" false="" nano} \ + ~{true="--nofilter" false="" noFilter} \ + ~{true="--extract" false="" extract} \ + ~{true="--nogroup" false="" nogroup} \ + ~{"--min_length " + minLength } \ + ~{"--format " + format} \ + ~{"--threads " + threads} \ + ~{"--contaminants " + contaminants} \ + ~{"--adapters " + adapters} \ + ~{"--limits " + limits} \ + ~{"--kmers " + kmers} \ + ~{"--dir " + dir} \ + ~{seqFile} } output { @@ -51,18 +56,20 @@ task fastqc { } runtime { - cpu: select_first([threads]) + cpu: threads } } -task getConfiguration { - String? preCommand - String? fastqcDirFile = "fastqcDir.txt" +task GetConfiguration { + input { + String? 
preCommand + String fastqcDirFile = "fastqcDir.txt" + } command { set -e -o pipefail - ${preCommand} - echo $(dirname $(readlink -f $(which fastqc))) > ${fastqcDirFile} + ~{preCommand} + echo $(dirname $(readlink -f $(which fastqc))) > ~{fastqcDirFile} } output { diff --git a/flash.wdl b/flash.wdl index c081d49cdd3841e91991b0c90ac894d61d965e40..3d3eecdf3820be7c841ca44f0f4de580df5e61d1 100644 --- a/flash.wdl +++ b/flash.wdl @@ -1,27 +1,31 @@ -task flash { - String? preCommand - File inputR1 - File inputR2 - String outdirPath - String? outPrefix = "flash" - Int? minOverlap - Int? maxOverlap - Boolean? compress = true - Int? threads - Int? memory +version 1.0 + +task Flash { + input { + String? preCommand + File inputR1 + File inputR2 + String outdirPath + String outPrefix = "flash" + Int? minOverlap + Int? maxOverlap + Boolean compress = true + Int threads = 2 + Int memory = 2 + } command { set -e -o pipefail - mkdir -p ${outdirPath} - ${preCommand} + mkdir -p ~{outdirPath} + ~{preCommand} flash \ - ${"--threads=" + threads} \ - ${"--output-directory=" + outdirPath} \ - ${"--output-prefix=" + outPrefix} \ - ${true="--compress " false="" defined(compress)} \ - ${"--min-overlap=" + minOverlap} \ - ${"--max-overlap=" + maxOverlap} \ - ${inputR1} ${inputR2} + ~{"--threads=" + threads} \ + ~{"--output-directory=" + outdirPath} \ + ~{"--output-prefix=" + outPrefix} \ + ~{true="--compress " false="" compress} \ + ~{"--min-overlap=" + minOverlap} \ + ~{"--max-overlap=" + maxOverlap} \ + ~{inputR1} ~{inputR2} } output { @@ -33,8 +37,8 @@ task flash { } runtime { - cpu: select_first([threads, 2]) - memory: select_first([memory, 2]) + cpu: threads + memory: memory } } \ No newline at end of file diff --git a/gatk.wdl b/gatk.wdl index fbbc8138342e1e37aafb7289129d775e6aec962f..68281f386d0dab5e69f734415711a570f657382b 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1,40 +1,44 @@ +version 1.0 + # Apply Base Quality Score Recalibration (BQSR) model task ApplyBQSR { - String? 
preCommand - File? gatkJar - File inputBam - File inputBamIndex - String outputBamPath - File recalibrationReport - Array[File]+ sequenceGroupInterval - File refDict - File refFasta - File refFastaIndex - Int? compressionLevel - - Float? memory - Float? memoryMultiplier - - Int mem = ceil(select_first([memory, 4.0])) + input { + String? preCommand + File? gatkJar + File inputBam + File inputBamIndex + String outputBamPath + File recalibrationReport + Array[File]+ sequenceGroupInterval + File refDict + File refFasta + File refFastaIndex + Int? compressionLevel + + Int memory = 4 + Float memoryMultiplier = 3.0 + } String toolCommand = if defined(gatkJar) - then "java -Xmx" + mem + "G -jar " + gatkJar - else "gatk --java-options -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " + gatkJar + else "gatk --java-options -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - ${toolCommand} \ - ApplyBQSR \ - --create-output-bam-md5 \ - --add-output-sam-program-record \ - -R ${refFasta} \ - -I ${inputBam} \ - --use-original-qualities \ - -O ${outputBamPath} \ - -bqsr ${recalibrationReport} \ - --static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \ - -L ${sep=" -L " sequenceGroupInterval} + ~{preCommand} + ~{toolCommand} \ + ApplyBQSR \ + --create-output-bam-md5 \ + --add-output-sam-program-record \ + -R ~{refFasta} \ + -I ~{inputBam} \ + --use-original-qualities \ + -O ~{outputBamPath} \ + -bqsr ~{recalibrationReport} \ + --static-quantized-quals 10 \ + --static-quantized-quals 20 \ + --static-quantized-quals 30 \ + -L ~{sep=" -L " sequenceGroupInterval} } output { @@ -43,25 +47,29 @@ task ApplyBQSR { } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 3.0])) + memory: ceil(memory * memoryMultiplier) } } # Generate Base Quality Score Recalibration (BQSR) model task BaseRecalibrator { - String? preCommand - File? 
gatkJar - File inputBam - File inputBamIndex - String recalibrationReportPath - Array[File]+ sequenceGroupInterval - Array[File]? knownIndelsSitesVCFs - Array[File]? knownIndelsSitesIndices - File? dbsnpVCF - File? dbsnpVCFindex - File refDict - File refFasta - File refFastaIndex + input { + String? preCommand + File? gatkJar + File inputBam + File inputBamIndex + String recalibrationReportPath + Array[File]+ sequenceGroupInterval + Array[File]? knownIndelsSitesVCFs + Array[File]? knownIndelsSitesIndices + File? dbsnpVCF + File? dbsnpVCFindex + File refDict + File refFasta + File refFastaIndex + Int memory = 4 + Float memoryMultiplier = 3.0 + } Array[File]+ knownIndelsSitesVCFsArg = flatten([ select_first([knownIndelsSitesVCFs, []]), @@ -72,26 +80,21 @@ task BaseRecalibrator { select_all([dbsnpVCFindex]) ]) - Float? memory - Float? memoryMultiplier - - Int mem = ceil(select_first([memory, 4.0])) - String toolCommand = if defined(gatkJar) - then "java -Xmx" + mem + "G -jar " + gatkJar - else "gatk --java-options -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " + gatkJar + else "gatk --java-options -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - ${toolCommand} \ - BaseRecalibrator \ - -R ${refFasta} \ - -I ${inputBam} \ - --use-original-qualities \ - -O ${recalibrationReportPath} \ - --known-sites ${sep=" --known-sites " knownIndelsSitesVCFsArg} \ - -L ${sep=" -L " sequenceGroupInterval} + ~{preCommand} + ~{toolCommand} \ + BaseRecalibrator \ + -R ~{refFasta} \ + -I ~{inputBam} \ + --use-original-qualities \ + -O ~{recalibrationReportPath} \ + --known-sites ~{sep=" --known-sites " knownIndelsSitesVCFsArg} \ + -L ~{sep=" -L " sequenceGroupInterval} } output { @@ -99,48 +102,48 @@ task BaseRecalibrator { } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 3.0])) + memory: ceil(memory * memoryMultiplier) } } task CombineGVCFs { - String? 
preCommand - Array[File]+ gvcfFiles - Array[File]+ gvcfFileIndexes - Array[File]+ intervals - - String outputPath + input { + String? preCommand + Array[File]+ gvcfFiles + Array[File]+ gvcfFileIndexes + Array[File]+ intervals - String? gatkJar + String outputPath - File refFasta - File refFastaIndex - File refDict + String? gatkJar - Int? compressionLevel - Float? memory - Float? memoryMultiplier + File refFasta + File refFastaIndex + File refDict - Int mem = ceil(select_first([memory, 4.0])) + Int? compressionLevel #TODO This isn't being used? + Int memory = 4 + Float memoryMultiplier = 3.0 + } String toolCommand = if defined(gatkJar) - then "java -Xmx" + mem + "G -jar " + gatkJar - else "gatk --java-options -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " + gatkJar + else "gatk --java-options -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} + ~{preCommand} - if [ ${length(gvcfFiles)} -gt 1 ]; then - ${toolCommand} \ + if [ ~{length(gvcfFiles)} -gt 1 ]; then + ~{toolCommand} \ CombineGVCFs \ - -R ${refFasta} \ - -O ${outputPath} \ - -V ${sep=' -V ' gvcfFiles} \ - -L ${sep=' -L ' intervals} + -R ~{refFasta} \ + -O ~{outputPath} \ + -V ~{sep=' -V ' gvcfFiles} \ + -L ~{sep=' -L ' intervals} else # TODO this should be handeled in wdl - ln -sf ${select_first(gvcfFiles)} ${outputPath} - ln -sf ${select_first(gvcfFileIndexes)} ${outputPath}.tbi + ln -sf ~{select_first(gvcfFiles)} ~{outputPath} + ln -sf ~{select_first(gvcfFileIndexes)} ~{outputPath}.tbi fi } @@ -150,33 +153,33 @@ task CombineGVCFs { } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 3.0])) + memory: ceil(memory * memoryMultiplier) } } # Combine multiple recalibration tables from scattered BaseRecalibrator runs task GatherBqsrReports { - String? preCommand - String? gatkJar - Array[File] inputBQSRreports - String outputReportPath - - Float? memory - Float? memoryMultiplier - - Int mem = ceil(select_first([memory, 4.0])) + input { + String? preCommand + String? 
gatkJar + Array[File] inputBQSRreports + String outputReportPath + + Int memory = 4 + Float memoryMultiplier = 3.0 + } String toolCommand = if defined(gatkJar) - then "java -Xmx" + mem + "G -jar " + gatkJar - else "gatk --java-options -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " + gatkJar + else "gatk --java-options -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - ${toolCommand} \ + ~{preCommand} + ~{toolCommand} \ GatherBQSRReports \ - -I ${sep=' -I ' inputBQSRreports} \ - -O ${outputReportPath} + -I ~{sep=' -I ' inputBQSRreports} \ + -O ~{outputReportPath} } output { @@ -184,51 +187,50 @@ task GatherBqsrReports { } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 3.0])) + memory: ceil(memory * memoryMultiplier) } } task GenotypeGVCFs { - String? preCommand - File gvcfFiles - File gvcfFileIndexes - Array[File]+ intervals + input { + String? preCommand + File gvcfFiles + File gvcfFileIndexes + Array[File]+ intervals - String outputPath + String outputPath - String? gatkJar + String? gatkJar - File refFasta - File refFastaIndex - File refDict + File refFasta + File refFastaIndex + File refDict - File? dbsnpVCF - File? dbsnpVCFindex + File? dbsnpVCF + File? dbsnpVCFindex - Int? compressionLevel - Float? memory - Float? memoryMultiplier - - Int mem = ceil(select_first([memory, 4.0])) + Int? 
compressionLevel + Int memory = 4 + Float memoryMultiplier =3.0 + } String toolCommand = if defined(gatkJar) - then "java -Xmx" + mem + "G -jar " + gatkJar - else "gatk --java-options -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " + gatkJar + else "gatk --java-options -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - - ${toolCommand} \ - GenotypeGVCFs \ - -R ${refFasta} \ - -O ${outputPath} \ - ${"-D " + dbsnpVCF} \ - -G StandardAnnotation \ - --only-output-calls-starting-in-intervals \ - -new-qual \ - -V ${gvcfFiles} \ - -L ${sep=' -L ' intervals} + ~{preCommand} + ~{toolCommand} \ + GenotypeGVCFs \ + -R ~{refFasta} \ + -O ~{outputPath} \ + ~{"-D " + dbsnpVCF} \ + -G StandardAnnotation \ + --only-output-calls-starting-in-intervals \ + -new-qual \ + -V ~{gvcfFiles} \ + -L ~{sep=' -L ' intervals} } output { @@ -237,47 +239,48 @@ task GenotypeGVCFs { } runtime{ - memory: ceil(mem * select_first([memoryMultiplier, 3.0])) + memory: ceil(memory * memoryMultiplier) } } # Call variants on a single sample with HaplotypeCaller to produce a GVCF task HaplotypeCallerGvcf { - String? preCommand - Array[File]+ inputBams - Array[File]+ inputBamsIndex - Array[File]+ intervalList - String gvcfPath - File refDict - File refFasta - File refFastaIndex - Float? contamination - Int? compressionLevel - String? gatkJar - - File? dbsnpVCF - File? dbsnpVCFindex - - Float? memory - Float? memoryMultiplier - Int mem = ceil(select_first([memory, 4.0])) + input { + String? preCommand + Array[File]+ inputBams + Array[File]+ inputBamsIndex + Array[File]+ intervalList + String gvcfPath + File refDict + File refFasta + File refFastaIndex + Float contamination = 0.0 + Int? compressionLevel + String? gatkJar + + File? dbsnpVCF + File? 
dbsnpVCFindex + + Int memory = 4 + Float memoryMultiplier = 3 + } String toolCommand = if defined(gatkJar) - then "java -Xmx" + mem + "G -jar " + gatkJar - else "gatk --java-options -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " + gatkJar + else "gatk --java-options -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - ${toolCommand} \ - HaplotypeCaller \ - -R ${refFasta} \ - -O ${gvcfPath} \ - -I ${sep=" -I " inputBams} \ - -L ${sep=' -L ' intervalList} \ - ${"-D " + dbsnpVCF} \ - -contamination ${default=0 contamination} \ - -ERC GVCF + ~{preCommand} + ~{toolCommand} \ + HaplotypeCaller \ + -R ~{refFasta} \ + -O ~{gvcfPath} \ + -I ~{sep=" -I " inputBams} \ + -L ~{sep=' -L ' intervalList} \ + ~{"-D " + dbsnpVCF} \ + -contamination ~{contamination} \ + -ERC GVCF } output { @@ -286,39 +289,40 @@ task HaplotypeCallerGvcf { } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 3.0])) + memory: ceil(memory * memoryMultiplier) } } task SplitNCigarReads { - String? preCommand - - File inputBam - File inputBamIndex - File refFasta - File refFastaIndex - File refDict - String outputBam - String? gatkJar - Array[File]+ intervals - - Float? memory - Float? memoryMultiplier - Int mem = ceil(select_first([memory, 4.0])) + input { + String? preCommand + + File inputBam + File inputBamIndex + File refFasta + File refFastaIndex + File refDict + String outputBam + String? 
gatkJar + Array[File]+ intervals + + Int memory = 4 + Float memoryMultiplier = 3 + } String toolCommand = if defined(gatkJar) - then "java -Xmx" + mem + "G -jar " + gatkJar - else "gatk --java-options -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " + gatkJar + else "gatk --java-options -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - ${toolCommand} \ + ~{preCommand} + ~{toolCommand} \ SplitNCigarReads \ - -I ${inputBam} \ - -R ${refFasta} \ - -O ${outputBam} \ - -L ${sep=' -L ' intervals} + -I ~{inputBam} \ + -R ~{refFasta} \ + -O ~{outputBam} \ + -L ~{sep=' -L ' intervals} } output { @@ -327,6 +331,6 @@ task SplitNCigarReads { } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 3.0])) + memory: ceil(memory * memoryMultiplier) } } diff --git a/htseq.wdl b/htseq.wdl index b634bf5ee0f08128729723eaf71e77536bf401f6..db399cc2fb35061e69d280eb2efbcf44e1ccfb6d 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -1,25 +1,29 @@ +version 1.0 + task HTSeqCount { - String? preCommand - Array[File] alignmentFiles - File gtfFile - String outputTable - String? format - String? order - String? stranded + input { + String? preCommand + Array[File] alignmentFiles + File gtfFile + String outputTable + String format = "bam" + String order = "pos" + String stranded = "no" - Int? 
memory + Int memory = 3 + } command { set -e -o pipefail - mkdir -p ${sub(outputTable, basename(outputTable), "")} - ${preCommand} + mkdir -p ~{sub(outputTable, basename(outputTable), "")} + ~{preCommand} htseq-count \ - -f ${default="bam" format} \ - -r ${default="pos" order} \ - -s ${default="no" stranded} \ - ${sep=" " alignmentFiles} \ - ${gtfFile} \ - > ${outputTable} + -f ~{format} \ + -r ~{order} \ + -s ~{stranded} \ + ~{sep=" " alignmentFiles} \ + ~{gtfFile} \ + > ~{outputTable} } output { @@ -27,6 +31,6 @@ task HTSeqCount { } runtime { - memory: select_first([memory, 3]) + memory: memory } } \ No newline at end of file diff --git a/macs2.wdl b/macs2.wdl index 88ae526ceb71223f41967ef45814046d764853d8..9364d9dc4f318bc657d42163925c758b592b2cfd 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -1,20 +1,24 @@ +version 1.0 + task PeakCalling { - String? preCommand - Array[File] bamFiles - String outDir - String sampleName - Int? threads - Int? memory - Boolean nomodel = false + input { + String? preCommand + Array[File] bamFiles + String outDir + String sampleName + Int threads = 1 + Int memory = 8 + Boolean nomodel = false + } command { set -e -o pipefail - ${preCommand} + ~{preCommand} macs2 callpeak \ - --treatment ${sep = ' ' bamFiles} \ - --outdir ${outDir} \ - --name ${sampleName} \ - ${true='--nomodel' false='' nomodel} + --treatment ~{sep = ' ' bamFiles} \ + --outdir ~{outDir} \ + --name ~{sampleName} \ + ~{true='--nomodel' false='' nomodel} } output { @@ -22,7 +26,7 @@ task PeakCalling { } runtime { - cpu: select_first([threads,1]) - memory: select_first([memory,8]) + cpu: threads + memory: memory } } \ No newline at end of file diff --git a/mergecounts.wdl b/mergecounts.wdl index 8cca5dd494eefdaf208bedeffa051ec0960ed6d9..5de98e799a341e0f1ac3a3b4d2b6323e8e5a8c8c 100644 --- a/mergecounts.wdl +++ b/mergecounts.wdl @@ -1,26 +1,30 @@ -task MergeCounts { - String? 
preCommand +version 1.0 - Array[File] inputFiles - String outputFile - Int featureColumn - Int valueColumn - Boolean inputHasHeader +task MergeCounts { + input { + String? preCommand + + Array[File] inputFiles + String outputFile + Int featureColumn + Int valueColumn + Boolean inputHasHeader + } # Based on a script by Szymon Kielbasa/Ioannis Moustakas command <<< set -e -o pipefail - mkdir -p ${sub(outputFile, basename(outputFile) + "$", "")} - ${preCommand} + mkdir -p ~{sub(outputFile, basename(outputFile) + "$", "")} + ~{preCommand} R --no-save <<CODE library(dplyr) library(reshape2) - listOfFiles <- c("${sep='", "' inputFiles}") + listOfFiles <- c("~{sep='", "' inputFiles}") - valueI <- ${valueColumn} - featureI <- ${featureColumn} - header <- ${true="TRUE" false="FALSE" inputHasHeader} + valueI <- ~{valueColumn} + featureI <- ~{featureColumn} + header <- ~{true="TRUE" false="FALSE" inputHasHeader} d <- do.call(rbind, lapply(listOfFiles, function(file){ d <- read.table(file, sep="\t", header=header, comment.char="#") @@ -34,7 +38,7 @@ task MergeCounts { })) d <- d %>% dcast(feature ~ sample, value.var="count") - write.table(d, file="${outputFile}", sep="\t", quote=FALSE, row.names=FALSE) + write.table(d, file="~{outputFile}", sep="\t", quote=FALSE, row.names=FALSE) CODE >>> diff --git a/ncbi.wdl b/ncbi.wdl index f32959ec6ebe01494e2b6faf6ed580c354e6417d..0678d3d4ea2fffcb9207e972779d056679058fe0 100644 --- a/ncbi.wdl +++ b/ncbi.wdl @@ -1,41 +1,45 @@ -task genomeDownload { - String outputPath - String? section = "refseq" - String? format = "all" - String? assemblyLevel = "all" - String? taxId - String? refseqCategory - Boolean? humanReadable - String? ncbiBaseUri - Int? parallel - Int? retries - Boolean? verbose=true - Boolean? debug - String? domain = "all" +version 1.0 - String? executable = "ncbi-genome-download" - String? preCommand +task GenomeDownload { + input { + String outputPath + String? section = "refseq" + String? format = "all" + String? 
assemblyLevel = "all" + String? taxId + String? refseqCategory + Boolean? humanReadable + String? ncbiBaseUri + Int? parallel + Int? retries + Boolean verbose = true + Boolean debug = false + String? domain = "all" + + String executable = "ncbi-genome-download" + String? preCommand + } command { set -e -o pipefail - ${preCommand} - ${executable} \ - ${"--section " + section} \ - ${"--format " + format} \ - ${"--assembly-level " + assemblyLevel } \ - ${"--taxid " + taxId } \ - ${"--refseq-category " + refseqCategory} \ - ${"--output-folder " + outputPath } \ - ${true="--human-readable" false="" humanReadable} \ - ${"--uri " + ncbiBaseUri } \ - ${"--parallel " + parallel } \ - ${"--retries " + retries } \ - ${true="--verbose" false="" verbose } \ - ${true="--debug" false ="" debug } \ - ${domain} + ~{preCommand} + ~{executable} \ + ~{"--section " + section} \ + ~{"--format " + format} \ + ~{"--assembly-level " + assemblyLevel } \ + ~{"--taxid " + taxId } \ + ~{"--refseq-category " + refseqCategory} \ + ~{"--output-folder " + outputPath } \ + ~{true="--human-readable" false="" humanReadable} \ + ~{"--uri " + ncbiBaseUri } \ + ~{"--parallel " + parallel } \ + ~{"--retries " + retries } \ + ~{true="--verbose" false="" verbose } \ + ~{true="--debug" false ="" debug } \ + ~{domain} # Check md5sums for all downloaded files - for folder in $(realpath ${outputPath})/*/*/* + for folder in $(realpath ~{outputPath})/*/*/* do ( md5sums="$( @@ -68,23 +72,27 @@ task genomeDownload { } -task downloadNtFasta{ - String libraryPath - String seqTaxMapPath - Boolean? 
unzip = true - String ntDir = libraryPath + "/nt" - String ntFilePath = ntDir + "/nt.fna" +task DownloadNtFasta{ + input { + String libraryPath + String seqTaxMapPath + Boolean unzip = true + String ntDir = libraryPath + "/nt" + String ntFilePath = ntDir + "/nt.fna" + } + command { set -e -o pipefail - mkdir -p ${ntDir} - rsync -av --partial rsync://ftp.ncbi.nih.gov/blast/db/FASTA/nt.gz* ${ntDir} - (cd ${ntDir} && md5sum -c nt.gz.md5) + mkdir -p ~{ntDir} + rsync -av --partial rsync://ftp.ncbi.nih.gov/blast/db/FASTA/nt.gz* ~{ntDir} + (cd ~{ntDir} && md5sum -c nt.gz.md5) # Only unzip when necessary - if ${true='true' false='false' unzip} + if ~{true='true' false='false' unzip} then - zcat ${ntDir}/nt.gz > ${ntFilePath} + zcat ~{ntDir}/nt.gz > ~{ntFilePath} fi - } + } + output { File ntFileGz = ntDir + "/nt.gz" File library = libraryPath @@ -95,19 +103,27 @@ task downloadNtFasta{ } } -task downloadAccessionToTaxId { - String downloadDir - Boolean gzip = false +task DownloadAccessionToTaxId { + input { + String downloadDir + Boolean gzip = false + } + command { set -e -o pipefail - mkdir -p ${downloadDir} - rsync -av --partial rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* ${downloadDir} - (cd ${downloadDir} && md5sum -c *.md5) - for file in ${downloadDir}/nucl_*.accession2taxid.gz + mkdir -p ~{downloadDir} + rsync -av \ + --partial \ + rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* \ + ~{downloadDir} + (cd ~{downloadDir} && md5sum -c *.md5) + for file in ~{downloadDir}/nucl_*.accession2taxid.gz do - zcat $file | tail -n +2 | cut -f 2,3 ${true="| gzip " false='' gzip}> $file.seqtaxmap${true='.gz' false='' gzip} + zcat $file | tail -n +2 | cut -f 2,3 ~{true="| gzip" false='' gzip} > \ + $file.seqtaxmap~{true='.gz' false='' gzip} done - } + } + output { Array[File] seqTaxMaps = glob(downloadDir + "/*.seqtaxmap") Array[File] seqTaxMapsGz = glob(downloadDir + "/*.seqtaxmap.gz") diff --git 
a/picard.wdl b/picard.wdl index 66916efa9ffb874617e1effea459e3dbb1b0c099..0b13ebe4dbf857f311d5f5836e668ebd0545d62d 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1,122 +1,253 @@ -task ScatterIntervalList { - String? preCommand - File interval_list - Int scatter_count - String? picardJar +version 1.0 + +task CollectMultipleMetrics { + input { + String? preCommand + File bamFile + File bamIndex + File refFasta + File refDict + File refFastaIndex + String basename + + Boolean collectAlignmentSummaryMetrics = true + Boolean collectInsertSizeMetrics = true + Boolean qualityScoreDistribution = true + Boolean meanQualityByCycle = true + Boolean collectBaseDistributionByCycle = true + Boolean collectGcBiasMetrics = true + #Boolean rnaSeqMetrics = false # There is a bug in picard https://github.com/broadinstitute/picard/issues/999 + Boolean collectSequencingArtifactMetrics = true + Boolean collectQualityYieldMetrics = true + + String? picardJar + + Int memory = 4 + Float memoryMultiplier = 3.0 + } + + String toolCommand = if defined(picardJar) + then "java -Xmx" + memory + "G -jar " + picardJar + else "picard -Xmx" + memory + "G" + + command { + set -e -o pipefail + mkdir -p $(dirname "~{basename}") + ~{preCommand} + ~{toolCommand} \ + CollectMultipleMetrics \ + I=~{bamFile} \ + R=~{refFasta} \ + O=~{basename} \ + PROGRAM=null \ + ~{true="PROGRAM=CollectAlignmentSummaryMetrics" false="" collectAlignmentSummaryMetrics} \ + ~{true="PROGRAM=CollectInsertSizeMetrics" false="" collectInsertSizeMetrics} \ + ~{true="PROGRAM=QualityScoreDistribution" false="" qualityScoreDistribution} \ + ~{true="PROGRAM=MeanQualityByCycle" false="" meanQualityByCycle} \ + ~{true="PROGRAM=CollectBaseDistributionByCycle" false="" collectBaseDistributionByCycle} \ + ~{true="PROGRAM=CollectGcBiasMetrics" false="" collectGcBiasMetrics} \ + ~{true="PROGRAM=CollectSequencingArtifactMetrics" false="" + collectSequencingArtifactMetrics} \ + ~{true="PROGRAM=CollectQualityYieldMetrics" false="" 
collectQualityYieldMetrics} + } + + output { + File aligmentSummary = basename + ".alignment_summary_metrics" + File baitBiasDetail = basename + ".bait_bias_detail_metrics" + File baitBiasSummary = basename + ".bait_bias_summary_metrics" + File baseDistributionByCycle = basename + ".base_distribution_by_cycle_metrics" + File baseDistributionByCyclePdf = basename + ".base_distribution_by_cycle.pdf" + File errorSummary = basename + ".error_summary_metrics" + File gcBiasDetail = basename + ".gc_bias.detail_metrics" + File gcBiasPdf = basename + ".gc_bias.pdf" + File gcBiasSummary = basename + ".gc_bias.summary_metrics" + File insertSizeHistogramPdf = basename + ".insert_size_histogram.pdf" + File insertSize = basename + ".insert_size_metrics" + File preAdapterDetail = basename + ".pre_adapter_detail_metrics" + File preAdapterSummary = basename + ".pre_adapter_summary_metrics" + File qualityByCycle = basename + ".quality_by_cycle_metrics" + File qualityByCyclePdf = basename + ".quality_by_cycle.pdf" + File qualityDistribution = basename + ".quality_distribution_metrics" + File qualityDistributionPdf = basename + ".quality_distribution.pdf" + File qualityYield = basename + ".quality_yield_metrics" + } + + runtime { + memory: ceil(memory * memoryMultiplier) + } +} + +task CollectRnaSeqMetrics { + input { + String? preCommand + File bamFile + File bamIndex + File refRefflat + String basename + String strandSpecificity = "NONE" - Float? memory - Float? memoryMultiplier + String? 
picardJar - Int mem = ceil(select_first([memory, 4.0])) + Int memory = 4 + Float memoryMultiplier = 3.0 + } String toolCommand = if defined(picardJar) - then "java -Xmx" + mem + "G -jar " + picardJar - else "picard -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " + picardJar + else "picard -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - mkdir scatter_list - ${toolCommand} \ - IntervalListTools \ - SCATTER_COUNT=${scatter_count} \ - SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \ - UNIQUE=true \ - SORT=true \ - INPUT=${interval_list} \ - OUTPUT=scatter_list + mkdir -p $(dirname "~{basename}") + ~{preCommand} + ~{toolCommand} \ + CollectRnaSeqMetrics \ + I=~{bamFile} \ + O=~{basename}.RNA_Metrics \ + CHART_OUTPUT=~{basename}.RNA_Metrics.pdf \ + STRAND_SPECIFICITY=~{strandSpecificity} \ + REF_FLAT=~{refRefflat} } output { - Array[File] out = glob("scatter_list/*/*.interval_list") - Int interval_count = read_int(stdout()) + File chart = basename + ".RNA_Metrics.pdf" + File metrics = basename + ".RNA_Metrics" } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 3.0])) + memory: ceil(memory * memoryMultiplier) } } -# Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs -task GatherBamFiles { - String? preCommand - Array[File]+ input_bams - String output_bam_path - Int? compression_level - String? picardJar +task CollectTargetedPcrMetrics { + input { + String? preCommand + File bamFile + File bamIndex + File refFasta + File refDict + File refFastaIndex + File ampliconIntervals + Array[File]+ targetIntervals + String basename + + String? 
picardJar + + Int memory = 4 + Float memoryMultiplier = 3.0 + } + + String toolCommand = if defined(picardJar) + then "java -Xmx" + memory + "G -jar " + picardJar + else "picard -Xmx" + memory + "G" + + command { + set -e -o pipefail + mkdir -p $(dirname "~{basename}") + ~{preCommand} + ~{toolCommand} \ + CollectTargetedPcrMetrics \ + I=~{bamFile} \ + R=~{refFasta} \ + AMPLICON_INTERVALS=~{ampliconIntervals} \ + TARGET_INTERVALS=~{sep=" TARGET_INTERVALS=" targetIntervals} \ + O=~{basename}.targetPcrMetrics \ + PER_BASE_COVERAGE=~{basename}.targetPcrPerBaseCoverage \ + PER_TARGET_COVERAGE=~{basename}.targetPcrPerTargetCoverage + } + + output { + File perTargetCoverage = basename + ".targetPcrPerTargetCoverage" + File perBaseCoverage = basename + ".targetPcrPerBaseCoverage" + File metrics = basename + ".targetPcrMetrics" + } - Float? memory - Float? memoryMultiplier + runtime { + memory: ceil(memory * memoryMultiplier) + } +} - Int mem = ceil(select_first([memory, 4.0])) +# Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs +task GatherBamFiles { + input { + String? preCommand + Array[File]+ input_bams + String output_bam_path + Int? compression_level + String? 
picardJar + + Int memory = 4 + Float memoryMultiplier = 3.0 + } String toolCommand = if defined(picardJar) - then "java -Xmx" + mem + "G -jar " + picardJar - else "picard -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " + picardJar + else "picard -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - ${toolCommand} \ - GatherBamFiles \ - INPUT=${sep=' INPUT=' input_bams} \ - OUTPUT=${output_bam_path} \ - CREATE_INDEX=true \ - CREATE_MD5_FILE=true + ~{preCommand} + ~{toolCommand} \ + GatherBamFiles \ + INPUT=~{sep=' INPUT=' input_bams} \ + OUTPUT=~{output_bam_path} \ + CREATE_INDEX=true \ + CREATE_MD5_FILE=true } output { - File output_bam = "${output_bam_path}" + File output_bam = "~{output_bam_path}" File output_bam_index = sub(output_bam_path, ".bam$", ".bai") - File output_bam_md5 = "${output_bam_path}.md5" + File output_bam_md5 = "~{output_bam_path}.md5" } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 3.0])) + memory: ceil(memory * memoryMultiplier) } } # Mark duplicate reads to avoid counting non-independent observations task MarkDuplicates { - String? preCommand - Array[File] input_bams - String output_bam_path - String metrics_path - Int? compression_level - String? picardJar - - Float? memory - Float? memoryMultiplier - - # The program default for READ_NAME_REGEX is appropriate in nearly every case. - # Sometimes we wish to supply "null" in order to turn off optical duplicate detection - # This can be desirable if you don't mind the estimated library size being wrong and optical duplicate detection is taking >7 days and failing - String? read_name_regex + input { + String? preCommand + Array[File] input_bams + String output_bam_path + String metrics_path + Int? compression_level + String? picardJar + + Int memory = 4 + Float memoryMultiplier = 3.0 + + # The program default for READ_NAME_REGEX is appropriate in nearly every case. 
+ # Sometimes we wish to supply "null" in order to turn off optical duplicate detection + # This can be desirable if you don't mind the estimated library size being wrong and optical duplicate detection is taking >7 days and failing + String? read_name_regex + } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get marked correctly # This works because the output of BWA is query-grouped and therefore, so is the output of MergeBamAlignment. # While query-grouped isn't actually query-sorted, it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname" - Int mem = ceil(select_first([memory, 4.0])) String toolCommand = if defined(picardJar) - then "java -Xmx" + mem + "G -jar " + picardJar - else "picard -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " + picardJar + else "picard -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - mkdir -p $(dirname ${output_bam_path}) - ${toolCommand} \ - MarkDuplicates \ - INPUT=${sep=' INPUT=' input_bams} \ - OUTPUT=${output_bam_path} \ - METRICS_FILE=${metrics_path} \ - VALIDATION_STRINGENCY=SILENT \ - ${"READ_NAME_REGEX=" + read_name_regex} \ - OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \ - CLEAR_DT="false" \ - CREATE_INDEX=true \ - ADD_PG_TAG_TO_READS=false + ~{preCommand} + mkdir -p $(dirname ~{output_bam_path}) + ~{toolCommand} \ + MarkDuplicates \ + INPUT=~{sep=' INPUT=' input_bams} \ + OUTPUT=~{output_bam_path} \ + METRICS_FILE=~{metrics_path} \ + VALIDATION_STRINGENCY=SILENT \ + ~{"READ_NAME_REGEX=" + read_name_regex} \ + OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \ + CLEAR_DT="false" \ + CREATE_INDEX=true \ + ADD_PG_TAG_TO_READS=false } output { @@ -126,37 +257,38 @@ task MarkDuplicates { } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 3.0])) + memory: ceil(memory * memoryMultiplier) } } # Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs task MergeVCFs { - String? 
preCommand - Array[File] inputVCFs - Array[File] inputVCFsIndexes - String outputVCFpath - Int? compressionLevel - String? picardJar - - Float? memory - Float? memoryMultiplier + input { + String? preCommand + Array[File] inputVCFs + Array[File] inputVCFsIndexes + String outputVCFpath + Int? compressionLevel + String? picardJar + + Int memory = 4 + Float memoryMultiplier = 3.0 + } # Using MergeVcfs instead of GatherVcfs so we can create indices # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket - Int mem = ceil(select_first([memory, 4.0])) String toolCommand = if defined(picardJar) - then "java -Xmx" + mem + "G -jar " + picardJar - else "picard -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " + picardJar + else "picard -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - ${toolCommand} \ - MergeVcfs \ - INPUT=${sep=' INPUT=' inputVCFs} \ - OUTPUT=${outputVCFpath} + ~{preCommand} + ~{toolCommand} \ + MergeVcfs \ + INPUT=~{sep=' INPUT=' inputVCFs} \ + OUTPUT=~{outputVCFpath} } output { @@ -165,34 +297,36 @@ task MergeVCFs { } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 3.0])) + memory: ceil(memory * memoryMultiplier) } } task SamToFastq { - String? preCommand - File inputBam - String outputRead1 - String? outputRead2 - String? outputUnpaired - String? picardJar - Float? memory - Float? memoryMultiplier - Int mem = ceil(select_first([memory, 16.0])) # High memory default to avoid crashes. + input { + String? preCommand + File inputBam + String outputRead1 + String? outputRead2 + String? outputUnpaired + + String? picardJar + Int memory = 16 # High memory default to avoid crashes. 
+ Float memoryMultiplier = 3.0 + } String toolCommand = if defined(picardJar) - then "java -Xmx" + mem + "G -jar " + picardJar - else "picard -Xmx" + mem + "G" + then "java -Xmx" + memory + "G -jar " + picardJar + else "picard -Xmx" + memory + "G" command { set -e -o pipefail - ${preCommand} - ${toolCommand} \ + ~{preCommand} + ~{toolCommand} \ SamToFastq \ - I=${inputBam} \ - ${"FASTQ=" + outputRead1} \ - ${"SECOND_END_FASTQ=" + outputRead2} \ - ${"UNPAIRED_FASTQ=" + outputUnpaired} + I=~{inputBam} \ + ~{"FASTQ=" + outputRead1} \ + ~{"SECOND_END_FASTQ=" + outputRead2} \ + ~{"UNPAIRED_FASTQ=" + outputUnpaired} } output { @@ -202,6 +336,45 @@ task SamToFastq { } runtime { - memory: ceil(mem * select_first([memoryMultiplier, 3.0])) + memory: ceil(memory * memoryMultiplier) + } +} + +task ScatterIntervalList { + input { + String? preCommand + File interval_list + Int scatter_count + String? picardJar + + Int memory = 4 + Float memoryMultiplier = 3.0 + } + + String toolCommand = if defined(picardJar) + then "java -Xmx" + memory + "G -jar " + picardJar + else "picard -Xmx" + memory + "G" + + command { + set -e -o pipefail + ~{preCommand} + mkdir scatter_list + ~{toolCommand} \ + IntervalListTools \ + SCATTER_COUNT=~{scatter_count} \ + SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \ + UNIQUE=true \ + SORT=true \ + INPUT=~{interval_list} \ + OUTPUT=scatter_list + } + + output { + Array[File] out = glob("scatter_list/*/*.interval_list") + Int interval_count = read_int(stdout()) + } + + runtime { + memory: ceil(memory * memoryMultiplier) } -} \ No newline at end of file +} diff --git a/samplesheet.wdl b/samplesheet.wdl new file mode 100644 index 0000000000000000000000000000000000000000..e269911ddac6c9c353dcdcee8d95a8abf90713f6 --- /dev/null +++ b/samplesheet.wdl @@ -0,0 +1,73 @@ +version 1.0 +struct Readgroup { + String id + File R1 + String R1_md5 + File? 
R2 + String R2_md5 + } + +struct Library { + String id + Array[Readgroup]+ readgroups +} + +struct Sample { + String id + Array[Library]+ libraries +} + +task sampleConfigFileToStruct { + input { + File sampleConfigFile + String outputJson = "output.json" + } + + # Below command can convert any samplesheet with a nested dictionary + # structure to a list of objects model. + # It was specifically designed to run on both python2 and python3. + # Only requirement is PyYAML. + # + # Code maintained in https://github.com/rhpvorderman/samplesheet-to-struct + # can be moved to biowdl group later. + command { + python <<CODE + + import yaml + import json + + + def nested_dicts_to_lists(dictionary): + new_dict = dict() + for key, value in dictionary.items(): + if type(value) == dict: + new_dict[key] = dict_to_item_list_with_id(value) + else: + new_dict[key] = value + return new_dict + + + def dict_to_item_list_with_id(dictionary): + items = [] + for sub_key, sub_dictionary in dictionary.items(): + item_dict = dict(id=sub_key, **nested_dicts_to_lists(sub_dictionary)) + items.append(item_dict) + return items + + + with open("~{sampleConfigFile}", "r") as samplesheet: + samplesheet_dict = yaml.safe_load(samplesheet) + + sample_struct = nested_dicts_to_lists(samplesheet_dict) + + with open("~{outputJson}", "w") as output_json: + output_json.write(json.dumps(sample_struct)) + + CODE + } + + output { + Map[String,Array[Sample]] map = read_json(outputJson) + Array[Sample] samples = map["samples"] + } +} \ No newline at end of file diff --git a/samtools.wdl b/samtools.wdl index b574ec5f023c13adb366e301e99bd7702d686fd0..2d910379db88bb9402df50c2e0704e08263a9ff7 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -1,28 +1,36 @@ +version 1.0 + task Index { - String? preCommand - File bamFilePath - String? bamIndexPath + input { + String? preCommand + File bamFilePath + String? 
bamIndexPath + } command { set -e -o pipefail - ${preCommand} - samtools index ${bamFilePath} ${bamIndexPath} + ~{preCommand} + samtools index ~{bamFilePath} ~{bamIndexPath} } output { - File indexFile = if defined(bamIndexPath) then select_first([bamIndexPath]) else bamFilePath + ".bai" + File indexFile = if defined(bamIndexPath) + then select_first([bamIndexPath]) + else bamFilePath + ".bai" } } task Merge { - String? preCommand - Array[File]+ bamFiles - String outputBamPath + input { + String? preCommand + Array[File]+ bamFiles + String outputBamPath + } command { set -e -o pipefail - ${preCommand} - samtools merge ${outputBamPath} ${sep=' ' bamFiles} + ~{preCommand} + samtools merge ~{outputBamPath} ~{sep=' ' bamFiles} } output { @@ -31,14 +39,16 @@ task Merge { } task Markdup { - String? preCommand - File inputBam - String outputBamPath + input { + String? preCommand + File inputBam + String outputBamPath + } command { set -e -o pipefail - ${preCommand} - samtools markdup ${inputBam} ${outputBamPath} + ~{preCommand} + samtools markdup ~{inputBam} ~{outputBamPath} } output { @@ -47,15 +57,17 @@ task Markdup { } task Flagstat { - String? preCommand - File inputBam - String outputPath + input { + String? preCommand + File inputBam + String outputPath + } command { set -e -o pipefail - ${preCommand} - mkdir -p $(dirname ${outputPath}) - samtools flagstat ${inputBam} > ${outputPath} + ~{preCommand} + mkdir -p $(dirname ~{outputPath}) + samtools flagstat ~{inputBam} > ~{outputPath} } output { @@ -63,46 +75,50 @@ task Flagstat { } } -task fastq { - String? preCommand - File inputBam - String outputRead1 - String? outputRead2 - String? outputRead0 - Int? includeFilter - Int? excludeFilter - Int? excludeSpecificFilter - Boolean? appendReadNumber - Boolean? outputQuality - Int? compressionLevel - Int? threads - Int? memory - Int totalThreads = select_first([threads, 1]) +task Fastq { + input { + String? preCommand + File inputBam + String outputRead1 + String? 
outputRead2 + String? outputRead0 + Int? includeFilter + Int? excludeFilter + Int? excludeSpecificFilter + Boolean? appendReadNumber + Boolean? outputQuality + Int? compressionLevel + Int threads = 1 + Int memory = 1 + } command { - ${preCommand} + ~{preCommand} samtools fastq \ - ${true="-1" false="-s" defined(outputRead2)} ${outputRead1} \ - ${"-2 " + outputRead2} \ - ${"-0 " + outputRead0} \ - ${"-f " + includeFilter} \ - ${"-F " + excludeFilter} \ - ${"-G " + excludeSpecificFilter} \ - ${true="-N" false="-n" appendReadNumber} \ - ${true="-O" false="" outputQuality} \ - ${"-c " + compressionLevel} \ - ${"--threads " + totalThreads} \ - ${inputBam} + ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ + ~{"-2 " + outputRead2} \ + ~{"-0 " + outputRead0} \ + ~{"-f " + includeFilter} \ + ~{"-F " + excludeFilter} \ + ~{"-G " + excludeSpecificFilter} \ + ~{true="-N" false="-n" appendReadNumber} \ + ~{true="-O" false="" outputQuality} \ + ~{"-c " + compressionLevel} \ + ~{"--threads " + threads} \ + ~{inputBam} } + output { File read1 = outputRead1 File? read2 = outputRead2 File? read0 = outputRead0 } + runtime { - cpu: totalThreads - memory: select_first([memory, 1]) + cpu: threads + memory: memory } + parameter_meta { preCommand: "A command that is run before the task. Can be used to activate environments" inputBam: "The bam file to process." @@ -115,39 +131,41 @@ task fastq { } } -task view { - String? preCommand - File inFile - File? referenceFasta - String outputFileName - Boolean? outputBam - Boolean? uncompressedBamOutput - Int? includeFilter - Int? excludeFilter - Int? excludeSpecificFilter - Int? threads - Int? memory +task View { + input { + String? preCommand + File inFile + File? referenceFasta + String outputFileName + Boolean? outputBam + Boolean? uncompressedBamOutput + Int? includeFilter + Int? excludeFilter + Int? 
excludeSpecificFilter + Int threads = 1 + Int memory = 1 + } command { - set -e -o pipefail - ${preCommand} - samtools view \ - ${"-T " + referenceFasta} \ - ${"-o " + outputFileName} \ - ${true="-b " false="" outputBam} \ - ${true="-u " false="" uncompressedBamOutput} \ - ${"-f " + includeFilter} \ - ${"-F " + excludeFilter} \ - ${"-G " + excludeSpecificFilter} \ - ${"--threads " + threads - 1} \ - ${inFile} + set -e -o pipefail + ~{preCommand} + samtools view \ + ~{"-T " + referenceFasta} \ + ~{"-o " + outputFileName} \ + ~{true="-b " false="" outputBam} \ + ~{true="-u " false="" uncompressedBamOutput} \ + ~{"-f " + includeFilter} \ + ~{"-F " + excludeFilter} \ + ~{"-G " + excludeSpecificFilter} \ + ~{"--threads " + (threads - 1)} \ + ~{inFile} } output { File outputFile = outputFileName } runtime { - cpu: select_first([threads, 1]) - memory: select_first([memory, 1]) + cpu: threads + memory: memory } } diff --git a/seqtk.wdl b/seqtk.wdl index 35fbedf368a7d62f1c9ed9643ea96fe0dd7a5a12..64c604a5ecfe0beba399605dcabb1bb3de0ee59a 100644 --- a/seqtk.wdl +++ b/seqtk.wdl @@ -1,26 +1,32 @@ -task sample { - File sequenceFile - String? outFilePath = "subsampledReads.fq.gz" - String? preCommand - Int? seed - Boolean? twoPassMode - Float? fraction - Int? number - Boolean? zip = true +version 1.0 + +task Sample { + input { + File sequenceFile + String outFilePath = "subsampledReads.fq.gz" + String? preCommand + Int? seed + Boolean twoPassMode = false + Float? fraction + Int? 
number + Boolean zip = true + } command { - set -e -o pipefail - ${'mkdir -p $(dirname ' + outFilePath + ')'} - ${preCommand} - seqtk sample \ - ${"-s " + seed} \ - ${true="-2 " false="" twoPassMode} \ - ${sequenceFile} \ - ${number} ${fraction} \ - ${true="| gzip" false="" zip} \ - ${"> " + outFilePath} + set -e -o pipefail + mkdir -p $(dirname ~{outFilePath}) + ~{preCommand} + seqtk sample \ + ~{"-s " + seed} \ + ~{true="-2 " false="" twoPassMode} \ + ~{sequenceFile} \ + ~{number} \ + ~{fraction} \ + ~{true="| gzip" false="" zip} \ + > ~{outFilePath} } + output { - File subsampledReads= select_first([outFilePath]) + File subsampledReads = outFilePath } } \ No newline at end of file diff --git a/spades.wdl b/spades.wdl index f0feb573ec4fb71f09ec5cb42f62fe45c4ed7cb8..47195f196967b8b1863b908d9c483ed49df9f978 100644 --- a/spades.wdl +++ b/spades.wdl @@ -1,68 +1,71 @@ -task spades { - String outputDir - String? preCommand - File read1 - File? read2 - File? interlacedReads - File? sangerReads - File? pacbioReads - File? nanoporeReads - File? tslrContigs - File? trustedContigs - File? untrustedContigs - Boolean? singleCell - Boolean? metagenomic - Boolean? rna - Boolean? plasmid - Boolean? ionTorrent - Boolean? onlyErrorCorrection - Boolean? onlyAssembler - Boolean? careful - Boolean? disableGzipOutput - Boolean? disableRepeatResolution - File? dataset - Int? threads - Float? memoryGb - File? tmpDir - String? k - Float? covCutoff - Int? phredOffset - Int finalThreads = select_first([threads,1]) - Float totalMemory = select_first([memoryGb, finalThreads * 16.0]) - Int finalMemory = ceil(totalMemory) - Int clusterMemory = ceil(totalMemory / finalThreads) +version 1.0 + +task Spades { + input { + String outputDir + String? preCommand + File read1 + File? read2 + File? interlacedReads + File? sangerReads + File? pacbioReads + File? nanoporeReads + File? tslrContigs + File? trustedContigs + File? untrustedContigs + Boolean? singleCell + Boolean? metagenomic + Boolean? 
rna + Boolean? plasmid + Boolean? ionTorrent + Boolean? onlyErrorCorrection + Boolean? onlyAssembler + Boolean? careful + Boolean? disableGzipOutput + Boolean? disableRepeatResolution + File? dataset + Int threads = 1 + Float memoryGb = 16.0 + File? tmpDir + String? k + Float? covCutoff + Int? phredOffset + } + + Int clusterMemory = ceil(memoryGb / threads) command { set -e -o pipefail - ${preCommand} + ~{preCommand} spades.py \ - ${"-o " + outputDir} \ - ${true="--sc" false="" singleCell} \ - ${true="--meta" false="" metagenomic} \ - ${true="--rna" false="" rna} \ - ${true="--plasmid" false="" plasmid} \ - ${true="--iontorrent" false="" ionTorrent} \ - ${"--12 " + interlacedReads } \ - ${true="-1" false="-s" defined(read2)} ${read1} \ - ${"-2 " + read2 } \ - ${"--sanger " + sangerReads } \ - ${"--pacbio " + pacbioReads } \ - ${"--nanopore " + nanoporeReads } \ - ${"--tslr " + tslrContigs } \ - ${"--trusted-contigs " + trustedContigs } \ - ${"--untrusted-contigs " + untrustedContigs } \ - ${true="--only-error-correction" false="" onlyErrorCorrection } \ - ${true="--only-assembler" false="" onlyAssembler } \ - ${true="--careful" false="" careful } \ - ${true="--disable-gzip-output" false="" disableGzipOutput} \ - ${true="--disable-rr" false="" disableRepeatResolution } \ - ${"--dataset " + dataset } \ - ${"--threads " + finalThreads} \ - ${"--memory " + finalMemory } \ - ${"-k " + k } \ - ${"--cov-cutoff " + covCutoff } \ - ${"--phred-offset " + phredOffset } + ~{"-o " + outputDir} \ + ~{true="--sc" false="" singleCell} \ + ~{true="--meta" false="" metagenomic} \ + ~{true="--rna" false="" rna} \ + ~{true="--plasmid" false="" plasmid} \ + ~{true="--iontorrent" false="" ionTorrent} \ + ~{"--12 " + interlacedReads} \ + ~{true="-1" false="-s" defined(read2)} ~{read1} \ + ~{"-2 " + read2} \ + ~{"--sanger " + sangerReads} \ + ~{"--pacbio " + pacbioReads} \ + ~{"--nanopore " + nanoporeReads} \ + ~{"--tslr " + tslrContigs} \ + ~{"--trusted-contigs " + trustedContigs} \ + 
~{"--untrusted-contigs " + untrustedContigs} \ + ~{true="--only-error-correction" false="" onlyErrorCorrection} \ + ~{true="--only-assembler" false="" onlyAssembler} \ + ~{true="--careful" false="" careful} \ + ~{true="--disable-gzip-output" false="" disableGzipOutput} \ + ~{true="--disable-rr" false="" disableRepeatResolution} \ + ~{"--dataset " + dataset} \ + ~{"--threads " + threads} \ + ~{"--memory " + ceil(memoryGb)} \ + ~{"-k " + k} \ + ~{"--cov-cutoff " + covCutoff} \ + ~{"--phred-offset " + phredOffset} } + output { Array[File] correctedReads = glob(outputDir + "/corrected/*.fastq*") File scaffolds = outputDir + "/scaffolds.fasta" @@ -74,8 +77,9 @@ task spades { File params = outputDir + "/params.txt" File log = outputDir + "/spades.log" } + runtime { - cpu: finalThreads + cpu: threads memory: clusterMemory } } \ No newline at end of file diff --git a/star.wdl b/star.wdl index 941a1d60137ad31dbc4ab7cbe7d8f6616bbee7b5..e03f6301052b648f2b86cd126e5ecfd9bdd27aed 100644 --- a/star.wdl +++ b/star.wdl @@ -1,62 +1,65 @@ +version 1.0 + task Star { - String? preCommand + input { + String? preCommand - Array[File] inputR1 - Array[File]? inputR2 - String genomeDir - String outFileNamePrefix + Array[File] inputR1 + Array[File]? inputR2 + String genomeDir + String outFileNamePrefix - String? outSAMtype - String? readFilesCommand - Int? runThreadN - String? outStd - String? twopassMode - Array[String]? outSAMattrRGline - Int? limitBAMsortRAM + String outSAMtype = "BAM SortedByCoordinate" + String readFilesCommand = "zcat" + Int runThreadN = 1 + String? outStd + String? twopassMode + Array[String]? outSAMattrRGline + Int? limitBAMsortRAM - Int? memory + Int memory = 10 + } #TODO needs to be extended for all possible output extensions Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam"} - # converts String? 
to String for use as key (for the Map above) in output - String key = select_first([outSAMtype, "BAM SortedByCoordinate"]) - command { set -e -o pipefail - mkdir -p ${sub(outFileNamePrefix, basename(outFileNamePrefix) + "$", "")} - ${preCommand} + mkdir -p ~{sub(outFileNamePrefix, basename(outFileNamePrefix) + "$", "")} + ~{preCommand} STAR \ - --readFilesIn ${sep=',' inputR1} ${sep="," inputR2} \ - --outFileNamePrefix ${outFileNamePrefix} \ - --genomeDir ${genomeDir} \ - --outSAMtype ${default="BAM SortedByCoordinate" outSAMtype} \ - --readFilesCommand ${default="zcat" readFilesCommand} \ - ${"--runThreadN " + runThreadN} \ - ${"--outStd " + outStd} \ - ${"--twopassMode " + twopassMode} \ - ${"--limitBAMsortRAM " + limitBAMsortRAM} \ - ${true="--outSAMattrRGline " false="" defined(outSAMattrRGline)} ${sep=" , " outSAMattrRGline} + --readFilesIn ~{sep=',' inputR1} ~{sep="," inputR2} \ + --outFileNamePrefix ~{outFileNamePrefix} \ + --genomeDir ~{genomeDir} \ + --outSAMtype ~{outSAMtype} \ + --readFilesCommand ~{readFilesCommand} \ + ~{"--runThreadN " + runThreadN} \ + ~{"--outStd " + outStd} \ + ~{"--twopassMode " + twopassMode} \ + ~{"--limitBAMsortRAM " + limitBAMsortRAM} \ + ~{true="--outSAMattrRGline " false="" defined(outSAMattrRGline)} ~{sep=" , " outSAMattrRGline} } output { - File bamFile = outFileNamePrefix + "Aligned." + samOutputNames[key] + File bamFile = outFileNamePrefix + "Aligned." + samOutputNames[outSAMtype] } runtime { - cpu: select_first([runThreadN, 1]) - memory: select_first([memory, 10]) + cpu: runThreadN + memory: memory } } -task makeStarRGline { - String sample - String library - String? 
platform - String readgroup +task MakeStarRGline { + input { + String sample + String library + String platform = "ILLUMINA" + String readgroup + } command { - printf '"ID:${readgroup}" "LB:${library}" "PL:${default="ILLUMINA" platform}" "SM:${sample}"' + printf '"ID:~{readgroup}" "LB:~{library}" "PL:~{platform}" "SM:~{sample}"' } output { diff --git a/stringtie.wdl b/stringtie.wdl index f5c6854cbacb671113fde7b4a0ac6e459e128ac1..97455da498f722ddf05008fc7102c9eef360335e 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -1,26 +1,29 @@ +version 1.0 + task Stringtie { - String? preCommand - File alignedReads - File? referenceGtf - Int? threads - String assembledTranscriptsFile - Boolean? firstStranded - Boolean? secondStranded - String? geneAbundanceFile + input { + String? preCommand + File alignedReads + File? referenceGtf + Int threads = 1 + String assembledTranscriptsFile + Boolean? firstStranded + Boolean? secondStranded + String? geneAbundanceFile + } command { set -e -o pipefail - mkdir -p $(dirname ${assembledTranscriptsFile}) - ${preCommand} + mkdir -p $(dirname ~{assembledTranscriptsFile}) + ~{preCommand} stringtie \ - ${"-p " + threads} \ - ${"-G " + referenceGtf} \ - ${true="--rf" false="" firstStranded} \ - ${true="fr" false="" secondStranded} \ - -o ${assembledTranscriptsFile} \ - ${"-A " + geneAbundanceFile} \ - ${alignedReads} \ - + ~{"-p " + threads} \ + ~{"-G " + referenceGtf} \ + ~{true="--rf" false="" firstStranded} \ + ~{true="--fr" false="" secondStranded} \ + -o ~{assembledTranscriptsFile} \ + ~{"-A " + geneAbundanceFile} \ + ~{alignedReads} } output { @@ -29,6 +32,6 @@ task Stringtie { } runtime { - cpu: select_first([threads, 1]) + cpu: threads } } \ No newline at end of file diff --git a/unicycler.wdl b/unicycler.wdl index f7aa36a48067bcd2f880a843d1a4aa401c7f5078..c5b30ba93c184a1aef7417e16997053ca507101e 100644 --- a/unicycler.wdl +++ b/unicycler.wdl @@ -1,102 +1,107 @@ -task unicycler { - String? preCommand - File? short1 - File? short2 - File? 
unpaired - File? long - String out - Int? verbosity - Int? minFastaLength - Int? keep - Boolean? vcf - Int? threads - Int? memory - Int finalThreads = select_first(threads, 1) - Int finalMemory = select_first(memory, 4) - String? mode - Float? minBridgeQual - Int? linearSeqs - File? spadesPath - Boolean? noCorrect - Float? minKmerFrac - Float? maxKmerFrac - Int? kmerCount - Float? depthFilter - Boolean? noMiniasm - File? raconPath - File? existingLongReadAssembly - Boolean? noRotate - File? startGenes - Float? startGeneId - Float? startGeneCov - String? makeblastdbPath - File? tblastnPath - Boolean? noPilon - File? bowtie2Path - File? bowtie2buildPath - File? samtoolsPath - File? pilonPath - File? javaPath - Int? minPolishSize - File? bcftoolsPath - Int? minComponentSize - Int? minDeadEndSize - File? contamination - String? scores - String? lowScore +version 1.0 + +task Unicycler { + input { + String? preCommand + File? short1 + File? short2 + File? unpaired + File? long + String out + Int? verbosity + Int? minFastaLength + Int? keep + Boolean? vcf + Int threads = 1 + Int memory = 4 + String? mode + Float? minBridgeQual + Int? linearSeqs + File? spadesPath + Boolean? noCorrect + Float? minKmerFrac + Float? maxKmerFrac + Int? kmerCount + Float? depthFilter + Boolean? noMiniasm + File? raconPath + File? existingLongReadAssembly + Boolean? noRotate + File? startGenes + Float? startGeneId + Float? startGeneCov + String? makeblastdbPath + File? tblastnPath + Boolean? noPilon + File? bowtie2Path + File? bowtie2buildPath + File? samtoolsPath + File? pilonPath + File? javaPath + Int? minPolishSize + File? bcftoolsPath + Int? minComponentSize + Int? minDeadEndSize + File? contamination + String? scores + String? 
lowScore + } + command { set -e -o pipefail - mkdir -p ${out} - ${preCommand} + mkdir -p ~{out} + ~{preCommand} unicycler \ - ${"--short1 " + short1} \ - ${"--short2 " + short2} \ - ${"--unpaired " + unpaired} \ - ${"--long " + long} \ - --out ${out} \ - ${"--min_fasta_length " + minFastaLength} \ - ${"--keep " + keep } \ - ${true="--vcf" false="" vcf } \ - ${"--threads " + finalThreads } \ - ${"--mode " + mode } \ - ${"--min_bridge_qual " + minBridgeQual } \ - ${"--linear_seqs " + linearSeqs } \ - ${"--spades_path " + spadesPath } \ - ${true="--no_correct" false="" noCorrect } \ - ${"--min_kmer_frac " + minKmerFrac } \ - ${"--max_kmer_frac " + maxKmerFrac } \ - ${"--kmer_count " + kmerCount } \ - ${"--depth_filter " + depthFilter } \ - ${true="--no_miniasm" false="" noMiniasm } \ - ${"--racon_path " + raconPath } \ - ${"--existing_long_read_assembly " + existingLongReadAssembly } \ - ${true="--no_rotate" false="" noRotate } \ - ${"--start_genes " + startGenes } \ - ${"--start_gene_id " + startGeneId } \ - ${"--start_gene_cov " + startGeneCov } \ - ${"--makeblastdb_path " + makeblastdbPath } \ - ${"--tblastn_path " + tblastnPath } \ - ${true="--no_pilon" false="" noPilon } \ - ${"--bowtie2_path " + bowtie2Path } \ - ${"--bowtie2_build_path " + bowtie2buildPath } \ - ${"--samtools_path " + samtoolsPath } \ - ${"--pilon_path " + pilonPath } \ - ${"--java_path " + javaPath } \ - ${"--min_polish_size " + minPolishSize } \ - ${"--bcftools_path " + bcftoolsPath } \ - ${"--min_component_size " + minComponentSize } \ - ${"--min_dead_end_size " + minDeadEndSize } \ - ${"--contamination " + contamination } \ - ${"--scores " + scores } \ - ${"--low_score " + lowScore } + ~{"--short1 " + short1} \ + ~{"--short2 " + short2} \ + ~{"--unpaired " + unpaired} \ + ~{"--long " + long} \ + --out ~{out} \ + ~{"--min_fasta_length " + minFastaLength} \ + ~{"--keep " + keep } \ + ~{true="--vcf" false="" vcf } \ + ~{"--threads " + threads } \ + ~{"--mode " + mode } \ + ~{"--min_bridge_qual 
" + minBridgeQual } \ + ~{"--linear_seqs " + linearSeqs } \ + ~{"--spades_path " + spadesPath } \ + ~{true="--no_correct" false="" noCorrect } \ + ~{"--min_kmer_frac " + minKmerFrac } \ + ~{"--max_kmer_frac " + maxKmerFrac } \ + ~{"--kmer_count " + kmerCount } \ + ~{"--depth_filter " + depthFilter } \ + ~{true="--no_miniasm" false="" noMiniasm } \ + ~{"--racon_path " + raconPath } \ + ~{"--existing_long_read_assembly " + existingLongReadAssembly } \ + ~{true="--no_rotate" false="" noRotate } \ + ~{"--start_genes " + startGenes } \ + ~{"--start_gene_id " + startGeneId } \ + ~{"--start_gene_cov " + startGeneCov } \ + ~{"--makeblastdb_path " + makeblastdbPath } \ + ~{"--tblastn_path " + tblastnPath } \ + ~{true="--no_pilon" false="" noPilon } \ + ~{"--bowtie2_path " + bowtie2Path } \ + ~{"--bowtie2_build_path " + bowtie2buildPath } \ + ~{"--samtools_path " + samtoolsPath } \ + ~{"--pilon_path " + pilonPath } \ + ~{"--java_path " + javaPath } \ + ~{"--min_polish_size " + minPolishSize } \ + ~{"--bcftools_path " + bcftoolsPath } \ + ~{"--min_component_size " + minComponentSize } \ + ~{"--min_dead_end_size " + minDeadEndSize } \ + ~{"--contamination " + contamination } \ + ~{"--scores " + scores } \ + ~{"--low_score " + lowScore } } + output { File assemblyFasta = out + "/assembly.fasta" File assemblyGfa = out + "/assembly.gfa" File log = out + "/unicycler.log" } + runtime { - cpu: finalThreads - memory: finalMemory + cpu: threads + memory: memory } } \ No newline at end of file