diff --git a/centrifuge.wdl b/centrifuge.wdl
index 3182261156d76793f95e023581178a27026c96db..09d2ee79b45c7b4bcd284a35cf0b5b3c1154df04 100644
--- a/centrifuge.wdl
+++ b/centrifuge.wdl
@@ -1,6 +1,120 @@
-# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2017
+# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2018
 #
 # Tasks from centrifuge
 
+# Build a centrifuge index (centrifugeIndexBase) from inputFasta, conversionTable
+# and taxonomyTree. Optional inputs are only added to the command when they are set.
+# NOTE(review): runtime memory is a bare Int (default 20) - confirm the unit the backend expects.
+task build {
+
+    File conversionTable
+    File taxonomyTree
+    File inputFasta
+    String centrifugeIndexBase
+    String? preCommand
+    String? centrifugeBuildExecutable = "centrifuge-build"
+    #Boolean? c = false
+    Boolean? largeIndex = false
+    Boolean? noAuto = false
+    Int? bMax
+    Int? bMaxDivn
+    Boolean? noDiffCover = false
+    Boolean? noRef = false
+    Boolean? justRef = false
+    Int? offRate
+    Int? fTabChars
+    File? nameTable
+    File? sizeTable
+    Int? seed
+    Int? threads
+    Int? memory
+    Int? kmerCount
+
+    command {
+        set -e -o pipefail
+        ${preCommand}
+        ${"mkdir -p $(dirname " + centrifugeIndexBase + ")"}
+        ${centrifugeBuildExecutable} \
+        ${true='--large-index' false='' largeIndex} \
+        ${true='--noauto' false='' noAuto} \
+        ${'--bmax ' + bMax} \
+        ${'--bmaxdivn ' + bMaxDivn} \
+        ${true='--nodc' false='' noDiffCover} \
+        ${true='--noref' false='' noRef} \
+        ${true='--justref' false='' justRef} \
+        ${'--offrate ' + offRate} \
+        ${'--ftabchars ' + fTabChars} \
+        ${'--name-table ' + nameTable } \
+        ${'--size-table ' + sizeTable} \
+        ${'--seed ' + seed} \
+        ${'--kmer-count ' + kmerCount} \
+        ${'--threads ' + threads} \
+        --conversion-table ${conversionTable} \
+        --taxonomy-tree ${taxonomyTree} \
+        ${inputFasta} \
+        ${centrifugeIndexBase}
+    }
+    runtime {
+        cpu: select_first([threads, 8])
+        memory: select_first([memory, 20])
+    }
+}
+
+# Run centrifuge on read1 (plus read2 or unpairedReads when given) against indexPrefix.
+# The classification goes to outputDir/centrifuge.out, piped through gzip (.gz) when
+# compressOutput is true; the report goes to outputDir/centrifuge_report.tsv.
+task classify {
+    String outputDir
+    Boolean? compressOutput = true
+    String? preCommand
+    String indexPrefix
+    File? unpairedReads
+    File read1
+    File? read2
+    Boolean? fastaInput
+    # Variables for handling output
+    String outputFileName = outputDir + "/centrifuge.out"
+    String reportFileName = outputDir + "/centrifuge_report.tsv"
+    String finalOutputName = if (compressOutput == true) then outputFileName + ".gz" else outputFileName
+    String? metFileName # If this is specified, the report file is empty
+    Int? assignments
+    Int? minHitLen
+    Int? minTotalLen
+    Array[String]? hostTaxIds
+    Array[String]? excludeTaxIds
+    Int? threads
+    Int? memory
+
+    command {
+        set -e -o pipefail
+        mkdir -p ${outputDir}
+        ${preCommand}
+        centrifuge \
+        ${"-p " + threads} \
+        ${"-x " + indexPrefix} \
+        ${true="-f" false="" fastaInput} \
+        ${true="-k " false="" defined(assignments)} ${assignments} \
+        ${true="-1 " false="-U " defined(read2)} ${read1} \
+        ${"-2 " + read2} \
+        ${"-U " + unpairedReads} \
+        ${"--report-file " + reportFileName} \
+        ${"--min-hitlen " + minHitLen} \
+        ${"--min-totallen " + minTotalLen} \
+        ${"--met-file " + metFileName} \
+        ${true="--host-taxids " false="" defined(hostTaxIds)} ${sep=',' hostTaxIds} \
+        ${true="--exclude-taxids " false="" defined(excludeTaxIds)} ${sep=',' excludeTaxIds} \
+        ${true="| gzip -c >" false="-S" compressOutput} ${finalOutputName}
+    }
+
+    output {
+        File classifiedReads = finalOutputName
+        File reportFile = reportFileName
+    }
+
+    runtime {
+        cpu: select_first([threads, 1])
+        memory: select_first([memory, 4])
+    }
+}
 task download {
     String libraryPath
@@ -62,55 +176,44 @@ task downloadTaxonomy {
     }
 }
 
-task build {
-
-    File conversionTable
-    File taxonomyTree
-    File inputFasta
-    String centrifugeIndexBase
+# Run centrifuge-kreport on centrifugeOut and write the result to kreportFileName
+# (the input path with its .out / .out.gz suffix replaced by .kreport).
+# Compressed input is read through zcat via process substitution.
+task kreport {
     String? preCommand
-    String? centrifugeBuildExecutable = "centrifuge-build"
-    #Boolean? c = false
-    Boolean? largeIndex = false
-    Boolean? noAuto = false
-    Int? bMax
-    Int? bMaxDivn
-    Boolean? noDiffCover = false
-    Boolean? noRef = false
-    Boolean? justRef = false
-    Int? offRate
-    Int? fTabChars
-    File? nameTable
-    File? sizeTable
-    Int? seed
-    Int? threads = 1
-    Int? kmerCount
+    File centrifugeOut
+    Boolean inputIsCompressed
+    String kreportFileName=sub(centrifugeOut, "\\.out$|\\.out\\.gz$", "\\.kreport")
+    String indexPrefix
+    Boolean? onlyUnique
+    Boolean? showZeros
+    Boolean? isCountTable
+    Int? minScore
+    Int? minLength
+    Int? cores
+    Int? memory
 
     command {
         set -e -o pipefail
         ${preCommand}
-        ${"mkdir -p $(dirname " + centrifugeIndexBase + ")"}
-        ${centrifugeBuildExecutable} \
-        ${true='--large-index' false='' largeIndex} \
-        ${true='--noauto' false='' noAuto} \
-        ${'--bmax ' + bMax} \
-        ${'--bmaxdivn ' + bMaxDivn} \
-        ${true='--nodc' false='' noDiffCover} \
-        ${true='--noref' false='' noRef} \
-        ${true='--justref' false='' justRef} \
-        ${'--offrate ' + offRate} \
-        ${'--ftabchars ' + fTabChars} \
-        ${'--name-table ' + nameTable } \
-        ${'--size-table ' + sizeTable} \
-        ${'--seed ' + seed} \
-        ${'--kmer-count' + kmerCount} \
-        ${'--threads ' + threads} \
-        --conversion-table ${conversionTable} \
-        --taxonomy-tree ${taxonomyTree} \
-        ${inputFasta} \
-        ${centrifugeIndexBase}
+        centrifuge-kreport \
+        -x ${indexPrefix} \
+        ${true="--only-unique" false="" onlyUnique} \
+        ${true="--show-zeros" false="" showZeros} \
+        ${true="--is-count-table" false="" isCountTable} \
+        ${"--min-score " + minScore} \
+        ${"--min-length " + minLength} \
+        ${true="<(zcat" false="" inputIsCompressed} ${centrifugeOut} \
+        ${true=")" false="" inputIsCompressed} \
+        > ${kreportFileName}
+    }
+
+    output {
+        File kreport = kreportFileName
     }
+
     runtime {
-        cpu: select_first([threads])
+        cpu: select_first([cores, 1])
+        memory: select_first([memory, 4])
     }
 }