Patch summary (free text ahead of the first "diff --git" header; patch tools skip it):
  biopet.wdl      SampleConfig: expose stdout as "File keysFile"; drop the "keys" and "values" outputs.
  centrifuge.wdl  Move task build to the top of the file (threads default removed, memory added,
                  "--kmer-count " now separated from its value); add tasks classify and kreport.
  flash.wdl       New task flash; --compress follows the value of "compress" and the fastq
                  output names follow the same switch.
  gatk.wdl        SplitNCigarReads: add input_bam_index, drop the inline comment that sat in front
                  of the line-continuation backslash, derive the index name with sub().
  star.wdl        Star: add optional limitBAMsortRAM.

diff --git a/biopet.wdl b/biopet.wdl
index e13d7630e473dbf1f758430dfe1204f512996ec3..7d0d0a5fcfe5f1cb9711a3da6b3665f34546e3a8 100644
--- a/biopet.wdl
+++ b/biopet.wdl
@@ -145,10 +145,9 @@ task SampleConfig {
     }
 
     output {
-        Array[String] keys = read_lines(stdout())
+        File keysFile = stdout()
         File? jsonOutput = jsonOutputPath
         File? tsvOutput = tsvOutputPath
-        Object values = if (defined(tsvOutput) && size(tsvOutput) > 0) then read_map(tsvOutput) else { "": "" }
     }
 
     runtime {
diff --git a/centrifuge.wdl b/centrifuge.wdl
index 3182261156d76793f95e023581178a27026c96db..09d2ee79b45c7b4bcd284a35cf0b5b3c1154df04 100644
--- a/centrifuge.wdl
+++ b/centrifuge.wdl
@@ -1,6 +1,114 @@
-# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2017
+# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2018
 #
 # Tasks from centrifuge
+task build {
+
+    File conversionTable
+    File taxonomyTree
+    File inputFasta
+    String centrifugeIndexBase
+    String? preCommand
+    String? centrifugeBuildExecutable = "centrifuge-build"
+    #Boolean? c = false
+    Boolean? largeIndex = false
+    Boolean? noAuto = false
+    Int? bMax
+    Int? bMaxDivn
+    Boolean? noDiffCover = false
+    Boolean? noRef = false
+    Boolean? justRef = false
+    Int? offRate
+    Int? fTabChars
+    File? nameTable
+    File? sizeTable
+    Int? seed
+    Int? threads
+    Int? memory
+    Int? kmerCount
+
+    command {
+        set -e -o pipefail
+        ${preCommand}
+        ${"mkdir -p $(dirname " + centrifugeIndexBase + ")"}
+        ${centrifugeBuildExecutable} \
+        ${true='--large-index' false='' largeIndex} \
+        ${true='--noauto' false='' noAuto} \
+        ${'--bmax ' + bMax} \
+        ${'--bmaxdivn ' + bMaxDivn} \
+        ${true='--nodc' false='' noDiffCover} \
+        ${true='--noref' false='' noRef} \
+        ${true='--justref' false='' justRef} \
+        ${'--offrate ' + offRate} \
+        ${'--ftabchars ' + fTabChars} \
+        ${'--name-table ' + nameTable } \
+        ${'--size-table ' + sizeTable} \
+        ${'--seed ' + seed} \
+        ${'--kmer-count ' + kmerCount} \
+        ${'--threads ' + threads} \
+        --conversion-table ${conversionTable} \
+        --taxonomy-tree ${taxonomyTree} \
+        ${inputFasta} \
+        ${centrifugeIndexBase}
+    }
+    runtime {
+        cpu: select_first([threads, 8])
+        memory: select_first([memory, 20])
+    }
+}
+
+task classify {
+    String outputDir
+    Boolean? compressOutput = true
+    String? preCommand
+    String indexPrefix
+    File? unpairedReads
+    File read1
+    File? read2
+    Boolean? fastaInput
+    # Variables for handling output
+    String outputFileName = outputDir + "/centrifuge.out"
+    String reportFileName = outputDir + "/centrifuge_report.tsv"
+    String finalOutputName = if (compressOutput == true) then outputFileName + ".gz" else outputFileName
+    String? metFileName # If this is specified, the report file is empty
+    Int? assignments
+    Int? minHitLen
+    Int? minTotalLen
+    Array[String]? hostTaxIds
+    Array[String]? excludeTaxIds
+    Int? threads
+    Int? memory
+
+    command {
+        set -e -o pipefail
+        mkdir -p ${outputDir}
+        ${preCommand}
+        centrifuge \
+        ${"-p " + threads} \
+        ${"-x " + indexPrefix} \
+        ${true="-f" false="" fastaInput} \
+        ${true="-k " false="" defined(assignments)} ${assignments} \
+        ${true="-1 " false="-U " defined(read2)} ${read1} \
+        ${"-2 " + read2} \
+        ${"-U " + unpairedReads} \
+        ${"--report-file " + reportFileName} \
+        ${"--min-hitlen " + minHitLen} \
+        ${"--min-totallen " + minTotalLen} \
+        ${"--met-file " + metFileName} \
+        ${true="--host-taxids " false="" defined(hostTaxIds)} ${sep=',' hostTaxIds} \
+        ${true="--exclude-taxids " false="" defined(excludeTaxIds)} ${sep=',' excludeTaxIds} \
+        ${true="| gzip -c >" false="-S" compressOutput} ${finalOutputName}
+    }
+
+    output {
+        File classifiedReads = finalOutputName
+        File reportFile = reportFileName
+    }
+
+    runtime {
+        cpu: select_first([threads, 1])
+        memory: select_first([memory, 4])
+    }
+}
 
 task download {
     String libraryPath
@@ -62,55 +170,41 @@ task downloadTaxonomy {
     }
 }
 
-task build {
-
-    File conversionTable
-    File taxonomyTree
-    File inputFasta
-    String centrifugeIndexBase
+task kreport {
     String? preCommand
-    String? centrifugeBuildExecutable = "centrifuge-build"
-    #Boolean? c = false
-    Boolean? largeIndex = false
-    Boolean? noAuto = false
-    Int? bMax
-    Int? bMaxDivn
-    Boolean? noDiffCover = false
-    Boolean? noRef = false
-    Boolean? justRef = false
-    Int? offRate
-    Int? fTabChars
-    File? nameTable
-    File? sizeTable
-    Int? seed
-    Int? threads = 1
-    Int? kmerCount
+    File centrifugeOut
+    Boolean inputIsCompressed
+    String kreportFileName=sub(centrifugeOut, "\\.out$|\\.out\\.gz$", "\\.kreport")
+    String indexPrefix
+    Boolean? onlyUnique
+    Boolean? showZeros
+    Boolean? isCountTable
+    Int? minScore
+    Int? minLength
+    Int? cores
+    Int? memory
 
     command {
         set -e -o pipefail
         ${preCommand}
-        ${"mkdir -p $(dirname " + centrifugeIndexBase + ")"}
-        ${centrifugeBuildExecutable} \
-        ${true='--large-index' false='' largeIndex} \
-        ${true='--noauto' false='' noAuto} \
-        ${'--bmax ' + bMax} \
-        ${'--bmaxdivn ' + bMaxDivn} \
-        ${true='--nodc' false='' noDiffCover} \
-        ${true='--noref' false='' noRef} \
-        ${true='--justref' false='' justRef} \
-        ${'--offrate ' + offRate} \
-        ${'--ftabchars ' + fTabChars} \
-        ${'--name-table ' + nameTable } \
-        ${'--size-table ' + sizeTable} \
-        ${'--seed ' + seed} \
-        ${'--kmer-count' + kmerCount} \
-        ${'--threads ' + threads} \
-        --conversion-table ${conversionTable} \
-        --taxonomy-tree ${taxonomyTree} \
-        ${inputFasta} \
-        ${centrifugeIndexBase}
+        centrifuge-kreport \
+        -x ${indexPrefix} \
+        ${true="--only-unique" false="" onlyUnique} \
+        ${true="--show-zeros" false="" showZeros} \
+        ${true="--is-count-table" false="" isCountTable} \
+        ${"--min-score " + minScore} \
+        ${"--min-length " + minLength} \
+        ${true="<(zcat" false="" inputIsCompressed} ${centrifugeOut} \
+        ${true=")" false="" inputIsCompressed} \
+        > ${kreportFileName}
+    }
+
+    output {
+        File kreport = kreportFileName
     }
+
     runtime {
-        cpu: select_first([threads])
+        cpu: select_first([cores, 1])
+        memory: select_first([memory, 4])
     }
 }
diff --git a/flash.wdl b/flash.wdl
new file mode 100644
index 0000000000000000000000000000000000000000..c081d49cdd3841e91991b0c90ac894d61d965e40
--- /dev/null
+++ b/flash.wdl
@@ -0,0 +1,42 @@
+task flash {
+    String? preCommand
+    File inputR1
+    File inputR2
+    String outdirPath
+    String? outPrefix = "flash"
+    Int? minOverlap
+    Int? maxOverlap
+    Boolean? compress = true
+    # The fastq outputs only carry the ".gz" suffix when --compress is passed.
+    String fastqSuffix = if (compress == true) then ".fastq.gz" else ".fastq"
+    Int? threads
+    Int? memory
+
+    command {
+        set -e -o pipefail
+        mkdir -p ${outdirPath}
+        ${preCommand}
+        flash \
+        ${"--threads=" + threads} \
+        ${"--output-directory=" + outdirPath} \
+        ${"--output-prefix=" + outPrefix} \
+        ${true="--compress " false="" compress} \
+        ${"--min-overlap=" + minOverlap} \
+        ${"--max-overlap=" + maxOverlap} \
+        ${inputR1} ${inputR2}
+    }
+
+    output {
+        File extendedFrags = outdirPath + "/" + outPrefix + ".extendedFrags" + fastqSuffix
+        File notCombined1 = outdirPath + "/" + outPrefix + ".notCombined_1" + fastqSuffix
+        File notCombined2 = outdirPath + "/" + outPrefix + ".notCombined_2" + fastqSuffix
+        File hist = outdirPath + "/" + outPrefix + ".hist"
+        File histogram = outdirPath + "/" + outPrefix + ".histogram"
+    }
+
+    runtime {
+        cpu: select_first([threads, 2])
+        memory: select_first([memory, 2])
+    }
+
+}
\ No newline at end of file
diff --git a/gatk.wdl b/gatk.wdl
index 160849ad00e3d849bfb26a44ce717b73e2c4918f..bd97b427db13da28d323c0408a58f79f0ca0f275 100644
--- a/gatk.wdl
+++ b/gatk.wdl
@@ -253,6 +253,7 @@ task SplitNCigarReads {
     String? preCommand
 
     File input_bam
+    File input_bam_index
     File ref_fasta
     File ref_fasta_index
     File ref_dict
@@ -271,13 +272,13 @@ task SplitNCigarReads {
         SplitNCigarReads \
         -I ${input_bam} \
         -R ${ref_fasta} \
-        -O ${output_bam} # might have to be -o depending on GATK version \
+        -O ${output_bam} \
         -L ${sep=' -L ' intervals}
     }
 
     output {
         File bam = output_bam
-        File bam_index = output_bam + ".bai"
+        File bam_index = sub(output_bam, "\\.bam$", ".bai")
     }
 
     runtime {
diff --git a/star.wdl b/star.wdl
index d7d3b7b595953704ab0de936b82e1ba7405fe279..32dd0565dc2511c7c3073531c5f8e28c9ba707bf 100644
--- a/star.wdl
+++ b/star.wdl
@@ -12,6 +12,7 @@ task Star {
     String? outStd
     String? twopassMode
     Array[String]? outSAMattrRGline
+    Int? limitBAMsortRAM
 
     Int? memory
 
@@ -34,6 +35,7 @@ task Star {
         ${"--runThreadN " + runThreadN} \
         ${"--outStd " + outStd} \
         ${"--twopassMode " + twopassMode} \
+        ${"--limitBAMsortRAM " + limitBAMsortRAM} \
         ${true="--outSAMattrRGline " false="" defined(outSAMattrRGline)} ${sep=" , " outSAMattrRGline}
     }
 