diff --git a/biopet.wdl b/biopet.wdl index 545d6922f3f0391474b04943c3ccde713ab9ce92..9b9722cfa0de8b17c4ab51bc60d9f8d6838ebb3d 100644 --- a/biopet.wdl +++ b/biopet.wdl @@ -12,8 +12,8 @@ task BaseCounter { Float? memory Float? memoryMultiplier - Int mem = ceil(select_first([memory, 12.0])) + command { set -e -o pipefail mkdir -p ${outputDir} @@ -67,6 +67,53 @@ task BaseCounter { } } +task extractAdaptersFastqc { + File? toolJar + File inputFile + String outputDir + String? adapterOutputFilePath = outputDir + "/adapter.list" + String? contamsOutputFilePath = outputDir + "/contaminations.list" + Boolean? skipContams + File? knownContamFile + File? knownAdapterFile + Float? adapterCutoff + Boolean? outputAsFasta + + Float? memory + Float? memoryMultiplier + Int mem = ceil(select_first([memory, 4.0])) + + String toolCommand = if defined(toolJar) + then "java -Xmx" + mem + "G -jar " +toolJar + else "biopet-extractadaptersfastqc -Xmx" + mem + "G" + + command { + set -e + mkdir -p ${outputDir} + ${toolCommand} \ + --inputFile ${inputFile} \ + ${"--adapterOutputFile " + adapterOutputFilePath } \ + ${"--contamsOutputFile " + contamsOutputFilePath } \ + ${"--knownContamFile " + knownContamFile} \ + ${"--knownAdapterFile " + knownAdapterFile} \ + ${"--adapterCutoff " + adapterCutoff} \ + ${true="--skipContams" false="" skipContams} \ + ${true="--outputAsFasta" false="" outputAsFasta} + } + + output { + File adapterOutputFile = select_first([adapterOutputFilePath]) + File contamsOutputFile = select_first([contamsOutputFilePath]) + Array[String] adapterList = read_lines(select_first([adapterOutputFilePath])) + Array[String] contamsList = read_lines(select_first([contamsOutputFilePath])) + } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 2.5])) + } +} + + task FastqSplitter { String? preCommand File inputFastq @@ -99,12 +146,13 @@ task FastqSync { File in2 String out1path String out2path - File tool_jar + File toolJar + command { set -e -o pipefail ${preCommand} mkdir -p $(dirname ${out1path}) $(dirname ${out2path}) - java -jar ${tool_jar} \ + java -jar ${toolJar} \ --in1 ${in1} \ --in2 ${in2} \ --ref1 ${ref1} \ @@ -112,6 +160,7 @@ task FastqSync { --out1 ${out1path} \ --out2 ${out2path} } + output { File out1 = out1path File out2 = out2path @@ -120,7 +169,7 @@ task FastqSync { task SampleConfig { String? preCommand - String tool_jar + File toolJar Array[File]+ inputFiles String keyFilePath String? sample @@ -131,13 +180,13 @@ task SampleConfig { Float? memory Float? memoryMultiplier - Int mem = ceil(select_first([memory, 4.0])) + command { set -e -o pipefail ${preCommand} mkdir -p . ${"$(dirname " + jsonOutputPath + ")"} ${"$(dirname " + tsvOutputPath + ")"} - java -Xmx${mem}G -jar ${tool_jar} \ + java -Xmx${mem}G -jar ${toolJar} \ -i ${sep="-i " inputFiles} \ ${"--sample " + sample} \ ${"--library " + library} \ @@ -160,23 +209,23 @@ task SampleConfig { task ScatterRegions { String? preCommand - File ref_fasta - File ref_dict + File refFasta + File refDict String outputDirPath - String tool_jar + File toolJar Int? scatterSize File? regions Float? memory Float? memoryMultiplier - Int mem = ceil(select_first([memory, 4.0])) + command { set -e -o pipefail ${preCommand} mkdir -p ${outputDirPath} - java -Xmx${mem}G -jar ${tool_jar} \ - -R ${ref_fasta} \ + java -Xmx${mem}G -jar ${toolJar} \ + -R ${refFasta} \ -o ${outputDirPath} \ ${"-s " + scatterSize} \ ${"-L " + regions} diff --git a/centrifuge.wdl b/centrifuge.wdl index 09d2ee79b45c7b4bcd284a35cf0b5b3c1154df04..e950284b5076276bfc6f189ecc2819be1096e665 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -1,8 +1,7 @@ # Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2018 # # Tasks from centrifuge -task build { - +task Build { File conversionTable File taxonomyTree File inputFasta @@ -22,9 +21,10 @@ task build { File? nameTable File? sizeTable Int? seed + Int? kmerCount + Int? threads Int? memory - Int? kmerCount command { set -e -o pipefail @@ -56,25 +56,28 @@ task build { } } -task classify { +task Classify { String outputDir Boolean? compressOutput = true String? preCommand String indexPrefix - File? unpairedReads - File read1 - File? read2 + Array[File]? unpairedReads + Array[File]+ read1 + Array[File]? read2 Boolean? fastaInput # Variables for handling output - String outputFileName = outputDir + "/centrifuge.out" - String reportFileName = outputDir + "/centrifuge_report.tsv" - String finalOutputName = if (compressOutput == true) then outputFileName + ".gz" else outputFileName - String? metFileName # If this is specified, the report file is empty + String outputFilePath = outputDir + "/centrifuge.out" + String reportFilePath = outputDir + "/centrifuge_report.tsv" + String finalOutputPath = if (compressOutput == true) + then outputFilePath + ".gz" + else outputFilePath + String? metFilePath # If this is specified, the report file is empty Int? assignments Int? minHitLen Int? minTotalLen Array[String]? hostTaxIds Array[String]? excludeTaxIds + Int? threads Int? memory @@ -86,22 +89,22 @@ task classify { ${"-p " + threads} \ ${"-x " + indexPrefix} \ ${true="-f" false="" fastaInput} \ - ${true="-k " false="" defined(assignments)} ${assignments} \ - ${true="-1 " false="-U " defined(read2)} ${read1} \ - ${"-2 " + read2} \ - ${"-U " + unpairedReads} \ - ${"--report-file " + reportFileName} \ + ${true="-k" false="" defined(assignments)} ${assignments} \ + ${true="-1" false="-U" defined(read2)} ${sep=',' read1} \ + ${true="-2" false="" defined(read2)} ${sep=',' read2} \ + ${true="-U" false="" defined(unpairedReads)} ${sep=',' unpairedReads} \ + ${"--report-file " + reportFilePath} \ ${"--min-hitlen " + minHitLen} \ ${"--min-totallen " + minTotalLen} \ - ${"--met-file " + metFileName} \ + ${"--met-file " + metFilePath} \ ${true="--host-taxids " false="" defined(hostTaxIds)} ${sep=',' hostTaxIds} \ ${true="--exclude-taxids " false="" defined(excludeTaxIds)} ${sep=',' excludeTaxIds} \ - ${true="| gzip -c >" false="-S" compressOutput} ${finalOutputName} + ${true="| gzip -c >" false="-S" compressOutput} ${finalOutputPath} } output { - File classifiedReads = finalOutputName - File reportFile = reportFileName + File classifiedReads = finalOutputPath + File reportFile = reportFilePath } runtime { @@ -110,7 +113,7 @@ task classify { } } -task download { +task Download { String libraryPath Array[String]? domain String? executable = "centrifuge-download" @@ -153,10 +156,11 @@ task download { } } -task downloadTaxonomy { +task DownloadTaxonomy { String centrifugeTaxonomyDir String? executable = "centrifuge-download" String? preCommand + command { set -e -o pipefail ${preCommand} @@ -164,23 +168,28 @@ task downloadTaxonomy { -o ${centrifugeTaxonomyDir} \ taxonomy } + output { File taxonomyTree = centrifugeTaxonomyDir + "/nodes.dmp" File nameTable = centrifugeTaxonomyDir + "/names.dmp" } } -task kreport { +task Kreport { String? preCommand File centrifugeOut Boolean inputIsCompressed - String kreportFileName=sub(centrifugeOut, "\\.out$|\\.out\\.gz$", "\\.kreport") + String outputDir + String? suffix = "kreport" + String? prefix = "centrifuge" + String kreportFilePath = outputDir + "/" + prefix + "." + suffix String indexPrefix Boolean? onlyUnique Boolean? showZeros Boolean? isCountTable Int? minScore Int? minLength + Int? cores Int? memory @@ -196,11 +205,11 @@ task kreport { ${"--min-length " + minLength} \ ${true="<(zcat" false="" inputIsCompressed} ${centrifugeOut}\ ${true=")" false="" inputIsCompressed} \ - > ${kreportFileName} + > ${kreportFilePath} } output { - File kreport = kreportFileName + File kreport = kreportFilePath } runtime { diff --git a/fastqc.wdl b/fastqc.wdl index f0fd2fec0371432f054aecd9624dc9bda628f292..d9135f73197bf1e985f5a096c45f449f37e42d94 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -55,48 +55,6 @@ task fastqc { } } -task extractAdapters { - File extractAdaptersFastqcJar - File inputFile - String outputDir - String? adapterOutputFilePath = outputDir + "/adapter.list" - String? contamsOutputFilePath = outputDir + "/contaminations.list" - Boolean? skipContams - File? knownContamFile - File? knownAdapterFile - Float? adapterCutoff - Boolean? outputAsFasta - - Float? memory - Float? memoryMultiplier - - Int mem = ceil(select_first([memory, 4.0])) - command { - set -e - mkdir -p ${outputDir} - java -Xmx${mem}G -jar ${extractAdaptersFastqcJar} \ - --inputFile ${inputFile} \ - ${"--adapterOutputFile " + adapterOutputFilePath } \ - ${"--contamsOutputFile " + contamsOutputFilePath } \ - ${"--knownContamFile " + knownContamFile} \ - ${"--knownAdapterFile " + knownAdapterFile} \ - ${"--adapterCutoff " + adapterCutoff} \ - ${true="--skipContams" false="" skipContams} \ - ${true="--outputAsFasta" false="" outputAsFasta} - } - - output { - File adapterOutputFile = select_first([adapterOutputFilePath]) - File contamsOutputFile = select_first([contamsOutputFilePath]) - Array[String] adapterList = read_lines(select_first([adapterOutputFilePath])) - Array[String] contamsList = read_lines(select_first([contamsOutputFilePath])) - } - - runtime { - memory: ceil(mem * select_first([memoryMultiplier, 2.5])) - } -} - task getConfiguration { String? preCommand String? fastqcDirFile = "fastqcDir.txt"