Merge branch 'master' into SASC-741

34bda02c · Peter van 't Hof · GitHub · 8ffea620 · 605c278c · 34bda02c
Unverified Commit 34bda02c authored 6 years ago by Peter van 't Hof Committed by GitHub 6 years ago
--- a/biopet.wdl
+++ b/biopet.wdl
@@ -145,10 +145,9 @@ task SampleConfig {
    }

    output {
-        Array[String] keys = read_lines(stdout())
+        File keysFile = stdout()
        File? jsonOutput = jsonOutputPath
        File? tsvOutput = tsvOutputPath
-        Object values = if (defined(tsvOutput) && size(tsvOutput) > 0) then read_map(tsvOutput) else { "": "" }
    }

    runtime {

--- a/centrifuge.wdl
+++ b/centrifuge.wdl
-# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2017
+# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2018
 #
 # Tasks from centrifuge
+# Builds a centrifuge index from inputFasta with centrifuge-build (or the
+# executable given in centrifugeBuildExecutable).
+# conversionTable and taxonomyTree are always passed. Optional value inputs
+# only add their flag when they are set; Boolean inputs only add their flag
+# when they are true.
+task build {
+
+    File conversionTable
+    File taxonomyTree
+    File inputFasta
+    String centrifugeIndexBase
+    String? preCommand
+    String? centrifugeBuildExecutable = "centrifuge-build"
+    #Boolean? c = false
+    Boolean? largeIndex = false
+    Boolean? noAuto = false
+    Int? bMax
+    Int? bMaxDivn
+    Boolean? noDiffCover = false
+    Boolean? noRef = false
+    Boolean? justRef = false
+    Int? offRate
+    Int? fTabChars
+    File? nameTable
+    File? sizeTable
+    Int? seed
+    Int? threads
+    Int? memory
+    Int? kmerCount
+
+    # The directory part of centrifugeIndexBase is created before the build runs.
+    # Every '<flag> ' + value placeholder keeps a trailing space inside the quotes
+    # so that the flag and its value reach the executable as separate arguments.
+    command {
+        set -e -o pipefail
+        ${preCommand}
+        ${"mkdir -p $(dirname " + centrifugeIndexBase + ")"}
+        ${centrifugeBuildExecutable} \
+        ${true='--large-index' false='' largeIndex} \
+        ${true='--noauto' false='' noAuto} \
+        ${'--bmax ' + bMax} \
+        ${'--bmaxdivn ' + bMaxDivn} \
+        ${true='--nodc' false='' noDiffCover} \
+        ${true='--noref' false='' noRef} \
+        ${true='--justref' false='' justRef} \
+        ${'--offrate ' + offRate} \
+        ${'--ftabchars ' + fTabChars} \
+        ${'--name-table ' + nameTable } \
+        ${'--size-table ' + sizeTable} \
+        ${'--seed ' + seed} \
+        ${'--kmer-count ' + kmerCount} \
+        ${'--threads ' + threads} \
+        --conversion-table ${conversionTable} \
+        --taxonomy-tree ${taxonomyTree} \
+        ${inputFasta} \
+        ${centrifugeIndexBase}
+    }
+    # Falls back to cpu 8 and memory 20 when threads / memory are not supplied.
+    runtime {
+        cpu: select_first([threads, 8])
+        memory: select_first([memory, 20])
+    }
+}
+
+# Runs centrifuge against the index given by indexPrefix and writes its
+# output (finalOutputName) and report (reportFileName) under outputDir.
+task classify {
+    String outputDir
+    Boolean? compressOutput = true
+    String? preCommand
+    String indexPrefix
+    File? unpairedReads
+    File read1
+    File? read2
+    Boolean? fastaInput
+    # Variables for handling output
+    String outputFileName = outputDir + "/centrifuge.out"
+    String reportFileName = outputDir + "/centrifuge_report.tsv"
+    String finalOutputName = if (compressOutput == true) then outputFileName + ".gz" else outputFileName
+    String? metFileName # If this is specified, the report file is empty
+    Int? assignments
+    Int? minHitLen
+    Int? minTotalLen
+    Array[String]? hostTaxIds
+    Array[String]? excludeTaxIds
+    Int? threads
+    Int? memory
+
+    # read1 is passed with -1 when read2 is defined and with -U otherwise.
+    # hostTaxIds / excludeTaxIds are joined with commas behind their flag.
+    # When compressOutput is true, centrifuge's stdout is piped through
+    # gzip into finalOutputName; otherwise finalOutputName is passed via -S.
+    command {
+        set -e -o pipefail
+        mkdir -p ${outputDir}
+        ${preCommand}
+        centrifuge \
+        ${"-p " + threads} \
+        ${"-x " + indexPrefix} \
+        ${true="-f" false="" fastaInput} \
+        ${true="-k " false="" defined(assignments)} ${assignments} \
+        ${true="-1 " false="-U " defined(read2)} ${read1} \
+        ${"-2 " + read2} \
+        ${"-U " + unpairedReads} \
+        ${"--report-file " + reportFileName} \
+        ${"--min-hitlen " + minHitLen} \
+        ${"--min-totallen " + minTotalLen} \
+        ${"--met-file " + metFileName} \
+        ${true="--host-taxids " false="" defined(hostTaxIds)} ${sep=',' hostTaxIds} \
+        ${true="--exclude-taxids " false="" defined(excludeTaxIds)} ${sep=',' excludeTaxIds} \
+        ${true="| gzip -c >" false="-S" compressOutput} ${finalOutputName}
+    }
+
+    output {
+        File classifiedReads = finalOutputName
+        File reportFile = reportFileName
+    }
+
+    # Falls back to cpu 1 and memory 4 when threads / memory are not supplied.
+    runtime {
+        cpu: select_first([threads, 1])
+        memory: select_first([memory, 4])
+    }
+}

 task download {
    String libraryPath
@@ -62,55 +170,41 @@ task downloadTaxonomy {
    }
 }

-task build {
-
-    File conversionTable
-    File taxonomyTree
-    File inputFasta
-    String centrifugeIndexBase
+task kreport {
    String? preCommand
-    String? centrifugeBuildExecutable = "centrifuge-build"
-    #Boolean? c = false
-    Boolean? largeIndex = false
-    Boolean? noAuto = false
-    Int? bMax
-    Int? bMaxDivn
-    Boolean? noDiffCover = false
-    Boolean? noRef = false
-    Boolean? justRef = false
-    Int? offRate
-    Int? fTabChars
-    File? nameTable
-    File? sizeTable
-    Int? seed
-    Int? threads = 1
-    Int? kmerCount
+    File centrifugeOut  # input handed to centrifuge-kreport; the command's stdout is redirected to kreportFileName
+    Boolean inputIsCompressed  # when true, centrifugeOut is read through <(zcat ...) instead of being passed directly
+    String kreportFileName=sub(centrifugeOut, "\\.out$|\\.out\\.gz$", "\\.kreport")  # centrifugeOut path with a trailing .out / .out.gz swapped for the kreport suffix
+    String indexPrefix
+    Boolean? onlyUnique
+    Boolean? showZeros
+    Boolean? isCountTable
+    Int? minScore
+    Int? minLength
+    Int? cores  # used for the cpu runtime attribute; 1 when unset
+    Int? memory  # used for the memory runtime attribute; 4 when unset

    command {
        set -e -o pipefail
        ${preCommand}
-        ${"mkdir -p $(dirname " + centrifugeIndexBase + ")"}
-        ${centrifugeBuildExecutable} \
-        ${true='--large-index' false='' largeIndex} \
-        ${true='--noauto' false='' noAuto} \
-        ${'--bmax ' + bMax} \
-        ${'--bmaxdivn ' + bMaxDivn} \
-        ${true='--nodc' false='' noDiffCover} \
-        ${true='--noref' false='' noRef} \
-        ${true='--justref' false='' justRef} \
-        ${'--offrate ' + offRate} \
-        ${'--ftabchars ' + fTabChars} \
-        ${'--name-table ' + nameTable } \
-        ${'--size-table ' + sizeTable} \
-        ${'--seed ' + seed} \
-        ${'--kmer-count' + kmerCount} \
-        ${'--threads ' + threads} \
-        --conversion-table ${conversionTable} \
-        --taxonomy-tree ${taxonomyTree} \
-        ${inputFasta} \
-        ${centrifugeIndexBase}
+        centrifuge-kreport \
+        -x ${indexPrefix} \
+        ${true="--only-unique" false="" onlyUnique} \
+        ${true="--show-zeros" false="" showZeros} \
+        ${true="--is-count-table" false="" isCountTable} \
+        ${"--min-score " + minScore} \
+        ${"--min-length " + minLength} \
+        ${true="<(zcat" false="" inputIsCompressed} ${centrifugeOut}\
+        ${true=")" false="" inputIsCompressed} \
+        > ${kreportFileName}
+    }
+
+    output {
+        File kreport = kreportFileName
    }
+
    runtime {
-        cpu: select_first([threads])
+        cpu: select_first([cores, 1])
+        memory: select_first([memory, 4])
    }
 }
--- a/flash.wdl
+++ b/flash.wdl
+# Runs the flash command on the read pair inputR1 / inputR2 and collects the
+# files it writes to outdirPath under the prefix outPrefix.
+# Optional value inputs only add their flag when they are set.
+task flash {
+    String? preCommand
+    File inputR1
+    File inputR2
+    String outdirPath
+    String? outPrefix = "flash"
+    Int? minOverlap
+    Int? maxOverlap
+    Boolean? compress = true
+    Int? threads
+    Int? memory
+    # Extension of the fastq outputs: ".gz" is only present when --compress is
+    # passed, so it is derived from the same Boolean that controls the flag.
+    String fastqExtension = if (compress == true) then ".fastq.gz" else ".fastq"
+
+    # --compress is driven by the value of compress, not by whether it is
+    # defined: compress has a default, so defined(compress) is always true and
+    # would force compression even when compress is set to false.
+    command {
+        set -e -o pipefail
+        mkdir -p ${outdirPath}
+        ${preCommand}
+        flash \
+        ${"--threads=" + threads} \
+        ${"--output-directory=" + outdirPath} \
+        ${"--output-prefix=" + outPrefix} \
+        ${true="--compress " false="" compress} \
+        ${"--min-overlap=" + minOverlap} \
+        ${"--max-overlap=" + maxOverlap} \
+        ${inputR1} ${inputR2}
+    }
+
+    output {
+        File extendedFrags = outdirPath + "/" + outPrefix + ".extendedFrags" + fastqExtension
+        File notCombined1 = outdirPath + "/" + outPrefix + ".notCombined_1" + fastqExtension
+        File notCombined2 = outdirPath + "/" + outPrefix + ".notCombined_2" + fastqExtension
+        File hist = outdirPath + "/" + outPrefix + ".hist"
+        File histogram = outdirPath + "/" + outPrefix + ".histogram"
+    }
+
+    # Falls back to cpu 2 and memory 2 when threads / memory are not supplied.
+    runtime {
+        cpu: select_first([threads, 2])
+        memory: select_first([memory, 2])
+    }
+
+}
\ No newline at end of file
--- a/gatk.wdl
+++ b/gatk.wdl
@@ -253,6 +253,7 @@ task SplitNCigarReads {
    String? preCommand

    File input_bam
+    File input_bam_index
    File ref_fasta
    File ref_fasta_index
    File ref_dict
@@ -271,13 +272,13 @@ task SplitNCigarReads {
        SplitNCigarReads \
        -I ${input_bam} \
        -R ${ref_fasta} \
-        -O ${output_bam} # might have to be -o depending on GATK version \
+        -O ${output_bam} \
        -L ${sep=' -L ' intervals}
    }

    output {
        File bam = output_bam
-        File bam_index = output_bam + ".bai"
+        File bam_index = sub(output_bam, "\\.bam$", ".bai")
    }

    runtime {

--- a/star.wdl
+++ b/star.wdl
@@ -12,6 +12,7 @@ task Star {
    String? outStd
    String? twopassMode
    Array[String]? outSAMattrRGline
+    Int? limitBAMsortRAM

    Int? memory

@@ -34,6 +35,7 @@ task Star {
        ${"--runThreadN " + runThreadN} \
        ${"--outStd " + outStd} \
        ${"--twopassMode " + twopassMode} \
+        ${"--limitBAMsortRAM " + limitBAMsortRAM} \
        ${true="--outSAMattrRGline " false="" defined(outSAMattrRGline)} ${sep=" , " outSAMattrRGline}
    }