Skip to content
Snippets Groups Projects
centrifuge.wdl 6.75 KiB
version 1.0
# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2018
#
# Tasks from centrifuge
# Build a centrifuge index by running `centrifuge-build`.
#
# Inputs:
#   conversionTable      passed as --conversion-table
#   taxonomyTree         passed as --taxonomy-tree
#   inputFasta           first positional argument
#   centrifugeIndexBase  last positional argument; its parent directory is
#                        created with `mkdir -p` before the build starts
#   preCommand           optional shell line executed right after
#                        `set -e -o pipefail`
#   centrifugeBuildExecutable  executable to call (default "centrifuge-build")
#   largeIndex, noAuto, noDiffCover, noRef, justRef
#                        boolean switches mapped to --large-index, --noauto,
#                        --nodc, --noref and --justref respectively
#   bMax, bMaxDivn, offRate, fTabChars, nameTable, sizeTable, seed,
#   kmerCount, threads   optional values; each one is only added to the
#                        command line (with its flag) when it is defined
#   memory               forwarded to the runtime `memory` attribute
#
# Runtime: cpu defaults to 8 and memory to 20 when not supplied.
task Build {
    input {
        File conversionTable
        File taxonomyTree
        File inputFasta
        String centrifugeIndexBase
        String? preCommand
        String? centrifugeBuildExecutable = "centrifuge-build"
        #Boolean? c = false
        Boolean? largeIndex = false
        Boolean? noAuto = false
        Int? bMax
        Int? bMaxDivn
        Boolean? noDiffCover = false
        Boolean? noRef = false
        Boolean? justRef = false
        Int? offRate
        Int? fTabChars
        File? nameTable
        File? sizeTable
        Int? seed
        Int? kmerCount

        Int? threads
        Int? memory
    }
    # Optional values use the `"flag " + value` idiom: when the value is
    # undefined the whole placeholder evaluates to an empty string. Every flag
    # literal therefore needs its trailing space so flag and value stay
    # separate shell words.
    command {
        set -e -o pipefail
        ~{preCommand}
        ~{"mkdir -p $(dirname " + centrifugeIndexBase + ")"}
        ~{centrifugeBuildExecutable} \
        ~{true='--large-index' false='' largeIndex} \
        ~{true='--noauto' false='' noAuto} \
        ~{'--bmax ' + bMax} \
        ~{'--bmaxdivn ' + bMaxDivn} \
        ~{true='--nodc' false='' noDiffCover} \
        ~{true='--noref' false='' noRef} \
        ~{true='--justref' false='' justRef} \
        ~{'--offrate ' + offRate} \
        ~{'--ftabchars ' + fTabChars} \
        ~{'--name-table ' + nameTable} \
        ~{'--size-table ' + sizeTable} \
        ~{'--seed ' + seed} \
        ~{'--kmer-count ' + kmerCount} \
        ~{'--threads ' + threads} \
        --conversion-table ~{conversionTable} \
        --taxonomy-tree ~{taxonomyTree} \
        ~{inputFasta} \
        ~{centrifugeIndexBase}
    }
    runtime {
        cpu: select_first([threads, 8])
        memory: select_first([memory, 20])
    }
}

# Run `centrifuge` on a set of reads and collect the classification output
# together with the report file.
#
# Inputs:
#   outputDir       directory (created with `mkdir -p`) that receives
#                   "centrifuge.out[.gz]" and "centrifuge_report.tsv"
#   compressOutput  when true (the default) the classifier's stdout is piped
#                   through `gzip -c` into the ".gz" path; otherwise the
#                   plain path is handed to centrifuge with -S
#   preCommand      optional shell line executed before centrifuge
#   indexPrefix     passed with -x
#   read1           passed with -1 when read2 is defined, otherwise with -U
#   read2           passed with -2 when defined
#   unpairedReads   passed with -U when defined
#   fastaInput      adds -f when true
#   metFilePath, assignments, minHitLen, minTotalLen, hostTaxIds,
#   excludeTaxIds   optional settings, only emitted when defined
#   threads, memory forwarded to runtime (defaults 4 and 8); threads is also
#                   passed with -p
task Classify {
    input {
        String outputDir
        Boolean? compressOutput = true
        String? preCommand
        String indexPrefix
        Array[File]? unpairedReads
        Array[File]+ read1
        Array[File]? read2
        Boolean? fastaInput

        # Settings that influence what centrifuge writes.
        String? metFilePath # If this is specified, the report file is empty
        Int? assignments
        Int? minHitLen
        Int? minTotalLen
        Array[String]? hostTaxIds
        Array[String]? excludeTaxIds

        Int? threads
        Int? memory
    }

    # Paths derived from outputDir; the classification path gains a ".gz"
    # suffix only when compression is requested.
    String reportPath = outputDir + "/centrifuge_report.tsv"
    String rawOutputPath = outputDir + "/centrifuge.out"
    String classificationPath = if (compressOutput == true)
            then rawOutputPath + ".gz"
            else rawOutputPath

    command <<<
        set -e -o pipefail
        mkdir -p ~{outputDir}
        ~{preCommand}
        centrifuge \
        -p ~{select_first([threads, 4])} \
        -x ~{indexPrefix} \
        ~{true="-f" false="" fastaInput} \
        ~{"-k " + assignments} \
        ~{true="-1" false="-U" defined(read2)} ~{sep=',' read1} \
        ~{true="-2" false="" defined(read2)} ~{sep=',' read2} \
        ~{true="-U" false="" defined(unpairedReads)} ~{sep=',' unpairedReads} \
        --report-file ~{reportPath} \
        ~{"--min-hitlen " + minHitLen} \
        ~{"--min-totallen " + minTotalLen} \
        ~{"--met-file " + metFilePath} \
        ~{true="--host-taxids " false="" defined(hostTaxIds)} ~{sep=',' hostTaxIds} \
        ~{true="--exclude-taxids " false="" defined(excludeTaxIds)} ~{sep=',' excludeTaxIds} \
        ~{true="| gzip -c >" false="-S" compressOutput} ~{classificationPath}
    >>>

    output {
        File classifiedReads = classificationPath
        File reportFile = reportPath
    }

    runtime {
        cpu: select_first([threads, 4])
        memory: select_first([memory, 8])
    }
}

# Download sequences with `centrifuge-download`.
#
# Inputs:
#   libraryPath     passed with -o; also used for the `library` output and
#                   as the root of the "*/*.fna" glob
#   domain          passed as a comma-separated list with -d when defined
#   executable      executable to call (default "centrifuge-download")
#   preCommand      optional shell line executed right after
#                   `set -e -o pipefail`
#   seqTaxMapPath   when defined, its parent directory is created and the
#                   command's stdout is appended (>>) to this path
#   database        positional argument (default "refseq")
#   assemblyLevel   passed double-quoted with -a when defined
#   refseqCategory  passed with -c when defined
#   taxIds          passed as a single-quoted, comma-separated list with -t
#                   when defined
#   filterUnplaced, maskLowComplexRegions, downloadRnaSeqs, modifyHeader,
#   downloadGiMap   boolean switches mapped to -u, -m, -r, -l and -g
#
# NOTE(review): the `seqTaxMap` output is a non-optional File built from the
# optional `seqTaxMapPath`; presumably callers always set it - confirm.
task Download {
    input {
        String libraryPath
        Array[String]? domain
        String? executable = "centrifuge-download"
        String? preCommand
        String? seqTaxMapPath
        String? database = "refseq"
        String? assemblyLevel
        String? refseqCategory
        Array[String]? taxIds
        Boolean? filterUnplaced = false
        Boolean? maskLowComplexRegions = false
        Boolean? downloadRnaSeqs = false
        Boolean? modifyHeader = false
        Boolean? downloadGiMap = false
    }
    # This will use centrifuge-download to download.
    # The bash statement at the beginning is to make sure
    # the directory for the SeqTaxMapPath exists.
    #
    # The quotes around the tax id list are produced by placeholders that
    # depend on defined(taxIds): an unconditional '' would reach the tool as
    # an empty positional argument whenever taxIds is not supplied.
    command {
        set -e -o pipefail
        ~{preCommand}
        ~{"mkdir -p $(dirname " + seqTaxMapPath + ")"}
        ~{executable} \
        -o ~{libraryPath} \
        ~{true='-d ' false='' defined(domain)}~{sep=','  domain} \
        ~{'-a "' + assemblyLevel + '"'} \
        ~{"-c " + refseqCategory} \
        ~{true="-t '" false="" defined(taxIds)}~{sep=',' taxIds}~{true="'" false="" defined(taxIds)} \
        ~{true='-r' false='' downloadRnaSeqs} \
        ~{true='-u' false='' filterUnplaced} \
        ~{true='-m' false='' maskLowComplexRegions} \
        ~{true='-l' false='' modifyHeader} \
        ~{true='-g' false='' downloadGiMap} \
        ~{database} ~{">> " + seqTaxMapPath}
    }
    output {
        File seqTaxMap = "~{seqTaxMapPath}"
        File library = libraryPath
        Array[File] fastaFiles = glob(libraryPath + "/*/*.fna")
    }
}

# Fetch the taxonomy files by calling `centrifuge-download ... taxonomy`.
#
# Inputs:
#   centrifugeTaxonomyDir  passed with -o; "nodes.dmp" and "names.dmp" are
#                          collected from this directory as outputs
#   executable             executable to call (default "centrifuge-download")
#   preCommand             optional shell line executed before the download
task DownloadTaxonomy {
    input {
        String centrifugeTaxonomyDir
        String? executable = "centrifuge-download"
        String? preCommand
    }

    command <<<
        set -e -o pipefail
        ~{preCommand}
        ~{executable} -o ~{centrifugeTaxonomyDir} taxonomy
    >>>

    output {
        File taxonomyTree = "~{centrifugeTaxonomyDir}/nodes.dmp"
        File nameTable = "~{centrifugeTaxonomyDir}/names.dmp"
    }
}

# Produce a report from centrifuge output with `centrifuge-kreport`.
#
# Inputs:
#   preCommand         optional shell line executed before the report tool
#   centrifugeOut      classification output to summarise
#   inputIsCompressed  when true the input is read through a
#                      `<(zcat ...)` process substitution
#   outputDir          directory that receives the report; created with
#                      `mkdir -p` so the final redirect cannot fail on a
#                      missing directory
#   prefix, suffix     report file name is "<prefix>.<suffix>"
#                      (defaults "centrifuge" and "kreport")
#   indexPrefix        passed with -x
#   onlyUnique, showZeros, isCountTable
#                      boolean switches mapped to --only-unique,
#                      --show-zeros and --is-count-table
#   minScore, minLength  passed with --min-score / --min-length when defined
#   cores, memory      forwarded to runtime (defaults 1 and 4)
task Kreport {
    input {
        String? preCommand
        File centrifugeOut
        Boolean inputIsCompressed
        String outputDir
        String? suffix = "kreport"
        String? prefix = "centrifuge"
        String indexPrefix
        Boolean? onlyUnique
        Boolean? showZeros
        Boolean? isCountTable
        Int? minScore
        Int? minLength

        Int? cores
        Int? memory
    }
    String kreportFilePath = outputDir + "/" + prefix + "." + suffix
    command {
        set -e -o pipefail
        mkdir -p ~{outputDir}
        ~{preCommand}
        centrifuge-kreport \
        -x ~{indexPrefix} \
        ~{true="--only-unique" false="" onlyUnique} \
        ~{true="--show-zeros" false="" showZeros} \
        ~{true="--is-count-table" false="" isCountTable} \
        ~{"--min-score " + minScore} \
        ~{"--min-length " + minLength} \
        ~{true="<(zcat" false="" inputIsCompressed} ~{centrifugeOut} \
        ~{true=")" false="" inputIsCompressed} \
        > ~{kreportFilePath}
    }

    output {
        File kreport = kreportFilePath
    }

    runtime {
        cpu: select_first([cores, 1])
        memory: select_first([memory, 4])
    }
}