Skip to content
Snippets Groups Projects
centrifuge.wdl 4.71 KiB
Newer Older
Ruben Vorderman's avatar
Ruben Vorderman committed
# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2017
#
# Tasks from centrifuge
task build {

    File conversionTable
    File taxonomyTree
    File inputFasta
    String centrifugeIndexBase
    String? preCommand
    String? centrifugeBuildExecutable = "centrifuge-build"
    #Boolean? c = false
    Boolean? largeIndex = false
    Boolean? noAuto = false
    Int? bMax
    Int? bMaxDivn
    Boolean? noDiffCover = false
    Boolean? noRef = false
    Boolean? justRef = false
    Int? offRate
    Int? fTabChars
    File? nameTable
    File? sizeTable
    Int? seed
    Int? threads = 1
    Int? kmerCount

    command {
        set -e -o pipefail
        ${preCommand}
        ${"mkdir -p $(dirname " + centrifugeIndexBase + ")"}
        ${centrifugeBuildExecutable} \
        ${true='--large-index' false='' largeIndex} \
        ${true='--noauto' false='' noAuto} \
        ${'--bmax ' + bMax} \
        ${'--bmaxdivn ' + bMaxDivn} \
        ${true='--nodc' false='' noDiffCover} \
        ${true='--noref' false='' noRef} \
        ${true='--justref' false='' justRef} \
        ${'--offrate ' + offRate} \
        ${'--ftabchars ' + fTabChars} \
        ${'--name-table ' + nameTable } \
        ${'--size-table ' + sizeTable} \
        ${'--seed ' + seed} \
        ${'--kmer-count' + kmerCount} \
        ${'--threads ' + threads} \
        --conversion-table ${conversionTable} \
        --taxonomy-tree ${taxonomyTree} \
        ${inputFasta} \
        ${centrifugeIndexBase}
    }
    runtime {
        cpu: select_first([threads])
    }
}

task classify {
    String outputDir
    String? preCommand
    String indexPrefix
    File? unpairedReads
    File read1
    File? read2
    Boolean? fastaInput
    String? outputFile = outputDir + "/centrifuge.out"
    String? reportFile = outputDir + "/centrifuge_report.tsv"
    Int? assignments
    Int? minHitLen
    Int? minTotalLen
    Array[String]? hostTaxIds
    Array[String]? excludeTaxIds
    Int? threads
    Int? memory

    command {
    set -e -o pipefail
    mkdir -p ${outputDir}
    ${preCommand}
    centrifuge \
    ${"-p " + threads} \
    ${"-x " + indexPrefix} \
    ${true="-f" false="" fastaInput} \
    ${true="-k " false="" defined(assignments)} ${assignments} \
    ${true="-1 " false="-U " defined(read2)} ${read1} \
    ${"-2 " + read2} \
    ${"-U " + unpairedReads} \
    ${"--report-file " + reportFile} \
    ${"--min-hitlen " + minHitLen} \
    ${"--min-totallen " + minTotalLen} \
    ${true="--host-taxids " false="" defined(hostTaxIds)} ${sep=',' hostTaxIds} \
    ${true="--exclude-taxids " false="" defined(excludeTaxIds)} ${sep=',' excludeTaxIds} \
    ${"-S " + outputFile}
    }

    output {
        File outFile = select_first([outputFile])
        File reportFile = select_first([reportFile])
    }

    runtime {
        cpu: select_first([threads, 1])
        memory: select_first([memory, 4])
    }
}
Ruben Vorderman's avatar
Ruben Vorderman committed

task download {
    String libraryPath
    Array[String]? domain
    String? executable = "centrifuge-download"
Ruben Vorderman's avatar
Ruben Vorderman committed
    String? preCommand
Ruben Vorderman's avatar
Ruben Vorderman committed
    String? seqTaxMapPath
    String? database = "refseq"
    String? assemblyLevel
    String? refseqCategory
    Array[String]? taxIds
    Boolean? filterUnplaced = false
    Boolean? maskLowComplexRegions = false
    Boolean? downloadRnaSeqs = false
    Boolean? modifyHeader = false
    Boolean? downloadGiMap = false

    # This will use centrifuge-download to download.
    # The bash statement at the beginning is to make sure
    # the directory for the SeqTaxMapPath exists.
    command {
Ruben Vorderman's avatar
Ruben Vorderman committed
        set -e -o pipefail
        ${preCommand}
        ${"mkdir -p $(dirname " + seqTaxMapPath + ")"}
Ruben Vorderman's avatar
Ruben Vorderman committed
        ${executable} \
        -o ${libraryPath} \
        ${true='-d ' false='' defined(domain)}${sep=','  domain} \
        ${'-a "' + assemblyLevel + '"'} \
        ${"-c " + refseqCategory} \
        ${true='-t' false='' defined(taxIds)} '${sep=',' taxIds}' \
        ${true='-r' false='' downloadRnaSeqs} \
        ${true='-u' false='' filterUnplaced} \
        ${true='-m' false='' maskLowComplexRegions} \
        ${true='-l' false='' modifyHeader} \
        ${true='-g' false='' downloadGiMap} \
        ${database} ${">> " + seqTaxMapPath}
    }
    output {
        File seqTaxMap = "${seqTaxMapPath}"
        File library = libraryPath
        Array[File] fastaFiles = glob(libraryPath + "/*/*.fna")
    }
 }

task downloadTaxonomy {
    String centrifugeTaxonomyDir
    String? executable = "centrifuge-download"
Ruben Vorderman's avatar
Ruben Vorderman committed
    String? preCommand
Ruben Vorderman's avatar
Ruben Vorderman committed
    command {
Ruben Vorderman's avatar
Ruben Vorderman committed
        set -e -o pipefail
        ${preCommand}
Ruben Vorderman's avatar
Ruben Vorderman committed
        ${executable} \
        -o ${centrifugeTaxonomyDir} \
        taxonomy
    }
    output {
        File taxonomyTree = centrifugeTaxonomyDir + "/nodes.dmp"
        File nameTable = centrifugeTaxonomyDir + "/names.dmp"
    }
 }