-
Ruben Vorderman authoredRuben Vorderman authored
centrifuge.wdl 6.75 KiB
version 1.0
# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2018
#
# Tasks from centrifuge
task Build {
input {
File conversionTable
File taxonomyTree
File inputFasta
String centrifugeIndexBase
String? preCommand
String? centrifugeBuildExecutable = "centrifuge-build"
#Boolean? c = false
Boolean? largeIndex = false
Boolean? noAuto = false
Int? bMax
Int? bMaxDivn
Boolean? noDiffCover = false
Boolean? noRef = false
Boolean? justRef = false
Int? offRate
Int? fTabChars
File? nameTable
File? sizeTable
Int? seed
Int? kmerCount
Int? threads
Int? memory
}
command {
set -e -o pipefail
~{preCommand}
~{"mkdir -p $(dirname " + centrifugeIndexBase + ")"}
~{centrifugeBuildExecutable} \
~{true='--large-index' false='' largeIndex} \
~{true='--noauto' false='' noAuto} \
~{'--bmax ' + bMax} \
~{'--bmaxdivn ' + bMaxDivn} \
~{true='--nodc' false='' noDiffCover} \
~{true='--noref' false='' noRef} \
~{true='--justref' false='' justRef} \
~{'--offrate ' + offRate} \
~{'--ftabchars ' + fTabChars} \
~{'--name-table ' + nameTable } \
~{'--size-table ' + sizeTable} \
~{'--seed ' + seed} \
~{'--kmer-count' + kmerCount} \
~{'--threads ' + threads} \
--conversion-table ~{conversionTable} \
--taxonomy-tree ~{taxonomyTree} \
~{inputFasta} \
~{centrifugeIndexBase}
}
runtime {
cpu: select_first([threads, 8])
memory: select_first([memory, 20])
}
}
task Classify {
input {
String outputDir
Boolean? compressOutput = true
String? preCommand
String indexPrefix
Array[File]? unpairedReads
Array[File]+ read1
Array[File]? read2
Boolean? fastaInput
# Variables for handling output
String? metFilePath # If this is specified, the report file is empty
Int? assignments
Int? minHitLen
Int? minTotalLen
Array[String]? hostTaxIds
Array[String]? excludeTaxIds
Int? threads
Int? memory
}
String outputFilePath = outputDir + "/centrifuge.out"
String reportFilePath = outputDir + "/centrifuge_report.tsv"
String finalOutputPath = if (compressOutput == true)
then outputFilePath + ".gz"
else outputFilePath
command {
set -e -o pipefail
mkdir -p ~{outputDir}
~{preCommand}
centrifuge \
~{"-p " + select_first([threads, 4])} \
~{"-x " + indexPrefix} \
~{true="-f" false="" fastaInput} \
~{true="-k" false="" defined(assignments)} ~{assignments} \
~{true="-1" false="-U" defined(read2)} ~{sep=',' read1} \
~{true="-2" false="" defined(read2)} ~{sep=',' read2} \
~{true="-U" false="" defined(unpairedReads)} ~{sep=',' unpairedReads} \
~{"--report-file " + reportFilePath} \
~{"--min-hitlen " + minHitLen} \
~{"--min-totallen " + minTotalLen} \
~{"--met-file " + metFilePath} \
~{true="--host-taxids " false="" defined(hostTaxIds)} ~{sep=',' hostTaxIds} \
~{true="--exclude-taxids " false="" defined(excludeTaxIds)} ~{sep=',' excludeTaxIds} \
~{true="| gzip -c >" false="-S" compressOutput} ~{finalOutputPath}
}
output {
File classifiedReads = finalOutputPath
File reportFile = reportFilePath
}
runtime {
cpu: select_first([threads, 4])
memory: select_first([memory, 8])
}
}
task Download {
input {
String libraryPath
Array[String]? domain
String? executable = "centrifuge-download"
String? preCommand
String? seqTaxMapPath
String? database = "refseq"
String? assemblyLevel
String? refseqCategory
Array[String]? taxIds
Boolean? filterUnplaced = false
Boolean? maskLowComplexRegions = false
Boolean? downloadRnaSeqs = false
Boolean? modifyHeader = false
Boolean? downloadGiMap = false
}
# This will use centrifuge-download to download.
# The bash statement at the beginning is to make sure
# the directory for the SeqTaxMapPath exists.
command {
set -e -o pipefail
~{preCommand}
~{"mkdir -p $(dirname " + seqTaxMapPath + ")"}
~{executable} \
-o ~{libraryPath} \
~{true='-d ' false='' defined(domain)}~{sep=',' domain} \
~{'-a "' + assemblyLevel + '"'} \
~{"-c " + refseqCategory} \
~{true='-t' false='' defined(taxIds)} '~{sep=',' taxIds}' \
~{true='-r' false='' downloadRnaSeqs} \
~{true='-u' false='' filterUnplaced} \
~{true='-m' false='' maskLowComplexRegions} \
~{true='-l' false='' modifyHeader} \
~{true='-g' false='' downloadGiMap} \
~{database} ~{">> " + seqTaxMapPath}
}
output {
File seqTaxMap = "~{seqTaxMapPath}"
File library = libraryPath
Array[File] fastaFiles = glob(libraryPath + "/*/*.fna")
}
}
task DownloadTaxonomy {
input {
String centrifugeTaxonomyDir
String? executable = "centrifuge-download"
String? preCommand
}
command {
set -e -o pipefail
~{preCommand}
~{executable} \
-o ~{centrifugeTaxonomyDir} \
taxonomy
}
output {
File taxonomyTree = centrifugeTaxonomyDir + "/nodes.dmp"
File nameTable = centrifugeTaxonomyDir + "/names.dmp"
}
}
task Kreport {
input {
String? preCommand
File centrifugeOut
Boolean inputIsCompressed
String outputDir
String? suffix = "kreport"
String? prefix = "centrifuge"
String indexPrefix
Boolean? onlyUnique
Boolean? showZeros
Boolean? isCountTable
Int? minScore
Int? minLength
Int? cores
Int? memory
}
String kreportFilePath = outputDir + "/" + prefix + "." + suffix
command {
set -e -o pipefail
~{preCommand}
centrifuge-kreport \
-x ~{indexPrefix} \
~{true="--only-unique" false="" onlyUnique} \
~{true="--show-zeros" false="" showZeros} \
~{true="--is-count-table" false="" isCountTable} \
~{"--min-score " + minScore} \
~{"--min-length " + minLength} \
~{true="<(zcat" false="" inputIsCompressed} ~{centrifugeOut}\
~{true=")" false="" inputIsCompressed} \
> ~{kreportFilePath}
}
output {
File kreport = kreportFilePath
}
runtime {
cpu: select_first([cores, 1])
memory: select_first([memory, 4])
}
}