Skip to content
Snippets Groups Projects
Unverified Commit 605c278c authored by Peter van 't Hof's avatar Peter van 't Hof Committed by GitHub
Browse files

Merge pull request #13 from biowdl/centrifuge-classify

Centrifuge classify and kreport tasks
parents aebf426f bee92002
No related branches found
No related tags found
No related merge requests found
# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2017
# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2018
#
# Tasks from centrifuge
task build {
File conversionTable
File taxonomyTree
File inputFasta
String centrifugeIndexBase
String? preCommand
String? centrifugeBuildExecutable = "centrifuge-build"
#Boolean? c = false
Boolean? largeIndex = false
Boolean? noAuto = false
Int? bMax
Int? bMaxDivn
Boolean? noDiffCover = false
Boolean? noRef = false
Boolean? justRef = false
Int? offRate
Int? fTabChars
File? nameTable
File? sizeTable
Int? seed
Int? threads
Int? memory
Int? kmerCount
command {
set -e -o pipefail
${preCommand}
${"mkdir -p $(dirname " + centrifugeIndexBase + ")"}
${centrifugeBuildExecutable} \
${true='--large-index' false='' largeIndex} \
${true='--noauto' false='' noAuto} \
${'--bmax ' + bMax} \
${'--bmaxdivn ' + bMaxDivn} \
${true='--nodc' false='' noDiffCover} \
${true='--noref' false='' noRef} \
${true='--justref' false='' justRef} \
${'--offrate ' + offRate} \
${'--ftabchars ' + fTabChars} \
${'--name-table ' + nameTable } \
${'--size-table ' + sizeTable} \
${'--seed ' + seed} \
${'--kmer-count' + kmerCount} \
${'--threads ' + threads} \
--conversion-table ${conversionTable} \
--taxonomy-tree ${taxonomyTree} \
${inputFasta} \
${centrifugeIndexBase}
}
runtime {
cpu: select_first([threads, 8])
memory: select_first([memory, 20])
}
}
task classify {
String outputDir
Boolean? compressOutput = true
String? preCommand
String indexPrefix
File? unpairedReads
File read1
File? read2
Boolean? fastaInput
# Variables for handling output
String outputFileName = outputDir + "/centrifuge.out"
String reportFileName = outputDir + "/centrifuge_report.tsv"
String finalOutputName = if (compressOutput == true) then outputFileName + ".gz" else outputFileName
String? metFileName # If this is specified, the report file is empty
Int? assignments
Int? minHitLen
Int? minTotalLen
Array[String]? hostTaxIds
Array[String]? excludeTaxIds
Int? threads
Int? memory
command {
set -e -o pipefail
mkdir -p ${outputDir}
${preCommand}
centrifuge \
${"-p " + threads} \
${"-x " + indexPrefix} \
${true="-f" false="" fastaInput} \
${true="-k " false="" defined(assignments)} ${assignments} \
${true="-1 " false="-U " defined(read2)} ${read1} \
${"-2 " + read2} \
${"-U " + unpairedReads} \
${"--report-file " + reportFileName} \
${"--min-hitlen " + minHitLen} \
${"--min-totallen " + minTotalLen} \
${"--met-file " + metFileName} \
${true="--host-taxids " false="" defined(hostTaxIds)} ${sep=',' hostTaxIds} \
${true="--exclude-taxids " false="" defined(excludeTaxIds)} ${sep=',' excludeTaxIds} \
${true="| gzip -c >" false="-S" compressOutput} ${finalOutputName}
}
output {
File classifiedReads = finalOutputName
File reportFile = reportFileName
}
runtime {
cpu: select_first([threads, 1])
memory: select_first([memory, 4])
}
}
task download {
String libraryPath
......@@ -62,55 +170,41 @@ task downloadTaxonomy {
}
}
task build {
File conversionTable
File taxonomyTree
File inputFasta
String centrifugeIndexBase
task kreport {
String? preCommand
String? centrifugeBuildExecutable = "centrifuge-build"
#Boolean? c = false
Boolean? largeIndex = false
Boolean? noAuto = false
Int? bMax
Int? bMaxDivn
Boolean? noDiffCover = false
Boolean? noRef = false
Boolean? justRef = false
Int? offRate
Int? fTabChars
File? nameTable
File? sizeTable
Int? seed
Int? threads = 1
Int? kmerCount
File centrifugeOut
Boolean inputIsCompressed
String kreportFileName=sub(centrifugeOut, "\\.out$|\\.out\\.gz$", "\\.kreport")
String indexPrefix
Boolean? onlyUnique
Boolean? showZeros
Boolean? isCountTable
Int? minScore
Int? minLength
Int? cores
Int? memory
command {
set -e -o pipefail
${preCommand}
${"mkdir -p $(dirname " + centrifugeIndexBase + ")"}
${centrifugeBuildExecutable} \
${true='--large-index' false='' largeIndex} \
${true='--noauto' false='' noAuto} \
${'--bmax ' + bMax} \
${'--bmaxdivn ' + bMaxDivn} \
${true='--nodc' false='' noDiffCover} \
${true='--noref' false='' noRef} \
${true='--justref' false='' justRef} \
${'--offrate ' + offRate} \
${'--ftabchars ' + fTabChars} \
${'--name-table ' + nameTable } \
${'--size-table ' + sizeTable} \
${'--seed ' + seed} \
${'--kmer-count' + kmerCount} \
${'--threads ' + threads} \
--conversion-table ${conversionTable} \
--taxonomy-tree ${taxonomyTree} \
${inputFasta} \
${centrifugeIndexBase}
centrifuge-kreport \
-x ${indexPrefix} \
${true="--only-unique" false="" onlyUnique} \
${true="--show-zeros" false="" showZeros} \
${true="--is-count-table" false="" isCountTable} \
${"--min-score " + minScore} \
${"--min-length " + minLength} \
${true="<(zcat" false="" inputIsCompressed} ${centrifugeOut}\
${true=")" false="" inputIsCompressed} \
> ${kreportFileName}
}
output {
File kreport = kreportFileName
}
runtime {
cpu: select_first([threads])
cpu: select_first([cores, 1])
memory: select_first([memory, 4])
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment