Skip to content
Snippets Groups Projects
Unverified Commit f3b26fdb authored by Jasper Boom's avatar Jasper Boom Committed by GitHub
Browse files

Merge pull request #170 from biowdl/BIOWDL-380

BIOWDL-380: Update centrifuge task.
parents 7f1545f4 3d752965
No related branches found
No related tags found
No related merge requests found
......@@ -11,6 +11,7 @@ that users understand how the changes affect the new version.
version 2.2.0-dev
---------------------------
+ Update centrifuge tasks.
+ Removed unused "cores" inputs from transcriptclean tasks.
+ Removed unused "cores" inputs from talon tasks.
+ Removed unused "threads" input from ModifyStrelka.
......
version 1.0
# Copyright (c) 2018 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Tasks from centrifuge.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Builds a centrifuge index from a reference FASTA and NCBI-style taxonomy files.
# The resulting *.cf index files are written to ~{outputPrefix}/~{indexBasename}*.
task Build {
    input {
        Boolean disableDifferenceCover = false
        File conversionTable
        File taxonomyTree
        File nameTable
        File referenceFile
        String indexBasename = "centrifuge_index"
        String outputPrefix

        Int? offrate
        Int? ftabChars
        Int? kmerCount
        File? sizeTable

        Int threads = 5
        String memory = "20G"
        String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3"
    }

    command {
        set -e
        # The index files are written inside outputPrefix, so the directory
        # itself (not just its parent) must exist.
        mkdir -p "~{outputPrefix}"
        centrifuge-build \
        ~{"--threads " + threads} \
        ~{true="--nodc" false="" disableDifferenceCover} \
        ~{"--offrate " + offrate} \
        ~{"--ftabchars " + ftabChars} \
        ~{"--kmer-count " + kmerCount} \
        ~{"--size-table " + sizeTable} \
        ~{"--conversion-table " + conversionTable} \
        ~{"--taxonomy-tree " + taxonomyTree} \
        ~{"--name-table " + nameTable} \
        ~{referenceFile} \
        ~{outputPrefix + "/" + indexBasename}
    }

    output {
        # centrifuge-build produces several numbered files: <basename>.<n>.cf.
        Array[File] outputIndex = glob(outputPrefix + "/" + indexBasename + "*.cf")
    }

    runtime {
        cpu: threads
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        disableDifferenceCover: {description: "Disable use of the difference-cover sample.", category: "required"}
        conversionTable: {description: "List of UIDs (unique ID) and corresponding taxonomic IDs.", category: "required"}
        taxonomyTree: {description: "Taxonomic tree (e.g. nodes.dmp).", category: "required"}
        nameTable: {description: "Name table (e.g. names.dmp).", category: "required"}
        referenceFile: {description: "A comma-separated list of FASTA files containing the reference sequences to be aligned to.", category: "required"}
        indexBasename: {description: "The basename of the index files to write.", category: "required"}
        outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
        offrate: {description: "The number of rows marked by the indexer.", category: "common"}
        ftabChars: {description: "Calculate an initial BW range with respect to this character.", category: "common"}
        kmerCount: {description: "Use <int> as kmer-size for counting the distinct number of k-mers in the input sequences.", category: "common"}
        sizeTable: {description: "List of taxonomic IDs and lengths of the sequences belonging to the same taxonomic IDs.", category: "common"}
        threads: {description: "The number of threads to be used.", category: "advanced"}
        memory: {description: "The amount of memory available to the job.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task.", category: "advanced"}
    }
}
# Classifies reads against a centrifuge index. Accepts either unpaired reads
# (read1 only) or paired-end reads (read1 + read2) and writes the per-read
# classification, a report and alignment metrics under ~{outputPrefix}/.
task Classify {
    input {
        String inputFormat = "fastq"
        Boolean phred64 = false
        Int minHitLength = 22
        String indexPrefix
        Array[File]+ read1
        Array[File]? read2
        String outputPrefix
        String outputName = basename(outputPrefix)

        Int? trim5
        Int? trim3
        Int? reportMaxDistinct
        String? hostTaxIDs
        String? excludeTaxIDs

        Int threads = 4
        String memory = "16G"
        String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3"
    }

    # Translate the human-readable inputFormat into the matching centrifuge flag.
    Map[String, String] inputFormatOptions = {"fastq": "-q", "fasta": "-f", "qseq": "--qseq", "raw": "-r", "sequences": "-c"}

    command {
        set -e
        # All outputs are written inside outputPrefix, so the directory itself
        # (not just its parent) must exist.
        mkdir -p "~{outputPrefix}"
        centrifuge \
        ~{inputFormatOptions[inputFormat]} \
        ~{true="--phred64" false="--phred33" phred64} \
        ~{"--min-hitlen " + minHitLength} \
        ~{"--met-file " + outputPrefix + "/" + outputName + "_alignment_metrics.tsv"} \
        ~{"--threads " + threads} \
        ~{"--trim5 " + trim5} \
        ~{"--trim3 " + trim3} \
        ~{"-k " + reportMaxDistinct} \
        ~{"--host-taxids " + hostTaxIDs} \
        ~{"--exclude-taxids " + excludeTaxIDs} \
        ~{"-x " + indexPrefix} \
        ~{true="-1 " false="-U " defined(read2)} ~{sep="," read1} \
        ~{true="-2 " false="" defined(read2)} ~{sep="," read2} \
        ~{"-S " + outputPrefix + "/" + outputName + "_classification.tsv"} \
        ~{"--report-file " + outputPrefix + "/" + outputName + "_output_report.tsv"}
    }

    output {
        File outputMetrics = outputPrefix + "/" + outputName + "_alignment_metrics.tsv"
        File outputClassification = outputPrefix + "/" + outputName + "_classification.tsv"
        File outputReport = outputPrefix + "/" + outputName + "_output_report.tsv"
    }

    runtime {
        cpu: threads
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        inputFormat: {description: "The format of the read file(s).", category: "required"}
        phred64: {description: "If set to true, Phred+64 encoding is used.", category: "required"}
        minHitLength: {description: "Minimum length of partial hits.", category: "required"}
        indexPrefix: {description: "The basename of the index for the reference genomes.", category: "required"}
        read1: {description: "List of files containing mate 1s, or unpaired reads.", category: "required"}
        outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
        outputName: {description: "The base name of the outputPrefix.", category: "required"}
        read2: {description: "List of files containing mate 2s.", category: "common"}
        trim5: {description: "Trim <int> bases from 5' (left) end of each read before alignment.", category: "common"}
        trim3: {description: "Trim <int> bases from 3' (right) end of each read before alignment.", category: "common"}
        reportMaxDistinct: {description: "It searches for at most <int> distinct, primary assignments for each read or pair.", category: "common"}
        hostTaxIDs: {description: "A comma-separated list of taxonomic IDs that will be preferred in classification procedure.", category: "common"}
        excludeTaxIDs: {description: "A comma-separated list of taxonomic IDs that will be excluded in classification procedure.", category: "common"}
        threads: {description: "The number of threads to be used.", category: "advanced"}
        memory: {description: "The amount of memory available to the job.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task.", category: "advanced"}
    }
}
# Extracts information from a centrifuge index (sequences, names, summary or
# one of the taxonomy tables) and writes it to ~{outputPrefix}/~{printOption}.
task Inspect {
    input {
        String printOption = "fasta"
        String indexBasename
        String outputPrefix

        Int? across

        String memory = "4G"
        String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3"
    }

    # Translate the human-readable printOption into the matching
    # centrifuge-inspect flag ("fasta" is the default, flagless mode).
    Map[String, String] outputOptions = {"fasta": "", "names": "--names", "summary": "--summary", "conversionTable": "--conversion-table", "taxonomyTree": "--taxonomy-tree", "nameTable": "--name-table", "sizeTable": "--size-table"}

    command {
        set -e
        # The output file is written inside outputPrefix, so the directory
        # itself (not just its parent) must exist.
        mkdir -p "~{outputPrefix}"
        centrifuge-inspect \
        ~{outputOptions[printOption]} \
        ~{"--across " + across} \
        ~{indexBasename} \
        > ~{outputPrefix + "/" + printOption}
    }

    output {
        File outputInspect = outputPrefix + "/" + printOption
    }

    runtime {
        memory: memory
        docker: dockerImage
    }

    parameter_meta {
        printOption: {description: "The output option for inspect (fasta, summary, conversionTable, taxonomyTree, nameTable, sizeTable)", category: "required"}
        indexBasename: {description: "The basename of the index to be inspected.", category: "required"}
        outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
        across: {description: "When printing FASTA output, output a newline character every <int> bases.", category: "common"}
        memory: {description: "The amount of memory available to the job.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task.", category: "advanced"}
    }
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment