Skip to content
Snippets Groups Projects
Unverified Commit e0692dcb authored by Jasper's avatar Jasper Committed by GitHub
Browse files

Merge pull request #250 from biowdl/BIOWDL-499

BIOWDL-499: Add NanoQC and NanoPlot.
parents cb6a60ef 3df6908f
No related branches found
No related tags found
No related merge requests found
......@@ -11,16 +11,17 @@ that users understand how the changes affect the new version.
version 5.0.0-dev
---------------------------
+ Add NanoPlot and NanoQC tasks.
+ Centrifuge: Add `timeMinutes` to `Classify` task and remove unnecessary
downloading tasks (alternative is refseqtools).
+ collect-columns: updated docker image to version 1.0.0 and added the
`sumOnDuplicateId` input (defaults to false).
+ survivor: replace integer boolean type to logical true or false value.
+ vt: Add option to ignore masked reference.
+ bcftools: add sorting and annotation
+ bcftools: add sorting and annotation.
+ Bam2fastx: Input bam and index are now arrays.
+ Lima: Remove globs from outputs.
+ Updated task gridss.wdl: add --jvmheap parameter
+ Updated task gridss.wdl: add --jvmheap parameter.
+ A bwa-mem2 task was created with the same interface (including usePostalt)
as the bwa mem task.
+ bwa mem and bwa kit are now one task. The usePostalt boolean can be used to
......
version 1.0
# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
task NanoPlot {
input {
File inputFile
String inputFileType
String outputDir
String outputPrefix
String outputPath = outputDir + outputPrefix
Boolean outputTsvStats = true
Boolean dropOutliers = false
Boolean logLengths = false
String format = "png"
Boolean showN50 = true
String title = basename(outputPrefix)
Int? maxLength
Int? minLength
Int? minQual
String? readType
Int threads = 2
String memory = "2G"
Int timeMinutes = 15
String dockerImage = "quay.io/biocontainers/nanoplot:1.32.0--py_0"
}
Map[String, String] fileTypeOptions = {"fastq": "--fastq ", "fasta": "--fasta ", "fastq_rich": "--fastq_rich ", "fastq_minimal": "--fastq_minimal ", "summary": "--summary ", "bam": "--bam ", "ubam": "--ubam ", "cram": "--cram ", "pickle": "--pickle ", "feather": "--feather "}
command {
set -e
mkdir -p "$(dirname ~{outputPath})"
NanoPlot \
--threads ~{threads} \
--outdir ~{outputDir} \
--prefix ~{outputPrefix} \
~{true="--tsv_stats" false="" outputTsvStats} \
~{true="--drop_outliers" false="" dropOutliers} \
~{true="--loglength" false="" logLengths} \
--format ~{format} \
~{true="--N50" false="--no-N50" showN50} \
~{"--maxlength " + maxLength} \
~{"--minlength " + minLength} \
~{"--minqual " + minQual} \
~{"--readtype " + readType} \
~{fileTypeOptions[inputFileType] + inputFile}
}
output {
File dynamicHistogram = outputDir + outputPrefix + "Dynamic_Histogram_Read_length.html"
File readLengthHistogram = outputDir + outputPrefix + "HistogramReadlength.png"
File logScaleReadLengthHistogram = outputDir + outputPrefix + "LogTransformed_HistogramReadlength.png"
File report = outputDir + outputPrefix + "NanoPlot-report.html"
File weightedHistogram = outputDir + outputPrefix + "Weighted_HistogramReadlength.png"
File weightedLogScaleHistogram = outputDir + outputPrefix + "Weighted_LogTransformed_HistogramReadlength.png"
File yieldByLength = outputDir + outputPrefix + "Yield_By_Length.png"
File? lengthVsQualityScatterPlotDot = outputDir + outputPrefix + "LengthvsQualityScatterPlot_dot.png"
File? lengthVsQualityScatterPlotKde = outputDir + outputPrefix + "LengthvsQualityScatterPlot_kde.png"
File? stats = outputDir + outputPrefix + "NanoStats.txt"
}
runtime {
cpu: threads
memory: memory
time_minutes: timeMinutes
docker: dockerImage
}
parameter_meta {
# inputs
inputFile: {description: "The input file.", category: "required"}
inputFileType: {description: "The format of the read file.", category: "required"}
outputDir: {description: "Output directory path.", category: "required"}
outputPrefix: {description: "Output file prefix.", category: "required"}
outputTsvStats: {description: "Output the stats file as a properly formatted TSV.", category: "common"}
dropOutliers: {description: "Drop outlier reads with extreme long length.", category: "advanced"}
logLengths: {description: "Additionally show logarithmic scaling of lengths in plots.", category: "advanced"}
format: {description: "Specify the output format of the plots.", category: "required"}
showN50: {description: "Show the N50 mark in the read length histogram.", category: "common"}
title: {description: "Add a title to all plots, requires quoting if using spaces.", category: "common"}
maxLength: {description: "Hide reads longer than length specified.", category: "advanced"}
minLength: {description: "Hide reads shorter than length specified.", category: "advanced"}
minQual: {description: "Drop reads with an average quality lower than specified.", category: "advanced"}
readType: {description: "Which read type to extract information about from summary. Options are 1D, 2D, 1D2", category: "advanced"}
threads: {description: "The number of threads to be used.", category: "advanced"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
# outputs
dynamicHistogram: {description: "Dynamic histogram of read length."}
readLengthHistogram: {description: "Histogram of read length."}
logScaleReadLengthHistogram: {description: "Histogram of read lengths after log transformation."}
report: {description: "Html summary report."}
weightedHistogram: {description: "Weighted histogram of read lengths."}
weightedLogScaleHistogram: {description: "Weighted histogram of read lengths after log transformation."}
yieldByLength: {description: "Cumulative yield plot."}
lengthVsQualityScatterPlotDot: {description: "Read lengths vs average read quality plot."}
lengthVsQualityScatterPlotKde: {description: "Read lengths vs average read quality plot."}
stats: {description: "NanoStats report."}
}
}
task NanoQc {
input {
File inputFile
String outputDir
Boolean directRna = false
Int? minLength
String memory = "2G"
Int timeMinutes = 15
String dockerImage = "quay.io/biocontainers/nanoqc:0.9.4--py_0"
}
command {
set -e
mkdir -p "$(dirname ~{outputDir})"
nanoQC \
--outdir ~{outputDir} \
~{true="--rna" false="" directRna} \
~{"--minlen " + minLength} \
~{inputFile}
}
output {
File report = outputDir + "nanoQC.html"
File log = outputDir + "NanoQC.log"
}
runtime {
memory: memory
time_minutes: timeMinutes
docker: dockerImage
}
parameter_meta {
# inputs
inputFile: {description: "The input file.", category: "required"}
outputDir: {description: "Output directory path.", category: "required"}
directRna: {description: "Fastq is from direct RNA-seq and contains U nucleotides.", category: "common"}
minLength: {description: "Filters the reads on a minimal length of the given range. Also plots the given length/2 of the begin and end of the reads.", category: "advanced"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
# outputs
report: {description: "Html summary report."}
log: {description: "Progress report."}
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment