diff --git a/CHANGELOG.md b/CHANGELOG.md index a9329bf5e8eeeca3a1705c43ad389cca68f58ca1..142622e23f2470f0d95e088a908db0bec3eec327 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,14 +11,20 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Centrifuge: Remove metrics file from classification (which causes the + summary report to be empty). + https://github.com/DaehwanKimLab/centrifuge/issues/83 ++ Add NanoPlot and NanoQC tasks. ++ Centrifuge: Add `timeMinutes` to `Classify` task and remove unnecessary + downloading tasks (alternative is refseqtools). + collect-columns: updated docker image to version 1.0.0 and added the `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. + vt: Add option to ignore masked reference. -+ bcftools: add sorting and annotation ++ bcftools: add sorting and annotation. + Bam2fastx: Input bam and index are now arrays. + Lima: Remove globs from outputs. -+ Updated task gridss.wdl: add --jvmheap parameter ++ Updated task gridss.wdl: add --jvmheap parameter. + A bwa-mem2 task was created with the same interface (including usePostalt) as the bwa mem task. + bwa mem and bwa kit are now one task. 
The usePostalt boolean can be used to diff --git a/bcftools.wdl b/bcftools.wdl index 520bcf159282dac1b000b1e2978330026daf2c88..affa805a78e7fcd2755fc1dab1365741968ff885 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -52,6 +52,8 @@ task Annotate { String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } + Boolean indexing = if outputType == "z" then true else false + command { set -e mkdir -p "$(dirname ~{outputPath})" @@ -77,13 +79,14 @@ task Annotate { ~{true="--single-overlaps" false="" singleOverlaps} \ ~{true="--remove" false="" length(removeAnns) > 0} ~{sep="," removeAnns} \ ~{inputFile} - bcftools index --tbi ~{outputPath} + + ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} } output { File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" + File? outputVcfIndex = outputPath + ".tbi" } runtime { @@ -132,6 +135,8 @@ task Sort { String outputType = "z" } + Boolean indexing = if outputType == "z" then true else false + command { set -e mkdir -p "$(dirname ~{outputPath})" @@ -139,12 +144,13 @@ task Sort { -o ~{outputPath} \ -O ~{outputType} \ ~{inputFile} - bcftools index --tbi ~{outputPath} + + ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} } output { File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" + File? 
outputVcfIndex = outputPath + ".tbi" } runtime { @@ -165,50 +171,6 @@ task Sort { } -task View { - input { - File inputFile - String outputPath = "output.vcf.gz" - String memory = "256M" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) - String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" - String outputType = "z" - Int compressionLevel = 1 - } - - command { - set -e - mkdir -p "$(dirname ~{outputPath})" - bcftools view \ - -o ~{outputPath} \ - -O ~{outputType} \ - -l ~{compressionLevel} \ - ~{inputFile} - bcftools index --tbi ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes - docker: dockerImage - } - - parameter_meta { - inputFile: {description: "A vcf or bcf file.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
-    }
-}
-
 task Stats {
     input {
         File inputVcf
@@ -313,3 +275,61 @@ task Stats {
         timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
     }
 }
+
+# Run `bcftools view` on a VCF/BCF file; compressed output is additionally indexed with tabix.
+task View {
+    input {
+        File inputFile
+        String outputPath = "output.vcf"
+        Int compressionLevel = 0
+        String memory = "256M"
+        Int timeMinutes = 1 + ceil(size(inputFile, "G"))
+        String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2"
+    }
+
+    # A non-zero compression level selects compressed VCF output ("z"); only
+    # that kind of output is indexed.
+    Boolean indexing = compressionLevel > 0
+    String outputType = if indexing then "z" else "v"
+    # Compressed output gets a ".gz" suffix unless outputPath already ends in
+    # one. The command and the outputs all use this single resolved path.
+    Boolean appendGz = indexing && (basename(outputPath) == basename(outputPath, ".gz"))
+    String outputFilePath = if appendGz then outputPath + ".gz" else outputPath
+
+    command {
+        set -e
+        mkdir -p "$(dirname ~{outputFilePath})"
+        bcftools view \
+        -o ~{outputFilePath} \
+        -l ~{compressionLevel} \
+        -O ~{outputType} \
+        ~{inputFile}
+
+        ~{if indexing then 'bcftools index --tbi ~{outputFilePath}' else ''}
+    }
+
+    output {
+        File outputVcf = outputFilePath
+        File? outputVcfIndex = outputFilePath + ".tbi"
+    }
+
+    runtime {
+        memory: memory
+        time_minutes: timeMinutes
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        # inputs
+        inputFile: {description: "A vcf or bcf file.", category: "required"}
+        compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"}
+        outputPath: {description: "The location the output VCF file should be written. When compressionLevel is above 0 and the path does not end in '.gz', '.gz' is appended.", category: "common"}
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+        # outputs
+        outputVcf: {description: "The VCF file written by bcftools view."}
+        outputVcfIndex: {description: "Tabix index of the output VCF; only produced when compressionLevel is above 0."}
+    }
+}
diff --git a/centrifuge.wdl b/centrifuge.wdl
index ee30532570c43a3ab137d51d926ebd5cda6ece20..1e7a0b4566c1b782877ee37689a7a49045ea7fad 100644
--- a/centrifuge.wdl
+++ b/centrifuge.wdl
@@ -110,6 +110,7 @@ task Classify {
 
         Int threads = 4
         String memory = "16G"
+        Int timeMinutes = 2880
         String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5"
     }
 
@@ -127,7 +128,6 @@ task Classify {
         ~{inputFormatOptions[inputFormat]} \
         ~{true="--phred64" false="--phred33" phred64} \
         --min-hitlen ~{minHitLength} \
-        ~{"--met-file " + outputPrefix + "_alignment_metrics.tsv"} \
         --threads ~{threads} \
         ~{"--trim5 " + trim5} \
         ~{"--trim3 " + trim3} \
@@ -142,7 +142,6 @@ task Classify {
     >>>
 
     output {
-        File metrics = outputPrefix + "_alignment_metrics.tsv"
         File classification = outputPrefix + "_classification.tsv"
         File report = outputPrefix + "_output_report.tsv"
     }
@@ -150,6 +149,7 @@ task Classify {
     runtime {
         cpu: threads
         memory: memory
+        time_minutes: timeMinutes
         docker: dockerImage
     }
 
@@ -169,10 +169,10 @@ task Classify {
         excludeTaxIDs: {description: "A comma-separated list of taxonomic IDs that will be excluded in classification procedure.", category: "common"}
         threads: {description: "The number of threads to be used.", category: "advanced"}
         memory: {description: "The amount of memory available to the job.", category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
         dockerImage: {description: "The docker image used for this task.
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - metrics: {description: "File with centrifuge metrics."} classification: {description: "File with the classification results."} report: {description: "File with a classification summary."} } @@ -233,73 +233,6 @@ task Inspect { } } -task Download { - input { - String libraryPath - Array[String]? domain - String executable = "centrifuge-download" - String? preCommand - String? seqTaxMapPath - String database = "refseq" - String? assemblyLevel - String? refseqCategory - Array[String]? taxIds - Boolean filterUnplaced = false - Boolean maskLowComplexRegions = false - Boolean downloadRnaSeqs = false - Boolean modifyHeader = false - Boolean downloadGiMap = false - } - - # This will use centrifuge-download to download. - # The bash statement at the beginning is to make sure - # the directory for the SeqTaxMapPath exists. - command { - set -e -o pipefail - ~{preCommand} - ~{"mkdir -p $(dirname " + seqTaxMapPath + ")"} - ~{executable} \ - -o ~{libraryPath} \ - ~{true='-d ' false='' defined(domain)}~{sep=',' domain} \ - ~{'-a "' + assemblyLevel + '"'} \ - ~{"-c " + refseqCategory} \ - ~{true='-t' false='' defined(taxIds)} '~{sep=',' taxIds}' \ - ~{true='-r' false='' downloadRnaSeqs} \ - ~{true='-u' false='' filterUnplaced} \ - ~{true='-m' false='' maskLowComplexRegions} \ - ~{true='-l' false='' modifyHeader} \ - ~{true='-g' false='' downloadGiMap} \ - ~{database} ~{">> " + seqTaxMapPath} - } - - output { - File seqTaxMap = "~{seqTaxMapPath}" - File library = libraryPath - Array[File] fastaFiles = glob(libraryPath + "/*/*.fna") - } - } - -task DownloadTaxonomy { - input { - String taxonomyDir - String executable = "centrifuge-download" - String? 
preCommand
-    }
-
-    command {
-        set -e -o pipefail
-        ~{preCommand}
-        ~{executable} \
-        -o ~{taxonomyDir} \
-        taxonomy
-    }
-
-    output {
-        File taxonomyTree = taxonomyDir + "/nodes.dmp"
-        File nameTable = taxonomyDir + "/names.dmp"
-    }
- }
-
 task KReport {
     input {
         File classification
diff --git a/nanopack.wdl b/nanopack.wdl
new file mode 100644
index 0000000000000000000000000000000000000000..6860cf13828d50b0190c88d23e62601111bffb8d
--- /dev/null
+++ b/nanopack.wdl
@@ -0,0 +1,182 @@
+version 1.0
+
+# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Run NanoPlot on a single input file and collect its plots, report and statistics.
+# Output paths are built as outputDir + outputPrefix + <file name>, so outputDir
+# must end with a path separator for the declared outputs to resolve.
+task NanoPlot {
+    input {
+        File inputFile
+        String inputFileType
+        String outputDir
+        String outputPrefix
+        String outputPath = outputDir + outputPrefix
+        Boolean outputTsvStats = true
+        Boolean dropOutliers = false
+        Boolean logLengths = false
+        String format = "png"
+        Boolean showN50 = true
+        String title = basename(outputPrefix)
+
+        Int? maxLength
+        Int? minLength
+        Int? minQual
+        String? readType
+
+        Int threads = 2
+        String memory = "2G"
+        Int timeMinutes = 15
+        String dockerImage = "quay.io/biocontainers/nanoplot:1.32.0--py_0"
+    }
+
+    # Maps each accepted inputFileType to the NanoPlot flag that precedes the input file.
+    Map[String, String] fileTypeOptions = {"fastq": "--fastq ", "fasta": "--fasta ", "fastq_rich": "--fastq_rich ", "fastq_minimal": "--fastq_minimal ", "summary": "--summary ", "bam": "--bam ", "ubam": "--ubam ", "cram": "--cram ", "pickle": "--pickle ", "feather": "--feather "}
+
+    command {
+        set -e
+        mkdir -p "$(dirname ~{outputPath})"
+        NanoPlot \
+        --threads ~{threads} \
+        --outdir ~{outputDir} \
+        --prefix ~{outputPrefix} \
+        --title "~{title}" \
+        ~{true="--tsv_stats" false="" outputTsvStats} \
+        ~{true="--drop_outliers" false="" dropOutliers} \
+        ~{true="--loglength" false="" logLengths} \
+        --format ~{format} \
+        ~{true="--N50" false="--no-N50" showN50} \
+        ~{"--maxlength " + maxLength} \
+        ~{"--minlength " + minLength} \
+        ~{"--minqual " + minQual} \
+        ~{"--readtype " + readType} \
+        ~{fileTypeOptions[inputFileType] + inputFile}
+    }
+
+    output {
+        # Static plots take their extension from `format`; the HTML files and
+        # the stats file keep fixed names.
+        File dynamicHistogram = outputDir + outputPrefix + "Dynamic_Histogram_Read_length.html"
+        File readLengthHistogram = outputDir + outputPrefix + "HistogramReadlength." + format
+        File logScaleReadLengthHistogram = outputDir + outputPrefix + "LogTransformed_HistogramReadlength." + format
+        File report = outputDir + outputPrefix + "NanoPlot-report.html"
+        File weightedHistogram = outputDir + outputPrefix + "Weighted_HistogramReadlength." + format
+        File weightedLogScaleHistogram = outputDir + outputPrefix + "Weighted_LogTransformed_HistogramReadlength." + format
+        File yieldByLength = outputDir + outputPrefix + "Yield_By_Length." + format
+        File? lengthVsQualityScatterPlotDot = outputDir + outputPrefix + "LengthvsQualityScatterPlot_dot." + format
+        File? lengthVsQualityScatterPlotKde = outputDir + outputPrefix + "LengthvsQualityScatterPlot_kde." + format
+        File? stats = outputDir + outputPrefix + "NanoStats.txt"
+    }
+
+    runtime {
+        cpu: threads
+        memory: memory
+        time_minutes: timeMinutes
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        # inputs
+        inputFile: {description: "The input file.", category: "required"}
+        inputFileType: {description: "The format of the read file.", category: "required"}
+        outputDir: {description: "Output directory path.", category: "required"}
+        outputPrefix: {description: "Output file prefix.", category: "required"}
+        outputPath: {description: "Combined output directory and prefix; its parent directory is created before NanoPlot runs.", category: "advanced"}
+        outputTsvStats: {description: "Output the stats file as a properly formatted TSV.", category: "common"}
+        dropOutliers: {description: "Drop outlier reads with extreme long length.", category: "advanced"}
+        logLengths: {description: "Additionally show logarithmic scaling of lengths in plots.", category: "advanced"}
+        format: {description: "Specify the output format of the plots.", category: "common"}
+        showN50: {description: "Show the N50 mark in the read length histogram.", category: "common"}
+        title: {description: "Add a title to all plots, requires quoting if using spaces.", category: "common"}
+        maxLength: {description: "Hide reads longer than length specified.", category: "advanced"}
+        minLength: {description: "Hide reads shorter than length specified.", category: "advanced"}
+        minQual: {description: "Drop reads with an average quality lower than specified.", category: "advanced"}
+        readType: {description: "Which read type to extract information about from summary. Options are 1D, 2D, 1D2", category: "advanced"}
+        threads: {description: "The number of threads to be used.", category: "advanced"}
+        memory: {description: "The amount of memory available to the job.", category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+        # outputs
+        dynamicHistogram: {description: "Dynamic histogram of read length."}
+        readLengthHistogram: {description: "Histogram of read length."}
+        logScaleReadLengthHistogram: {description: "Histogram of read lengths after log transformation."}
+        report: {description: "Html summary report."}
+        weightedHistogram: {description: "Weighted histogram of read lengths."}
+        weightedLogScaleHistogram: {description: "Weighted histogram of read lengths after log transformation."}
+        yieldByLength: {description: "Cumulative yield plot."}
+        lengthVsQualityScatterPlotDot: {description: "Read lengths vs average read quality plot."}
+        lengthVsQualityScatterPlotKde: {description: "Read lengths vs average read quality plot."}
+        stats: {description: "NanoStats report."}
+    }
+}
+
+# Run nanoQC on a single input file and collect its HTML report and log.
+# Output paths are built as outputDir + <file name>, so outputDir must end with a path separator.
+task NanoQc {
+    input {
+        File inputFile
+        String outputDir
+        Boolean directRna = false
+
+        Int? minLength
+
+        String memory = "2G"
+        Int timeMinutes = 15
+        String dockerImage = "quay.io/biocontainers/nanoqc:0.9.4--py_0"
+    }
+
+    command {
+        set -e
+        mkdir -p "~{outputDir}"
+        nanoQC \
+        --outdir ~{outputDir} \
+        ~{true="--rna" false="" directRna} \
+        ~{"--minlen " + minLength} \
+        ~{inputFile}
+    }
+
+    output {
+        File report = outputDir + "nanoQC.html"
+        File log = outputDir + "NanoQC.log"
+    }
+
+    runtime {
+        memory: memory
+        time_minutes: timeMinutes
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        # inputs
+        inputFile: {description: "The input file.", category: "required"}
+        outputDir: {description: "Output directory path.", category: "required"}
+        directRna: {description: "Fastq is from direct RNA-seq and contains U nucleotides.", category: "common"}
+        minLength: {description: "Filters the reads on a minimal length of the given range. Also plots the given length/2 of the begin and end of the reads.", category: "advanced"}
+        memory: {description: "The amount of memory available to the job.", category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+        # outputs
+        report: {description: "Html summary report."}
+        log: {description: "Progress report."}
+    }
+}