Add NanoQC and NanoPlot.

68120ed6 · JasperBoom · 3327f388 · 68120ed6
Commit 68120ed6 authored 4 years ago by JasperBoom
--- a/nanopack.wdl
+++ b/nanopack.wdl
+version 1.0
+
+# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task NanoPlot {
+    # Runs NanoPlot on one input file and exposes the plots, HTML report and
+    # statistics file it writes as task outputs.
+    #
+    # All output locations are built as outputDir + outputPrefix + <name>, with
+    # no separator inserted, so outputDir has to end with a "/" for the files
+    # to be looked up inside that directory.
+    input {
+        File inputFile
+        String inputFileType
+        String outputDir
+        String outputPrefix
+        String outputPath = outputDir + outputPrefix
+        Boolean outputTsvStats = true
+        Boolean dropOutliers = false
+        Boolean logLengths = false
+        String format = "png"
+        Boolean showN50 = true
+        # NOTE(review): title is declared and documented but is not forwarded to
+        # NanoPlot (the command below has no --title). Left untouched here so
+        # existing plots do not change; confirm whether it should be passed.
+        String title = basename(outputPrefix)
+
+        Int? maxLength
+        Int? minLength
+        Int? minQual
+        String? readType
+
+        Int threads = 2
+        String memory = "2G"
+        Int timeMinutes = 15
+        String dockerImage = "quay.io/biocontainers/nanoplot:1.32.0--py_0"
+    }
+
+    # Maps the value of inputFileType onto the NanoPlot flag that introduces the
+    # input file. An inputFileType that is not a key of this map cannot be
+    # resolved when the command is rendered.
+    Map[String, String] fileTypeOptions = {"fastq": "--fastq ", "fasta": "--fasta ", "fastq_rich": "--fastq_rich ", "fastq_minimal": "--fastq_minimal ", "summary": "--summary ", "bam": "--bam ", "ubam": "--ubam ", "cram": "--cram ", "pickle": "--pickle ", "feather": "--feather "}
+
+    command {
+        set -e
+        mkdir -p "$(dirname ~{outputPath})"
+        NanoPlot \
+        --threads ~{threads} \
+        --outdir ~{outputDir} \
+        --prefix ~{outputPrefix} \
+        ~{true="--tsv_stats" false="" outputTsvStats} \
+        ~{true="--drop_outliers" false="" dropOutliers} \
+        ~{true="--loglength" false="" logLengths} \
+        --format ~{format} \
+        ~{true="--N50" false="--no-N50" showN50} \
+        ~{fileTypeOptions[inputFileType] + inputFile} \
+        ~{"--maxlength " + maxLength} \
+        ~{"--minlength " + minLength} \
+        ~{"--minqual " + minQual} \
+        ~{"--readtype " + readType}
+    }
+
+    output {
+        # The plot extension follows the `format` input: it is the value passed
+        # to --format, so a hard-coded ".png" would make the task fail on
+        # missing outputs for any other format. The HTML files and the stats
+        # file keep their fixed names.
+        # NOTE(review): the length-vs-quality scatter plots presumably need
+        # quality scores in the input (e.g. not plain fasta); confirm before
+        # relying on them for such input types.
+        File dynamicHistogram = outputDir + outputPrefix + "Dynamic_Histogram_Read_length.html"
+        File readLengthHistogram = outputDir + outputPrefix + "HistogramReadlength." + format
+        File lengthVsQualityScatterPlotDot = outputDir + outputPrefix + "LengthvsQualityScatterPlot_dot." + format
+        File lengthVsQualityScatterPlotKde = outputDir + outputPrefix + "LengthvsQualityScatterPlot_kde." + format
+        File logScaleReadLengthHistogram = outputDir + outputPrefix + "LogTransformed_HistogramReadlength." + format
+        File report = outputDir + outputPrefix + "NanoPlot-report.html"
+        File weightedHistogram = outputDir + outputPrefix + "Weighted_HistogramReadlength." + format
+        File weightedLogScaleHistogram = outputDir + outputPrefix + "Weighted_LogTransformed_HistogramReadlength." + format
+        File yieldByLength = outputDir + outputPrefix + "Yield_By_Length." + format
+        File? stats = outputDir + outputPrefix + "NanoStats.txt"
+    }
+
+    runtime {
+        cpu: threads
+        memory: memory
+        time_minutes: timeMinutes
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        # inputs
+        inputFile: {description: "The input file.", category: "required"}
+        inputFileType: {description: "The format of the read file.", category: "required"}
+        outputDir: {description: "Output directory path.", category: "required"}
+        outputPrefix: {description: "Output file prefix.", category: "required"}
+        outputPath: {description: "Combination of the outputDir & outputPrefix strings.", category: "advanced"}
+        outputTsvStats: {description: "Output the stats file as a properly formatted TSV.", category: "common"}
+        dropOutliers: {description: "Drop outlier reads with extreme long length.", category: "advanced"}
+        logLengths: {description: "Additionally show logarithmic scaling of lengths in plots.", category: "advanced"}
+        format: {description: "Specify the output format of the plots.", category: "common"}
+        showN50: {description: "Show the N50 mark in the read length histogram.", category: "common"}
+        title: {description: "Add a title to all plots, requires quoting if using spaces.", category: "common"}
+        maxLength: {description: "Hide reads longer than length specified.", category: "advanced"}
+        minLength: {description: "Hide reads shorter than length specified.", category: "advanced"}
+        minQual: {description: "Drop reads with an average quality lower than specified.", category: "advanced"}
+        readType: {description: "Which read type to extract information about from summary. Options are 1D, 2D, 1D2", category: "advanced"}
+        threads: {description: "The number of threads to be used.", category: "advanced"}
+        memory: {description: "The amount of memory available to the job.", category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+        # outputs
+        dynamicHistogram: {description: "Dynamic (html) histogram of read lengths."}
+        readLengthHistogram: {description: "Histogram of read lengths."}
+        lengthVsQualityScatterPlotDot: {description: "Read lengths vs average read quality scatter plot (dot)."}
+        lengthVsQualityScatterPlotKde: {description: "Read lengths vs average read quality scatter plot (kernel density estimate)."}
+        logScaleReadLengthHistogram: {description: "Histogram of read lengths after log transformation."}
+        report: {description: "Html summary report."}
+        weightedHistogram: {description: "Weighted histogram of read lengths."}
+        weightedLogScaleHistogram: {description: "Weighted histogram of read lengths after log transformation."}
+        yieldByLength: {description: "Cumulative yield plot."}
+        stats: {description: "NanoStats report."}
+    }
+}
+
+task NanoQc {
+    # Runs nanoQC on a single input file and exposes the HTML report and the
+    # log file it writes.
+    #
+    # Output locations are formed by appending the file name directly to
+    # outputDir (no separator is inserted), so outputDir has to end with a "/"
+    # for the files to be looked up inside that directory.
+    input {
+        File inputFile
+        String outputDir
+        Boolean directRna = false
+
+        Int? minLength
+
+        Int threads = 2
+        String memory = "2G"
+        Int timeMinutes = 15
+        String dockerImage = "quay.io/biocontainers/nanoqc:0.9.4--py_0"
+    }
+
+    # Flag for direct RNA input; renders as an empty string when directRna is false.
+    String rnaFlag = if directRna then "--rna" else ""
+
+    # The mkdir call creates the parent of outputDir (dirname), not outputDir
+    # itself. The --minlen option is only rendered when minLength is supplied.
+    command {
+        set -e
+        mkdir -p "$(dirname ~{outputDir})"
+        nanoQC \
+        --outdir ~{outputDir} \
+        ~{rnaFlag} \
+        ~{"--minlen " + minLength} \
+        ~{inputFile}
+    }
+
+    output {
+        File report = "~{outputDir}nanoQC.html"
+        File log = "~{outputDir}NanoQC.log"
+    }
+
+    # threads only sets the cpu request; it is not passed to the nanoQC command.
+    runtime {
+        docker: dockerImage
+        cpu: threads
+        memory: memory
+        time_minutes: timeMinutes
+    }
+
+    parameter_meta {
+        # inputs
+        inputFile: {description: "The input file.", category: "required"}
+        outputDir: {description: "Output directory path.", category: "required"}
+        directRna: {description: "Fastq is from direct RNA-seq and contains U nucleotides.", category: "common"}
+        minLength: {description: "Filters the reads on a minimal length of the given range. Also plots the given length/2 of the begin and end of the reads.", category: "advanced"}
+        threads: {description: "The number of threads to be used.", category: "advanced"}
+        memory: {description: "The amount of memory available to the job.", category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+        # outputs
+        report: {description: ""}
+        log: {description: ""}
+    }
+}