diff --git a/CHANGELOG.md b/CHANGELOG.md index 21019ef229a0951320ad07cb7b492b68649c41a7..33f85337a40d19520b36117a57720916ddbd3288 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,9 +8,24 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 3.2.0 + +version 3.2.0-develop --------------------------- + Added STAR GenomeGenerate task. ++ GATK.HaplotypeCaller: Add `--dont-use-soft-clipped-bases` and + `--standard-min-confidence-threshold-for-calling` options. These are + required for RNA seq variant calling according to GATK best practices. ++ Samtools: Fix quotations in sort command. ++ Samtools SortByName is now called Sort. ++ Generalize sort task to now also sort by position, instead of just read name. ++ Add CreateSequenceDictionary task to picard. ++ Add faidx task to samtools. ++ Isoseq3: Remove dirname command from output folder creation step. ++ CCS, Isoseq3, Lima: Default resources lowered to 2 cores and 2G memory (were 4 cores and 10G). ++ Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. ++ Lima: Replace mv command with cp. ++ Add WDL task for smoove (lumpy) sv-caller. + version 3.1.0 --------------------------- diff --git a/ccs.wdl b/ccs.wdl index 39bb0a19578e353a596c471e7490556de89896a9..3a8f887947faab7bc3ee898cf06fcd6ca28586d8 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -31,8 +31,8 @@ task CCS { File subreadsFile String outputPrefix - Int cores = 4 - String memory = "10G" + Int cores = 2 + String memory = "2G" String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" } diff --git a/gatk.wdl b/gatk.wdl index b730cbee75fd470f8d6eb4d446d7bba06742c6e5..586c25d09edfde9cb6efa6387c445fb8b37753e9 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -910,6 +910,8 @@ task HaplotypeCaller { String? outputMode Boolean gvcf = false String emitRefConfidence = if gvcf then "GVCF" else "NONE" + Boolean dontUseSoftClippedBases = false + Float? 
standardMinConfidenceThresholdForCalling String memory = "12G" String javaXmx = "4G" @@ -931,7 +933,9 @@ task HaplotypeCaller { ~{"--pedigree " + pedigree} \ ~{"--contamination-fraction-per-sample-file " + contamination} \ ~{"--output-mode " + outputMode} \ - --emit-ref-confidence ~{emitRefConfidence} + --emit-ref-confidence ~{emitRefConfidence} \ + ~{true="--dont-use-soft-clipped-bases" false="" dontUseSoftClippedBases} \ + ~{"--standard-min-confidence-threshold-for-calling " + standardMinConfidenceThresholdForCalling} } output { @@ -962,6 +966,8 @@ task HaplotypeCaller { category: "advanced"} emitRefConfidence: {description: "Whether to include reference calls. Three modes: 'NONE', 'BP_RESOLUTION' and 'GVCF'", category: "advanced"} + dontUseSoftClippedBases: {description: "Do not use soft-clipped bases. Should be 'true' for RNA variant calling.", category: "common"} + standardMinConfidenceThresholdForCalling: {description: "Confidence threshold used for calling variants.", category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} diff --git a/isoseq3.wdl b/isoseq3.wdl index 8cc0db8fe0456f5626ee4eebb8bb586846a370d1..10d87bbc2264ac1aad7661f8ab8786249503684a 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -27,50 +27,35 @@ task Refine { String logLevel = "WARN" File inputBamFile File primerFile - String outputPrefix + String outputDir + String outputNamePrefix - Int cores = 4 - String memory = "10G" + Int cores = 2 + String memory = "2G" String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" } - command <<< + command { set -e - mkdir -p "$(dirname ~{outputPrefix})" - - # Create a unique output name base on the input bam file. 
- bamBasename="$(basename ~{inputBamFile})" - bamNewName="${bamBasename/fl/flnc}" - folderDirname="$(dirname ~{outputPrefix})" - combinedOutput="${folderDirname}/${bamNewName}" - + mkdir -p "~{outputDir}" isoseq3 refine \ --min-polya-length ~{minPolyAlength} \ ~{true="--require-polya" false="" requirePolyA} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ - --log-file "${bamNewName}.stderr.log" \ + --log-file "~{outputDir}/~{outputNamePrefix}.stderr.log" \ ~{inputBamFile} \ ~{primerFile} \ - ${bamNewName} - - # Copy commands below are needed because naming schema for Refine output - # can not be correctly handled in the WDL output section. - cp "${bamNewName}" "${combinedOutput}" - cp "${bamNewName}.pbi" "${combinedOutput}.pbi" - cp "${bamNewName/bam/consensusreadset}.xml" "${combinedOutput/bam/consensusreadset}.xml" - cp "${bamNewName/bam/filter_summary}.json" "${combinedOutput/bam/filter_summary}.json" - cp "${bamNewName/bam/report}.csv" "${combinedOutput/bam/report}.csv" - cp "${bamNewName}.stderr.log" "${combinedOutput}.stderr.log" - >>> + "~{outputDir}/~{outputNamePrefix}.bam" + } output { - Array[File] outputFLNCfile = glob("*.bam") - Array[File] outputFLNCindexFile = glob("*.bam.pbi") - Array[File] outputConsensusReadsetFile = glob("*.consensusreadset.xml") - Array[File] outputFilterSummaryFile = glob("*.filter_summary.json") - Array[File] outputReportFile = glob("*.report.csv") - Array[File] outputSTDERRfile = glob("*.stderr.log") + File outputFLNCfile = outputDir + "/" + outputNamePrefix + ".bam" + File outputFLNCindexFile = outputDir + "/" + outputNamePrefix + ".bam.pbi" + File outputConsensusReadsetFile = outputDir + "/" + outputNamePrefix + ".consensusreadset.xml" + File outputFilterSummaryFile = outputDir + "/" + outputNamePrefix + ".filter_summary.json" + File outputReportFile = outputDir + "/" + outputNamePrefix + ".report.csv" + File outputSTDERRfile = outputDir + "/" + outputNamePrefix + ".stderr.log" } runtime { @@ -86,7 +71,8 @@ task Refine { 
logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} inputBamFile: {description: "BAM input file.", category: "required"} primerFile: {description: "Barcode/primer fasta file.", category: "required"} - outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + outputNamePrefix: {description: "Basename of the output files.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/lima.wdl b/lima.wdl index 747959a1eb9b1695ae891e178b36725d95204bed..ba8a5407fa1be7757beaff0b5bdb404016793352 100644 --- a/lima.wdl +++ b/lima.wdl @@ -48,8 +48,8 @@ task Lima { File barcodeFile String outputPrefix - Int cores = 4 - String memory = "10G" + Int cores = 2 + String memory = "2G" String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" } @@ -87,13 +87,13 @@ task Lima { ~{barcodeFile} \ ~{basename(outputPrefix) + ".fl.bam"} - # Move commands below are needed because glob command does not find + # Copy commands below are needed because glob command does not find # multiple bam/bam.pbi/subreadset.xml files when not located in working # directory. 
- mv "~{basename(outputPrefix)}.fl.json" "~{outputPrefix}.fl.json" - mv "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" - mv "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" - mv "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" + cp "~{basename(outputPrefix)}.fl.json" "~{outputPrefix}.fl.json" + cp "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" + cp "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" + cp "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" } output { diff --git a/picard.wdl b/picard.wdl index 7df96aa93e758566419a73f7a98f01669cb7d3d6..5393cd3a30487f4cda0a40b4744717a9ad6ad505 100644 --- a/picard.wdl +++ b/picard.wdl @@ -313,6 +313,48 @@ task CollectTargetedPcrMetrics { } } +task CreateSequenceDictionary { + input { + File inputFile + String outputDir + + String memory = "3G" + String javaXmx = "2G" + String dockerImage = "quay.io/biocontainers/picard:2.22.3--0" + } + + command { + set -e + mkdir -p "~{outputDir}" + picard -Xmx~{javaXmx} \ + -XX:ParallelGCThreads=1 \ + CreateSequenceDictionary \ + REFERENCE=~{inputFile} \ + OUTPUT="~{outputDir}/$(basename ~{inputFile}).dict" + } + + output { + File outputDict = outputDir + "/" + basename(inputFile) + ".dict" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputDict: {description: "Dictionary of the input fasta file."} + } +} + # Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs task GatherBamFiles { input { diff --git a/samtools.wdl b/samtools.wdl index a4a893a1ee34b8ce81a9b07f9cf976cd5bdc1292..5521c6aaa5f93badddc571acf400f52afa98de1f 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -57,6 +57,44 @@ task BgzipAndIndex { } } +task Faidx { + input { + File inputFile + String outputDir + + String memory = "2G" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + } + + command { + set -e + mkdir -p "~{outputDir}" + ln -s ~{inputFile} "~{outputDir}/$(basename ~{inputFile})" + samtools faidx \ + "~{outputDir}/$(basename ~{inputFile})" + } + + output { + File outputIndex = outputDir + "/" + basename(inputFile) + ".fai" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputIndex: {description: "Index of the input fasta file."} + } +} + task Index { input { File bamFile @@ -136,34 +174,52 @@ task Merge { } } -task SortByName { +task Sort { input { - File bamFile - String outputBamPath = "namesorted.bam" + File inputBam + String outputPath + Boolean sortByName = false + Int compressionLevel = 1 - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String memory = "2G" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + + Int? 
threads } command { set -e - mkdir -p "$(dirname ~{outputBamPath})" - samtools sort -n ~{bamFile} -o ~{outputBamPath} + mkdir -p "$(dirname ~{outputPath})" + samtools sort \ + -l ~{compressionLevel} \ + ~{true="-n" false="" sortByName} \ + ~{"--threads " + threads} \ + -o ~{outputPath} \ + ~{inputBam} } output { - File outputBam = outputBamPath + File outputSortedBam = outputPath } runtime { + cpu: 1 + select_first([threads, 0]) + memory: memory docker: dockerImage } parameter_meta { # inputs - bamFile: {description: "The BAM file to get sorted.", category: "required"} - outputBamPath: {description: "The location the sorted BAM file should be written to.", category: "common"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + inputBam: {description: "The input SAM file.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + + # outputs + outputSortedBam: {description: "Sorted BAM file."} } } diff --git a/scripts b/scripts index dfef7cb2555667126dc1751add414527240d71bc..b83da72b9b43b956a3062b78fb08044eb9fae464 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit dfef7cb2555667126dc1751add414527240d71bc +Subproject commit b83da72b9b43b956a3062b78fb08044eb9fae464 diff --git a/smoove.wdl b/smoove.wdl new file mode 100644 index 0000000000000000000000000000000000000000..e8846f72b48ea5e0702af49c0954e6d27e20812c --- /dev/null +++ b/smoove.wdl @@ -0,0 +1,72 @@ +version 1.0 + +# MIT License +# +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Call { + input { + File bamFile + File bamIndex + File referenceFasta + File referenceFastaFai + String sample + String outputDir = "./smoove" + + String memory = "15G" + String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" + Int timeMinutes = 1440 + } + + command { + set -e + mkdir -p "~{outputDir}" + smoove call \ + --outdir "~{outputDir}" \ + --name "~{sample}" \ + --fasta ~{referenceFasta} \ + ~{bamFile} + } + + output { + File smooveVcf = outputDir + "/" + sample + "-smoove.vcf.gz" + } + + runtime { + memory: memory + docker: dockerImage + time_minutes: timeMinutes + + } + + parameter_meta { + # inputs + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index of the bam file.", category: "required"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} + referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } + outputDir: {description: "The location the output VCF file should be written.", category: "common"} + sample: {description: "The name of the sample.", category: "required"} + memory: {description: "The memory required to run the programs.", category: "advanced"} + timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +}