diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c3a3744b744383e7e3bbc4fc526e8223ef6efb5..e4dea7c7b9869b3d3abb5a7bdbe41c4eb238aaad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,20 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> + +version 3.2.0-develop +--------------------------- ++ Samtools: Fix quotations in sort command. ++ Samtools SortByName is now called Sort. ++ Generalize sort task to now also sort by position, instead of just read name. ++ Add CreateSequenceDictionary task to picard. ++ Add faidx task to samtools. ++ Isoseq3: Remove dirname command from output folder creation step. ++ Isoseq3: Requires more memory by default, is now 2G. ++ Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. ++ Lima: Replace mv command with cp. ++ Add WDL task for smoove (lumpy) sv-caller. + version 3.1.0 --------------------------- + Default threads for BWA in bwa.Kit task: 4. 
Samtools sort in the diff --git a/ccs.wdl b/ccs.wdl index 39bb0a19578e353a596c471e7490556de89896a9..3a8f887947faab7bc3ee898cf06fcd6ca28586d8 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -31,8 +31,8 @@ task CCS { File subreadsFile String outputPrefix - Int cores = 4 - String memory = "10G" + Int cores = 2 + String memory = "2G" String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" } diff --git a/isoseq3.wdl b/isoseq3.wdl index 8cc0db8fe0456f5626ee4eebb8bb586846a370d1..10d87bbc2264ac1aad7661f8ab8786249503684a 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -27,50 +27,35 @@ task Refine { String logLevel = "WARN" File inputBamFile File primerFile - String outputPrefix + String outputDir + String outputNamePrefix - Int cores = 4 - String memory = "10G" + Int cores = 2 + String memory = "2G" String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" } - command <<< + command { set -e - mkdir -p "$(dirname ~{outputPrefix})" - - # Create a unique output name base on the input bam file. - bamBasename="$(basename ~{inputBamFile})" - bamNewName="${bamBasename/fl/flnc}" - folderDirname="$(dirname ~{outputPrefix})" - combinedOutput="${folderDirname}/${bamNewName}" - + mkdir -p "~{outputDir}" isoseq3 refine \ --min-polya-length ~{minPolyAlength} \ ~{true="--require-polya" false="" requirePolyA} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ - --log-file "${bamNewName}.stderr.log" \ + --log-file "~{outputDir}/~{outputNamePrefix}.stderr.log" \ ~{inputBamFile} \ ~{primerFile} \ - ${bamNewName} - - # Copy commands below are needed because naming schema for Refine output - # can not be correctly handled in the WDL output section. 
- cp "${bamNewName}" "${combinedOutput}" - cp "${bamNewName}.pbi" "${combinedOutput}.pbi" - cp "${bamNewName/bam/consensusreadset}.xml" "${combinedOutput/bam/consensusreadset}.xml" - cp "${bamNewName/bam/filter_summary}.json" "${combinedOutput/bam/filter_summary}.json" - cp "${bamNewName/bam/report}.csv" "${combinedOutput/bam/report}.csv" - cp "${bamNewName}.stderr.log" "${combinedOutput}.stderr.log" - >>> + "~{outputDir}/~{outputNamePrefix}.bam" + } output { - Array[File] outputFLNCfile = glob("*.bam") - Array[File] outputFLNCindexFile = glob("*.bam.pbi") - Array[File] outputConsensusReadsetFile = glob("*.consensusreadset.xml") - Array[File] outputFilterSummaryFile = glob("*.filter_summary.json") - Array[File] outputReportFile = glob("*.report.csv") - Array[File] outputSTDERRfile = glob("*.stderr.log") + File outputFLNCfile = outputDir + "/" + outputNamePrefix + ".bam" + File outputFLNCindexFile = outputDir + "/" + outputNamePrefix + ".bam.pbi" + File outputConsensusReadsetFile = outputDir + "/" + outputNamePrefix + ".consensusreadset.xml" + File outputFilterSummaryFile = outputDir + "/" + outputNamePrefix + ".filter_summary.json" + File outputReportFile = outputDir + "/" + outputNamePrefix + ".report.csv" + File outputSTDERRfile = outputDir + "/" + outputNamePrefix + ".stderr.log" } runtime { @@ -86,7 +71,8 @@ task Refine { logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} inputBamFile: {description: "BAM input file.", category: "required"} primerFile: {description: "Barcode/primer fasta file.", category: "required"} - outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + outputNamePrefix: {description: "Basename of the output files.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/lima.wdl b/lima.wdl index 747959a1eb9b1695ae891e178b36725d95204bed..ba8a5407fa1be7757beaff0b5bdb404016793352 100644 --- a/lima.wdl +++ b/lima.wdl @@ -48,8 +48,8 @@ task Lima { File barcodeFile String outputPrefix - Int cores = 4 - String memory = "10G" + Int cores = 2 + String memory = "2G" String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" } @@ -87,13 +87,13 @@ task Lima { ~{barcodeFile} \ ~{basename(outputPrefix) + ".fl.bam"} - # Move commands below are needed because glob command does not find + # copy commands below are needed because glob command does not find # multiple bam/bam.pbi/subreadset.xml files when not located in working # directory. 
- mv "~{basename(outputPrefix)}.fl.json" "~{outputPrefix}.fl.json" - mv "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" - mv "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" - mv "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" + cp "~{basename(outputPrefix)}.fl.json" "~{outputPrefix}.fl.json" + cp "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" + cp "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" + cp "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" } output { diff --git a/picard.wdl b/picard.wdl index c9f9b8351eeda34549cfcf58c5f61a29eb46ca19..a63c1ba286ad19298ccbe575d4bae2cad394e631 100644 --- a/picard.wdl +++ b/picard.wdl @@ -313,6 +313,48 @@ task CollectTargetedPcrMetrics { } } +task CreateSequenceDictionary { + input { + File inputFile + String outputDir + + String memory = "3G" + String javaXmx = "2G" + String dockerImage = "quay.io/biocontainers/picard:2.22.3--0" + } + + command { + set -e + mkdir -p "~{outputDir}" + picard -Xmx~{javaXmx} \ + -XX:ParallelGCThreads=1 \ + CreateSequenceDictionary \ + REFERENCE=~{inputFile} \ + OUTPUT="~{outputDir}/$(basename ~{inputFile}).dict" + } + + output { + File outputDict = outputDir + "/" + basename(inputFile) + ".dict" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputDict: {description: "Dictionary of the input fasta file."} + } +} + # Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs task GatherBamFiles { input { diff --git a/samtools.wdl b/samtools.wdl index b1c748576402ecbb6fbfd425580e4e990bc2af6b..1c2f5d7871a737adf2c5da691669930e278bef1a 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -60,6 +60,44 @@ task BgzipAndIndex { } } +task Faidx { + input { + File inputFile + String outputDir + + String memory = "2G" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + } + + command { + set -e + mkdir -p "~{outputDir}" + ln -s ~{inputFile} "~{outputDir}/$(basename ~{inputFile})" + samtools faidx \ + "~{outputDir}/$(basename ~{inputFile})" + } + + output { + File outputIndex = outputDir + "/" + basename(inputFile) + ".fai" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputIndex: {description: "Index of the input fasta file."} + } +} + task Index { input { File bamFile @@ -148,37 +186,53 @@ task Merge { } } -task SortByName { +task Sort { input { - File bamFile - String outputBamPath = "namesorted.bam" + File inputBam + String outputPath + Boolean sortByName = false + Int compressionLevel = 1 + String memory = "2G" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + Int? threads } command { set -e - mkdir -p "$(dirname ~{outputBamPath})" - samtools sort -n ~{bamFile} -o ~{outputBamPath} + mkdir -p "$(dirname ~{outputPath})" + samtools sort \ + -l ~{compressionLevel} \ + ~{true="-n" false="" sortByName} \ + ~{"--threads " + threads} \ + -o ~{outputPath} \ + ~{inputBam} } output { - File outputBam = outputBamPath + File outputSortedBam = outputPath } runtime { + cpu: 1 + select_first([threads, 0]) + memory: memory docker: dockerImage time_minutes: timeMinutes } parameter_meta { # inputs - bamFile: {description: "The BAM file to get sorted.", category: "required"} - outputBamPath: {description: "The location the sorted BAM file should be written to.", category: "common"} + inputBam: {description: "The input SAM file.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + # outputs + outputSortedBam: {description: "Sorted BAM file."} } } @@ -464,4 +518,38 @@ task View { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} + +task FilterShortReadsBam { + input { + File bamFile + String outputPathBam + String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + } + + String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") + + command { + set -e + mkdir -p "$(dirname ~{outputPathBam})" + samtools view -h ~{bamFile} | \ + awk 'length($10) > 30 || $1 ~/^@/' | \ + samtools view -bS -> ~{outputPathBam} + samtools index ~{outputPathBam} ~{outputPathBamIndex} + } + + output { + File filteredBam = outputPathBam + File filteredBamIndex = outputPathBamIndex + } + + runtime { + docker: dockerImage + } + + parameter_meta { + bamFile: {description: "The bam file to process.", category: "required"} + outputPathBam: {description: "The filtered bam file.", category: "common"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} diff --git a/scripts b/scripts index dfef7cb2555667126dc1751add414527240d71bc..b83da72b9b43b956a3062b78fb08044eb9fae464 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit dfef7cb2555667126dc1751add414527240d71bc +Subproject commit b83da72b9b43b956a3062b78fb08044eb9fae464 diff --git a/smoove.wdl b/smoove.wdl new file mode 100644 index 0000000000000000000000000000000000000000..e8846f72b48ea5e0702af49c0954e6d27e20812c --- /dev/null +++ b/smoove.wdl @@ -0,0 +1,72 @@ +version 1.0 + +# MIT License +# +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Call { + input { + File bamFile + File bamIndex + File referenceFasta + File referenceFastaFai + String sample + String outputDir = "./smoove" + + String memory = "15G" + String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" + Int timeMinutes = 1440 + } + + command { + set -e + mkdir -p "~{outputDir}" + smoove call \ + --outdir "~{outputDir}" \ + --name "~{sample}" \ + --fasta "~{referenceFasta}" \ + "~{bamFile}" + } + + output { + File smooveVcf = outputDir + "/" + sample + "-smoove.vcf.gz" + } + + runtime { + memory: memory + docker: dockerImage + time_minutes: timeMinutes + + } + + parameter_meta { + # inputs + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index of the bam file.", category: "required"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} + referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } + outputDir: {description: "The location the output VCF file should be written.", category: "common"} + sample: {description: "The name of the sample.", category: "required"} + memory: {description: "The memory required to run the programs.", category: "advanced"} + timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +}