diff --git a/CHANGELOG.md b/CHANGELOG.md index 96f4559cd9e859cad7fd718f1a80ecc369cdde9f..272499c40629072137c99ac6afaa039e13848727 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.0.0-dev +--------------------------- ++ Add wdl file for pacbio's bam2fastx tool. + version 4.0.0 --------------------------- + Picard MergeVcf now uses compression level 1 by default. diff --git a/bam2fastx.wdl b/bam2fastx.wdl new file mode 100644 index 0000000000000000000000000000000000000000..09c5689734db291480e467eef99b38f04ae26dcb --- /dev/null +++ b/bam2fastx.wdl @@ -0,0 +1,129 @@ +version 1.0 + +# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Bam2Fasta { + input { + File inputFile + String outputPrefix + Int compressionLevel = 1 + Boolean uncompressedOutput = false + Boolean splitByBarcode = false + + String? seqIdPrefix + + String memory = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.0--he1c1bb9_8" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + bam2fasta \ + --output ~{outputPrefix} \ + -c ~{compressionLevel} + ~{true="-u" false="" uncompressedOutput} \ + ~{true="--split-barcodes" false="" splitByBarcode} \ + ~{"--seqid-prefix " + seqIdPrefix} \ + ~{inputFile} + } + + output { + File fastaFile = outputPrefix + ".fasta.gz" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input pacbio bam file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} + uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} + splitByBarcode: {description: "Split output into multiple fasta files, by barcode pairs.", category: "advanced"} + seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + fastaFile: {description: "The fasta output file."} + } +} + +task Bam2Fastq { + input { + File inputFile + String outputPrefix + Int compressionLevel = 1 + Boolean uncompressedOutput = false + Boolean splitByBarcode = false + + String? seqIdPrefix + + String memory = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.0--he1c1bb9_8" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + bam2fastq \ + --output ~{outputPrefix} \ + -c ~{compressionLevel} + ~{true="-u" false="" uncompressedOutput} \ + ~{true="--split-barcodes" false="" splitByBarcode} \ + ~{"--seqid-prefix " + seqIdPrefix} \ + ~{inputFile} + } + + output { + File fastaFile = outputPrefix + ".fastq.gz" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input pacbio bam file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} + uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} + splitByBarcode: {description: "Split output into multiple fastq files, by barcode pairs.", category: "advanced"} + seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + fastaFile: {description: "The fastq output file."} + } +}