diff --git a/samtools.wdl b/samtools.wdl index e123e6356e99df28a9e7311204230572be89c482..5648eb1cf403fbe3c877fe72f3cae066514d7026 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -98,297 +98,301 @@ task Faidx { } } -task Index { +task Fastq { input { - File bamFile - String? outputBamPath - String memory = "2G" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) + File inputBam + String outputRead1 + String? outputRead2 + String? outputRead0 + Int? includeFilter + Int? excludeFilter + Int? excludeSpecificFilter + Boolean appendReadNumber = false + Boolean outputQuality = false + Int? compressionLevel + + Int threads = 1 + String memory = "1G" + Int timeMinutes = 1 + ceil(size(inputBam) * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } - # Select_first is needed, otherwise womtool validate fails. - String outputPath = select_first([outputBamPath, basename(bamFile)]) - String bamIndexPath = sub(outputPath, "\.bam$", ".bai") - command { - bash -c ' - set -e - # Make sure outputBamPath does not exist. - if [ ! -f ~{outputPath} ] - then - mkdir -p "$(dirname ~{outputPath})" - ln ~{bamFile} ~{outputPath} - fi - samtools index ~{outputPath} ~{bamIndexPath} - ' + samtools fastq \ + ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ + ~{"-2 " + outputRead2} \ + ~{"-0 " + outputRead0} \ + ~{"-f " + includeFilter} \ + ~{"-F " + excludeFilter} \ + ~{"-G " + excludeSpecificFilter} \ + ~{true="-N" false="-n" appendReadNumber} \ + ~{true="-O" false="" outputQuality} \ + ~{"-c " + compressionLevel} \ + ~{"--threads " + threads} \ + ~{inputBam} } output { - File indexedBam = outputPath - File index = bamIndexPath + File read1 = outputRead1 + File? read2 = outputRead2 + File? read0 = outputRead0 } runtime { + cpu: threads memory: memory - time_minutes: timeMinutes docker: dockerImage + time_minutes: timeMinutes } parameter_meta { # inputs - bamFile: {description: "The BAM file for which an index should be made.", category: "required"} - outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", - category: "common"} - memory: {description: "The amount of memory needed for the job.", category: "advanced"} + inputBam: {description: "The bam file to process.", category: "required"} + outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} + outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} + outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} + includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`", category: "advanced"} + excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. Corresponds to `-F`", category: "advanced"} + excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`", category: "advanced"} + appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`", category: "advanced"} + outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -task Merge { +task FilterShortReadsBam { input { - Array[File]+ bamFiles - String outputBamPath = "merged.bam" - Boolean force = true - - Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) + File bamFile + String outputPathBam + String memory = "1G" + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } - String indexPath = sub(outputBamPath, "\.bam$",".bai") + + String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") command { set -e - mkdir -p "$(dirname ~{outputBamPath})" - samtools merge ~{true="-f" false="" force} ~{outputBamPath} ~{sep=' ' bamFiles} - samtools index ~{outputBamPath} ~{indexPath} + mkdir -p "$(dirname ~{outputPathBam})" + samtools view -h ~{bamFile} | \ + awk 'length($10) > 30 || $1 ~/^@/' | \ + samtools view -bS -> ~{outputPathBam} + samtools index ~{outputPathBam} ~{outputPathBamIndex} } output { - File outputBam = outputBamPath - File outputBamIndex = indexPath + File filteredBam = outputPathBam + File filteredBamIndex = outputPathBamIndex } runtime { - docker: dockerImage + memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - # inputs - bamFiles: {description: "The BAM files to merge.", category: "required"} - outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} - force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} + bamFile: {description: "The bam file to process.", category: "required"} + outputPathBam: {description: "The filtered bam file.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -task Sort { +task Flagstat { input { File inputBam String outputPath - Boolean sortByName = false - Int compressionLevel = 1 - String memory = "2G" - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) - Int? threads + String memory = "1G" + Int timeMinutes = 1 + ceil(size(inputBam, "G")) + String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } command { set -e mkdir -p "$(dirname ~{outputPath})" - samtools sort \ - -l ~{compressionLevel} \ - ~{true="-n" false="" sortByName} \ - ~{"--threads " + threads} \ - -o ~{outputPath} \ - ~{inputBam} + samtools flagstat ~{inputBam} > ~{outputPath} } output { - File outputSortedBam = outputPath + File flagstat = outputPath } runtime { - cpu: 1 + select_first([threads, 0]) memory: memory - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { # inputs - inputBam: {description: "The input SAM file.", category: "required"} - outputPath: {description: "Output directory path + output file.", category: "required"} - sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} - compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + inputBam: {description: "The BAM file for which statistics should be retrieved.", category: "required"} + outputPath: {description: "The location the ouput should be written to.", category: "required"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - # outputs - outputSortedBam: {description: "Sorted BAM file."} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } -task Markdup { +task Index { input { - File inputBam - String outputBamPath - - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + File bamFile + String? outputBamPath + String memory = "2G" + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } + # Select_first is needed, otherwise womtool validate fails. + String outputPath = select_first([outputBamPath, basename(bamFile)]) + String bamIndexPath = sub(outputPath, "\.bam$", ".bai") + command { + bash -c ' set -e - mkdir -p "$(dirname ~{outputBamPath})" - samtools markdup ~{inputBam} ~{outputBamPath} + # Make sure outputBamPath does not exist. + if [ ! -f ~{outputPath} ] + then + mkdir -p "$(dirname ~{outputPath})" + ln ~{bamFile} ~{outputPath} + fi + samtools index ~{outputPath} ~{bamIndexPath} + ' } output { - File outputBam = outputBamPath + File indexedBam = outputPath + File index = bamIndexPath } runtime { - docker: dockerImage + memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { # inputs - inputBam: {description: "The BAM file to be processed.", category: "required"} - outputBamPath: {description: "The location of the output BAM file.", category: "required"} + bamFile: {description: "The BAM file for which an index should be made.", category: "required"} + outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", + category: "common"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -task FilterShortReadsBam { +task Markdup { input { - File bamFile - String outputPathBam - String memory = "1G" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) + File inputBam + String outputBamPath + + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } - String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") - command { set -e - mkdir -p "$(dirname ~{outputPathBam})" - samtools view -h ~{bamFile} | \ - awk 'length($10) > 30 || $1 ~/^@/' | \ - samtools view -bS -> ~{outputPathBam} - samtools index ~{outputPathBam} ~{outputPathBamIndex} + mkdir -p "$(dirname ~{outputBamPath})" + samtools markdup ~{inputBam} ~{outputBamPath} } output { - File filteredBam = outputPathBam - File filteredBamIndex = outputPathBamIndex + File outputBam = outputBamPath } runtime { - memory: memory - time_minutes: timeMinutes docker: dockerImage + time_minutes: timeMinutes } parameter_meta { - bamFile: {description: "The bam file to process.", category: "required"} - outputPathBam: {description: "The filtered bam file.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + # inputs + inputBam: {description: "The BAM file to be processed.", category: "required"} + outputBamPath: {description: "The location of the output BAM file.", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } -task Flagstat { +task Merge { input { - File inputBam - String outputPath + Array[File]+ bamFiles + String outputBamPath = "merged.bam" + Boolean force = true - String memory = "1G" - Int timeMinutes = 1 + ceil(size(inputBam, "G")) + Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } + String indexPath = sub(outputBamPath, "\.bam$",".bai") command { set -e - mkdir -p "$(dirname ~{outputPath})" - samtools flagstat ~{inputBam} > ~{outputPath} + mkdir -p "$(dirname ~{outputBamPath})" + samtools merge ~{true="-f" false="" force} ~{outputBamPath} ~{sep=' ' bamFiles} + samtools index ~{outputBamPath} ~{indexPath} } output { - File flagstat = outputPath + File outputBam = outputBamPath + File outputBamIndex = indexPath } runtime { - memory: memory - time_minutes: timeMinutes docker: dockerImage + time_minutes: timeMinutes } parameter_meta { # inputs - inputBam: {description: "The BAM file for which statistics should be retrieved.", category: "required"} - outputPath: {description: "The location the ouput should be written to.", category: "required"} - memory: {description: "The amount of memory needed for the job.", category: "advanced"} + bamFiles: {description: "The BAM files to merge.", category: "required"} + outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} + force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -task Fastq { +task Sort { input { File inputBam - String outputRead1 - String? outputRead2 - String? outputRead0 - Int? includeFilter - Int? excludeFilter - Int? excludeSpecificFilter - Boolean appendReadNumber = false - Boolean outputQuality = false - Int? compressionLevel + String outputPath + Boolean sortByName = false + Int compressionLevel = 1 - Int threads = 1 - String memory = "1G" - Int timeMinutes = 1 + ceil(size(inputBam) * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String memory = "2G" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + Int? threads } command { - samtools fastq \ - ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ - ~{"-2 " + outputRead2} \ - ~{"-0 " + outputRead0} \ - ~{"-f " + includeFilter} \ - ~{"-F " + excludeFilter} \ - ~{"-G " + excludeSpecificFilter} \ - ~{true="-N" false="-n" appendReadNumber} \ - ~{true="-O" false="" outputQuality} \ - ~{"-c " + compressionLevel} \ + set -e + mkdir -p "$(dirname ~{outputPath})" + samtools sort \ + -l ~{compressionLevel} \ + ~{true="-n" false="" sortByName} \ ~{"--threads " + threads} \ + -o ~{outputPath} \ ~{inputBam} } output { - File read1 = outputRead1 - File? read2 = outputRead2 - File? read0 = outputRead0 + File outputSortedBam = outputPath } runtime { - cpu: threads + cpu: 1 + select_first([threads, 0]) memory: memory docker: dockerImage time_minutes: timeMinutes @@ -396,20 +400,16 @@ task Fastq { parameter_meta { # inputs - inputBam: {description: "The bam file to process.", category: "required"} - outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} - outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} - outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} - includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`", category: "advanced"} - excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. Corresponds to `-F`", category: "advanced"} - excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`", category: "advanced"} - appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`", category: "advanced"} - outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + inputBam: {description: "The input SAM file.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + # outputs + outputSortedBam: {description: "Sorted BAM file."} } } @@ -518,38 +518,4 @@ task View { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} - -task FilterShortReadsBam { - input { - File bamFile - String outputPathBam - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" - } - - String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") - - command { - set -e - mkdir -p "$(dirname ~{outputPathBam})" - samtools view -h ~{bamFile} | \ - awk 'length($10) > 30 || $1 ~/^@/' | \ - samtools view -bS -> ~{outputPathBam} - samtools index ~{outputPathBam} ~{outputPathBamIndex} - } - - output { - File filteredBam = outputPathBam - File filteredBamIndex = outputPathBamIndex - } - - runtime { - docker: dockerImage - } - - parameter_meta { - bamFile: {description: "The bam file to process.", category: "required"} - outputPathBam: {description: "The filtered bam file.", category: "common"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - } -} +} \ No newline at end of file