version 1.0

# Copyright (c) 2017 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Runs `bwa mem` on one or two read files, optionally pipes the alignments
# through `bwa-postalt.js`, and sorts the result with `samtools sort` into
# "<outputPrefix>.aln.bam". The bwa stderr is written to
# "<outputPrefix>.log.bwamem".
task Mem {
    input {
        File read1
        File? read2
        BwaIndex bwaIndex
        String outputPrefix
        Boolean sixtyFour = false
        Boolean usePostalt = false
        Boolean useSoftclippingForSupplementary = false
        Int sortMemoryPerThreadGb = 2
        Int compressionLevel = 1

        String? readgroup
        Int? sortThreads

        Int threads = 4
        Int? memoryGb
        Int timeMinutes = 10 + ceil(size([read1, read2], "GiB") * 300 / threads)
        # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10.
        String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0"
    }

    # Samtools sort may block the pipe while it is writing data to disk.
    # This can lead to cpu underutilization.
    # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads.
    Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0)
    # An explicitly supplied sortThreads always wins over the estimate.
    Int totalSortThreads = select_first([sortThreads, estimatedSortThreads])
    # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here.
    Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "GiB") * 2) + sortMemoryPerThreadGb * totalSortThreads

    # The bwa postalt script is commented out as soon as usePostalt = false.
    # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them.
    command {
        set -e
        # Without pipefail the exit status of the pipeline below is that of
        # `samtools sort`, so a failing `bwa mem` would go unnoticed and could
        # leave a truncated BAM. Not every POSIX shell supports the option and
        # an unsupported `set -o` may abort the shell, so probe for it in a
        # subshell first and only enable it when it is available.
        (set -o pipefail) 2>/dev/null && set -o pipefail
        mkdir -p "$(dirname ~{outputPrefix})"
        bwa mem \
          -t ~{threads} \
          ~{if useSoftclippingForSupplementary then "-Y" else ""} \
          ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \
          ~{bwaIndex.fastaFile} \
          ~{read1} \
          ~{read2} \
          2> ~{outputPrefix}.log.bwamem | \
          ~{true="" false="#" usePostalt} bwa-postalt.js -p ~{outputPrefix}.hla ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \
          samtools sort \
          ~{"-@ " + totalSortThreads} \
          -m ~{sortMemoryPerThreadGb}G \
          -l ~{compressionLevel} \
          - \
          -o ~{outputPrefix}.aln.bam
    }

    output {
        File outputBam = outputPrefix + ".aln.bam"
        # Optional: the postalt step only runs when usePostalt is true.
        # NOTE(review): confirm that bwa-postalt.js writes a file with exactly
        # this name rather than files that merely start with this prefix.
        File? outputHla = outputPrefix + ".hla"
    }

    runtime {
        # One extra thread for bwa-postalt + samtools is not needed.
        # These only use 5-10% of compute power and not always simultaneously.
        cpu: threads
        memory: "~{select_first([memoryGb, estimatedMemoryGb])}GiB"
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        # inputs
        read1: {description: "The first-end fastq file.", category: "required"}
        read2: {description: "The second-end fastq file.", category: "common"}
        bwaIndex: {description: "The BWA index, including (optionally) a .alt file.", category: "required"}
        outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"}
        sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"}
        usePostalt: {description: "Whether to use the postalt script from bwa kit.", category: "common"}
        useSoftclippingForSupplementary: {description: "Use soft-clipping for supplementary alignments instead of hard-clipping", category: "common"}
        sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"}
        compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"}
        readgroup: {description: "A readgroup identifier.", category: "common"}
        sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"}
        threads: {description: "The number of threads to use for alignment.", category: "advanced"}
        memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}

        # outputs
        outputBam: {description: "The produced BAM file."}
        outputHla: {description: "The produced HLA file."}
    }
}

# A reference fasta file together with the BWA index files that belong to it.
struct BwaIndex {
    File fastaFile
    Array[File] indexFiles
}

# Copies the fasta file into the working directory and runs `bwa index` on the
# copy, returning the copy and its index files as a BwaIndex.
task Index {
    input {
        File fasta
    }

    # The index is built next to a local copy so that the fasta file and its
    # index files end up in the same (output) directory.
    String indexedFile = basename(fasta)

    command {
        set -e
        cp ~{fasta} ~{indexedFile}
        bwa index ~{indexedFile}
    }

    output {
        BwaIndex index = object {
            fastaFile: indexedFile,
            indexFiles: [
                indexedFile + ".amb",
                indexedFile + ".ann",
                indexedFile + ".bwt",
                indexedFile + ".pac",
                indexedFile + ".sa"
            ]
        }
    }

    runtime {
        docker: "quay.io/biocontainers/bwa:0.7.17--hed695b0_7"
        cpu: 1
        # Whole number of GiB: the fasta size rounded up, plus 1 GiB headroom.
        # The size is measured in the same unit as the suffix.
        memory: "~{ceil(size(fasta, 'GiB')) + 1}GiB"
    }

    parameter_meta {
        # inputs
        fasta: {description: "The reference fasta file to build a BWA index for.", category: "required"}

        # outputs
        index: {description: "The copied fasta file together with the index files produced by bwa index."}
    }
}