From 5b096cee1b35d8ff404567571ce429f3a46ec7c4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman <r.h.p.vorderman@lumc.nl> Date: Wed, 24 Jun 2020 10:43:27 +0200 Subject: [PATCH] Put sorting back into bwakit task --- bwa.wdl | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 3e11eb2..0095f48 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -92,7 +92,16 @@ task Kit { Boolean sixtyFour = false Int threads = 4 - String memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G")) + + # Samtools uses *additional* threads. So by default this option should + # not be used. + Int sortThreads = 0 + Int sortMemoryPerThreadGb = 4 + # Compression uses zlib. Higher than level 2 causes enormous slowdowns. + # GATK/Picard default is level 2. + Int compressionLevel = 1 + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. + Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biowdl/bwakit:0.7.17-dev-experimental" } @@ -112,7 +121,7 @@ task Kit { ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ samtools sort \ ~{"-@ " + sortThreads} \ - -m ~{sortMemoryPerThread} \ + -m ~{sortMemoryPerThreadGb}G \ -l ~{compressionLevel} \ - \ -o ~{outputPrefix}.aln.bam @@ -121,6 +130,7 @@ task Kit { output { File outputBam = outputPrefix + ".aln.bam" + File outputBamIndex = outputPrefix + ".aln.bai" } runtime { @@ -141,14 +151,18 @@ task Kit { readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + sortThreads: {description: "The number of additional 
threads to use for sorting.", category: "advanced"} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputBam: "The produced BAM file." + outputBamIndex: "The index of the produced BAM file." } } -- GitLab