From 187e1277c00f4ce25b496fc8a9f0d986e9870512 Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Tue, 30 Jun 2020 08:47:34 +0200
Subject: [PATCH] update sambamba memory requirements and parameter_meta

---
 sambamba.wdl | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/sambamba.wdl b/sambamba.wdl
index 4ef62dd..bf58dbc 100644
--- a/sambamba.wdl
+++ b/sambamba.wdl
@@ -25,18 +25,23 @@ task Markdup {
     input {
         Array[File] inputBams
         String outputPath
-        Int threads = 1
+        # Sambamba scales like this: 1 thread is fully utilized (1). 2 threads 1.8 utilized. 3 -> 2.4, 4-> 2.7.
+        # 2 threads reduces wall clock time by more than 40%.
+        Int threads = 2
         Int compressionLevel = 1
         Int? hashTableSize
         Int? overFlowListSize
-        Int? sortBufferSize
-        Int? ioBufferSize
+        # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1
+        Int sortBufferSize = 2048
+        Int ioBufferSize = 128
         Boolean removeDuplicates = false 
 
-        # According to the manual sambamba markdup uses about 2G per 100 million reads.
-        Int memoryGb = 1 + ceil(size(inputBams, 'G') / 8)
+        # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size.
+        # Added 1024 mb as a margin of safety
+        Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize
         String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2"
-        Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8)
+        # Time minute calculation does not work well for higher number of threads.
+        Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) / threads
     }
     String bamIndexPath = sub(outputPath, "\.bam$", ".bai")
 
@@ -62,11 +67,29 @@ task Markdup {
     }
 
     runtime {
-        memory: "~{memoryGb}G"
+        memory: "~{memoryMb}M"
         cpu: threads
         time_minutes: timeMinutes
         docker: dockerImage
     }
+
+    parameter_meta {
+        # inputs
+        inputBams: {description: "The input BAM files.", category: "required"}
+        outputPath: {description: "Output directory path + output file.", category: "required"}
+        compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"}
+        memoryMb: {description: "The amount of memory available to the job in megabytes.", category: "advanced"}
+        removeDuplicates: {description: "Whether to remove the duplicates (instead of only marking them).", category: "advanced"}
+        hashTableSize: {description: "Sets sambamba's hash table size", category: "advanced"}
+        overFlowListSize: {description: "Sets sambamba's overflow list size", category: "advanced"}
+        sortBufferSize: {description: "The amount of mb allocated to the sort buffer", category: "advanced"}
+        ioBufferSize: {description: "The amount of mb allocated to each IO buffer. Sambamba uses two IO buffers.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+        threads: {description: "The number of threads that will be used for this task.", category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        # outputs
+        outputBam: {description: "Sorted BAM file."}
+    }
 }
 
 task Sort {
-- 
GitLab