From 50c5d957408dbf8a6f1d6aa79c0a3b05ffdde664 Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Tue, 7 Jul 2020 12:41:49 +0200
Subject: [PATCH] Add samtools controls to hisat2

---
 CHANGELOG.md |  3 +++
 hisat2.wdl   | 21 +++++++++++++++------
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4acadc5..edfffb5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,9 @@ that users understand how the changes affect the new version.
 
 version 4.0.0-develop
 ---------------------------
++ Hisat2 task now has inputs controlling samtools sort and BAM compression.
++ Alignment tasks no longer produce BAM indexes as these are not needed
+  by the markduplicates step.
 + Picard SortSam added as a task.
 + Md5 files are no longer created by default on Picard tasks that generate
   BAM files.
diff --git a/hisat2.wdl b/hisat2.wdl
index 5937f86..77c370f 100644
--- a/hisat2.wdl
+++ b/hisat2.wdl
@@ -34,7 +34,10 @@ task Hisat2 {
         String summaryFilePath = basename(outputBam, ".bam") + ".summary.txt"
 
         Int threads = 4
-        String memory = "~{threads + 5 + ceil(size(indexFiles, "G"))}G"
+        Int sortThreads = 1
+        Int sortMemoryPerThreadGb = 2
+        Int compressionLevel = 1
+        Int memoryGb = 1 + threads + ceil(size(indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads
         Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads)
         # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1
         # is a combination of hisat2 and samtools
@@ -59,18 +62,21 @@ task Hisat2 {
         ~{true="--dta" false="" downstreamTranscriptomeAssembly} \
         --new-summary \
         --summary-file ~{summaryFilePath} \
-        | samtools sort > ~{outputBam}
-        samtools index ~{outputBam} ~{bamIndexPath}
+        | samtools sort \
+        ~{"-@ " + sortThreads} \
+        -m ~{sortMemoryPerThreadGb}G \
+        -l ~{compressionLevel} \
+        - \
+        -o ~{outputBam}
     }
 
     output {
         File bamFile = outputBam
-        File bamIndex = bamIndexPath
         File summaryFile = summaryFilePath
     }
 
     runtime {
-        memory: memory
+        memory: "~{memoryGb}G"
         cpu: threads + 1
         time_minutes: timeMinutes
         docker: dockerImage
@@ -88,9 +94,12 @@ task Hisat2 {
         downstreamTranscriptomeAssembly: {description: "Equivalent to hisat2's `--dta` flag.", category: "advanced"}
         summaryFilePath: {description: "Where the summary file should be written.", category: "advanced"}
         threads: {description: "The number of threads to use.", category: "advanced"}
-        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"}
         timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
         dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                       category: "advanced"}
+        sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"}
+        sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"}
+        compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"}
     }
 }
\ No newline at end of file
-- 
GitLab