diff --git a/CHANGELOG.md b/CHANGELOG.md
index 95241551a95ad5400cd7b642bfafc1e72074b92f..55fb1e8ab8385737b340dd91c7cb925a6abb6089 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,10 @@ that users understand how the changes affect the new version.
 
 version 4.0.0-develop
 ---------------------------
++ bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The
+  number of sort threads now scales with the number of aligner threads.
+  Using more sort threads reduces the chance of the pipe to samtools sort
+  filling up and blocking the aligner.
 + Renamed a few inputs in centrifuge.wdl, isoseq3.wdl, talon.wdl,
   transcriptclean.wdl to be more descriptive.
 + Renamed outputs of tasks used in the TALON-WDL, PacBio-subreads-processing &
diff --git a/bwa.wdl b/bwa.wdl
index 78881ad295f94d53eb84760b366534b40f7868e4..58e1dc8031dfb04221cb1711b63e6fb4cabd6a87 100644
--- a/bwa.wdl
+++ b/bwa.wdl
@@ -29,16 +29,23 @@ task Mem {
         String? readgroup
 
         Int threads = 4
-        Int sortThreads = 1
+        Int? sortThreads
         Int sortMemoryPerThreadGb = 2
         Int compressionLevel = 1
-        # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here.
-        Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads
+        Int? memoryGb 
         Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads)
         # This container contains: samtools (1.10), bwa (0.7.17-r1188)
         String dockerImage = "quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0"
     }
 
+    # Samtools sort may block the pipe while it is writing data to disk.
+    # This can lead to CPU underutilization.
+    # Sort threads: 1 if threads is 1, 2 for 2-4 threads, 3 for 5-8 threads, and so on.
+    Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0)
+    Int totalSortThreads = select_first([sortThreads, estimatedSortThreads])
+    # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here.  
+    Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads
+
     command {
         set -e -o pipefail
         mkdir -p "$(dirname ~{outputPath})"
@@ -49,7 +56,7 @@ task Mem {
         ~{read1} \
         ~{read2} \
         | samtools sort \
-        ~{"-@ " + sortThreads} \
+        ~{"-@ " + totalSortThreads} \
         -m ~{sortMemoryPerThreadGb}G \
         -l ~{compressionLevel} \
         - \
@@ -62,7 +69,7 @@ task Mem {
 
     runtime {
         cpu: threads
-        memory: "~{memoryGb}G"
+        memory: "~{select_first([memoryGb, estimatedMemoryGb])}G"
         time_minutes: timeMinutes
         docker: dockerImage
     }
@@ -95,16 +102,23 @@ task Kit {
         Boolean sixtyFour = false
 
         Int threads = 4
-        Int sortThreads = 1
+        Int? sortThreads
         Int sortMemoryPerThreadGb = 2
         Int compressionLevel = 1
-        # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here.
-        Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads
+        Int? memoryGb 
         Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads)
         # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10
         String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0"
     }
 
+    # Samtools sort may block the pipe while it is writing data to disk.
+    # This can lead to CPU underutilization.
+    # Sort threads: 1 if threads is 1, 2 for 2-4 threads, 3 for 5-8 threads, and so on.
+    Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0)
+    Int totalSortThreads = select_first([sortThreads, estimatedSortThreads])
+    # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here.  
+    Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads
+    
     command {
         set -e
         mkdir -p "$(dirname ~{outputPrefix})"
@@ -119,7 +133,7 @@ task Kit {
           -p ~{outputPrefix}.hla \
           ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \
         samtools sort \
-          ~{"-@ " + sortThreads} \
+          ~{"-@ " + totalSortThreads} \
           -m ~{sortMemoryPerThreadGb}G \
           -l ~{compressionLevel} \
           - \
@@ -134,7 +148,7 @@ task Kit {
         # One extra thread for bwa-postalt + samtools is not needed.
         # These only use 5-10% of compute power and not always simultaneously.
         cpu: threads  
-        memory: "~{memoryGb}G"
+        memory: "~{select_first([memoryGb, estimatedMemoryGb])}G"
         time_minutes: timeMinutes
         docker: dockerImage
     }
diff --git a/hisat2.wdl b/hisat2.wdl
index c24610edc6fdc732a70040756b62483d7dc509d4..f9a4bc599f0fc43ce759807e4c5f138f02e3aa20 100644
--- a/hisat2.wdl
+++ b/hisat2.wdl
@@ -34,10 +34,10 @@ task Hisat2 {
         String summaryFilePath = basename(outputBam, ".bam") + ".summary.txt"
 
         Int threads = 4
-        Int sortThreads = 1
+        Int? sortThreads
         Int sortMemoryPerThreadGb = 2
         Int compressionLevel = 1
-        Int memoryGb = 1 + threads + ceil(size(indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads
+        Int? memoryGb
         Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads)
         # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1
         # is a combination of hisat2 and samtools
@@ -45,7 +45,12 @@ task Hisat2 {
         String dockerImage = "quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2880dd9d8ad0a7b221d4eacda9a818e92983128d-0"
     }
 
-    String bamIndexPath = sub(outputBam, "\.bam$", ".bai")
+    # Samtools sort may block the pipe while it is writing data to disk.
+    # This can lead to CPU underutilization.
+    # Sort threads: 1 if threads is 1, 2 for 2-4 threads, 3 for 5-8 threads, and so on.
+    Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0)
+    Int totalSortThreads = select_first([sortThreads, estimatedSortThreads])
+    Int estimatedMemoryGb = 1 + ceil(size(indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads
 
     command {
         set -e -o pipefail
@@ -63,7 +68,7 @@ task Hisat2 {
         --new-summary \
         --summary-file ~{summaryFilePath} \
         | samtools sort \
-        ~{"-@ " + sortThreads} \
+        ~{"-@ " + totalSortThreads} \
         -m ~{sortMemoryPerThreadGb}G \
         -l ~{compressionLevel} \
         - \
@@ -76,8 +81,8 @@ task Hisat2 {
     }
 
     runtime {
-        memory: "~{memoryGb}G"
-        cpu: threads + 1
+        memory: "~{select_first([memoryGb, estimatedMemoryGb])}G"
+        cpu: threads
         time_minutes: timeMinutes
         docker: dockerImage
     }