From c6fe0300c5d2e5275739148c051f931e717cd6f1 Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Fri, 28 Oct 2022 11:38:30 +0200
Subject: [PATCH] Use samtools dict instead of Picard CreateSequenceDictionary

---
 CHANGELOG.md |  2 +-
 biowdl.wdl   | 45 ---------------------------------------------
 samtools.wdl | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 46 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d94c2b5..b9df32a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,7 @@ that users understand how the changes affect the new version.
 -->
 version 5.1.0-dev
 ---------------------------
-+ Add a combined Picard CreateSequenceDictionary and samtools faidx task.
++ Add a combined samtools dict and samtools faidx task.
 + Add a BWA index task.
 + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` 
   previously. The WDL spec clearly distuingishes between SI and binary 
diff --git a/biowdl.wdl b/biowdl.wdl
index 58e94df..463dab7 100644
--- a/biowdl.wdl
+++ b/biowdl.wdl
@@ -72,49 +72,4 @@ task InputConverter {
         # outputs
         json: {description: "JSON file version of the input sample sheet."}
     }
-}
-
-task IndexFastaFile {
-    input {
-        File inputFile
-        String javaXmx = "2G"
-        String memory = "3GiB"
-    }
-    String outputFile = basename(inputFile)
-    # Capture .faÂ¸ .fna and .fasta
-    String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict"
-    # This executes both picard and samtools, so indexes are co-located in the same folder.
-    command <<<
-        set -e
-        cp ~{inputFile} ~{outputFile}
-        picard -Xmx~{javaXmx} \
-            -XX:ParallelGCThreads=1 \
-            CreateSequenceDictionary \
-            REFERENCE=~{inputFile} \
-            OUTPUT="~{outputDict}"
-        samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai
-    >>>
-
-    output {
-        File outputFasta = outputFile
-        File outputFastaDict = outputDict
-        File outputFastaFai = outputFile + ".fai"
-    }
-
-    runtime {
-        memory: memory
-        # Contains picard 2.27.4, samtools 1.15.1
-        docker: "quay.io/biocontainers/mulled-v2-b0664646864bfdb46c5343b1b2b93fc05adb4b77:39a005770a3e30fb6aa3bf424b57ddf52bae7ece-0"
-    }
-
-    parameter_meta {
-        # inputs
-        inputFile: {description: "The input fasta file.", category: "required"}
-        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
-        memory: {description: "The amount of memory available to the job.", category: "advanced"}
-        # outputs
-        outputFasta: {description: "Fasta file that is co-located with the indexes"}
-        outputFastaFai: {description: "Fasta index file for the outputFasta file."}
-        outputFastaDict: {description: "Sequence dictionary for the outputFasta file."}
-    }
 }
\ No newline at end of file
diff --git a/samtools.wdl b/samtools.wdl
index bee38d1..d5e3ce0 100644
--- a/samtools.wdl
+++ b/samtools.wdl
@@ -66,6 +66,49 @@ task BgzipAndIndex {
     }
 }
 
+task DictAndFaidx {
+    input {
+        File inputFile
+        String javaXmx = "2G"
+        String memory = "3GiB"
+        String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0"
+    }
+
+    String outputFile = basename(inputFile)
+    # Strip a trailing .fa, .fna or .fasta extension before appending .dict.
+    String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict"
+    # Copy the fasta into the working directory, then run dict and faidx so the fasta and both indexes are co-located.
+    command <<<
+        set -e
+        cp "~{inputFile}" "~{outputFile}"
+        samtools dict -o "~{outputDict}" "~{outputFile}"
+        samtools faidx "~{outputFile}" --fai-idx "~{outputFile}.fai"
+    >>>
+
+    output {
+        File outputFasta = outputFile
+        File outputFastaDict = outputDict
+        File outputFastaFai = outputFile + ".fai"
+    }
+
+    runtime {
+        memory: memory
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        # inputs
+        inputFile: {description: "The input fasta file.", category: "required"}
+        javaXmx: {description: "Unused: this task's command runs only samtools and never references this value. Kept so inputs that set it stay valid.", category: "advanced"}
+        memory: {description: "The amount of memory available to the job.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+        # outputs
+        outputFasta: {description: "Fasta file that is co-located with the indexes"}
+        outputFastaFai: {description: "Fasta index file for the outputFasta file."}
+        outputFastaDict: {description: "Sequence dictionary for the outputFasta file."}
+    }
+}
+
 task Faidx {
     input {
         File inputFile
-- 
GitLab