From f25df1aeb3fb07a166e3a5b3029537da28c25b3a Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Thu, 6 Feb 2020 14:14:44 +0100
Subject: [PATCH] scatterregions always outputs ordered scatters

---
 biopet/biopet.wdl | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl
index f91f93e..4760682 100644
--- a/biopet/biopet.wdl
+++ b/biopet/biopet.wdl
@@ -254,7 +254,7 @@ task ScatterRegions {
     # linking does not work.
     String outputDirPath = "scatters"
 
-    command {
+    command <<<
         set -e -o pipefail
         mkdir -p ~{outputDirPath}
         biopet-scatterregions -Xmx~{javaXmx} \
@@ -264,10 +264,29 @@ task ScatterRegions {
           ~{"-L " + regions} \
           ~{"--bamFile " + bamFile} \
           ~{true="--notSplitContigs" false="" notSplitContigs}
-    }
+
+        # Glob messes with order of scatters (10 comes before 1), which causes
+        # problems at gatherGvcfs
+        # Therefore we reorder the scatters with python.
+        # Copy all the scatter files to the CWD so the output matches paths in
+        # the cwd.
+        for file in ~{outputDirPath}/*
+          do cp $file .
+        done
+        python << CODE
+        import os
+        scatters = os.listdir("~{outputDirPath}")
+        splitext = [ x.split(".") for x in scatters]
+        splitnum = [x.split("-") + [y] for x,y in splitext]
+        ordered = sorted(splitnum, key=lambda x: int(x[1]))
+        merged = ["{}-{}.{}".format(x[0],x[1],x[2]) for x in ordered]
+        for x in merged:
+          print(x)
+        CODE
+    >>>
 
     output {
-        Array[File] scatters = glob(outputDirPath + "/scatter-*.bed")
+        Array[File] scatters =  read_lines(stdout())
     }
 
     runtime {
-- 
GitLab