From f25df1aeb3fb07a166e3a5b3029537da28c25b3a Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Thu, 6 Feb 2020 14:14:44 +0100
Subject: [PATCH] scatterregions always outputs ordered scatters

---
Review notes (this area is not part of the commit message):
- The added lines were amended during review: the cp argument is quoted and
  the python sorter only emits scatter-N.bed files, ordered by N, instead of
  failing on any directory entry that is not shaped like name-N.ext.
- The hunk header and diffstat were recomputed for the amended lines; the
  blob ids on the "index" line still refer to the originally exported patch.

 biopet/biopet.wdl | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl
index f91f93e..4760682 100644
--- a/biopet/biopet.wdl
+++ b/biopet/biopet.wdl
@@ -254,7 +254,7 @@ task ScatterRegions {
     # linking does not work.
     String outputDirPath = "scatters"
 
-    command {
+    command <<<
         set -e -o pipefail
         mkdir -p ~{outputDirPath}
         biopet-scatterregions -Xmx~{javaXmx} \
@@ -264,10 +264,34 @@ task ScatterRegions {
         ~{"-L " + regions} \
         ~{"--bamFile " + bamFile} \
         ~{true="--notSplitContigs" false="" notSplitContigs}
-    }
+
+        # Glob messes with order of scatters (10 comes before 1), which causes
+        # problems at gatherGvcfs.
+        # Therefore we reorder the scatters numerically with python.
+        # Copy all the scatter files to the CWD so the output matches paths in
+        # the cwd.
+        # NOTE(review): read_lines(stdout()) assumes nothing else in this
+        # command writes to stdout; confirm for biopet-scatterregions.
+        for file in ~{outputDirPath}/*
+        do cp "$file" .
+        done
+        python << CODE
+        import os
+        # Only scatter-N.bed files are outputs (the pattern the glob used
+        # before); other directory entries are skipped, not crashed on.
+        prefix, suffix = "scatter-", ".bed"
+        numbered = []
+        for name in os.listdir("~{outputDirPath}"):
+            index = name[len(prefix):-len(suffix)]
+            if name.startswith(prefix) and name.endswith(suffix) and index.isdigit():
+                numbered.append((int(index), name))
+        for _, name in sorted(numbered):
+            print(name)
+        CODE
+    >>>
 
     output {
-        Array[File] scatters = glob(outputDirPath + "/scatter-*.bed")
+        Array[File] scatters = read_lines(stdout())
     }
 
     runtime {
-- 
GitLab