From fa1901c451dc3465e94d1b3b36be26a7260203b7 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Thu, 14 May 2020 16:47:17 +0200
Subject: [PATCH] address comments

---
 CHANGELOG.md            |  4 +++-
 bedtools.wdl            |  9 +++++---
 biopet/bamstats.wdl     |  2 +-
 biopet/biopet.wdl       | 51 +----------------------------------------
 biopet/sampleconfig.wdl |  2 +-
 biowdl.wdl              |  2 +-
 bowtie.wdl              |  2 +-
 bwa.wdl                 |  2 +-
 common.wdl              |  2 +-
 gatk.wdl                | 24 +++++++++----------
 hisat2.wdl              |  2 +-
 picard.wdl              | 12 +++++-----
 rtg.wdl                 |  4 ++--
 star.wdl                |  2 +-
 stringtie.wdl           |  2 +-
 15 files changed, 39 insertions(+), 83 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 05e79ac..540fbbf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,8 +11,10 @@ that users understand how the changes affect the new version.
 
 version 3.2.0-develop
 ---------------------------
++ The struct `BowtieIndex` was removed, as it has become obsolete.
++ The task `ReorderGlobbedScatters` was removed, as it has become obsolete.
 + Adjusted the memory settings of many tools, especially java tools.
-  The should now more accurately represent actual memory usage (as
+  They should now more accurately represent actual memory usage (as
   opposed to virtual memory).
 + Added `-XX:ParallelGCThreads=1` to the java options of java tasks.
 + Added `timeMinutes` input to many tasks, this indicates a maximum
diff --git a/bedtools.wdl b/bedtools.wdl
index a64cef1..c228d6c 100644
--- a/bedtools.wdl
+++ b/bedtools.wdl
@@ -25,7 +25,7 @@ task Complement {
         File faidx
         File inputBed
         String outputBed = basename(inputBed, "\.bed") + ".complement.bed"
-        String memory = "2G"
+        String memory = "~{512 + ceil(size([inputBed, faidx], "M"))}M"
         Int timeMinutes = 1 + ceil(size([inputBed, faidx], "G"))
         String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
     }
@@ -66,6 +66,7 @@ task Merge {
     input {
         File inputBed
         String outputBed = "merged.bed"
+        String memory = "~{512 + ceil(size(inputBed, "M"))}M"
         Int timeMinutes = 1 + ceil(size(inputBed, "G"))
         String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
     }
@@ -79,6 +80,7 @@ task Merge {
     }
 
     runtime {
+        memory: memory
         time_minutes: timeMinutes
         docker: dockerImage
     }
@@ -86,6 +88,7 @@ task Merge {
     parameter_meta {
         inputBed: {description: "The bed to merge.", category: "required"}
         outputBed: {description: "The path to write the output to.", category: "advanced"}
+        memory: {description: "The amount of memory needed for the job.", category: "advanced"}
         timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
         dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                       category: "advanced"}
@@ -97,7 +100,7 @@ task MergeBedFiles {
     input {
         Array[File]+ bedFiles
         String outputBed = "merged.bed"
-        String memory = "2G"
+        String memory = "~{512 + ceil(size(bedFiles, "M"))}M"
         Int timeMinutes = 1 + ceil(size(bedFiles, "G"))
         String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
     }
@@ -174,7 +177,7 @@ task Intersect {
         # Giving a faidx file will set the sorted option.
         File? faidx
         String outputBed = "intersect.bed"
-        String memory = "2G"
+        String memory = "~{512 + ceil(size([regionsA, regionsB], "M"))}M"
         Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G"))
         String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
     }
diff --git a/biopet/bamstats.wdl b/biopet/bamstats.wdl
index af01bb2..d71355d 100644
--- a/biopet/bamstats.wdl
+++ b/biopet/bamstats.wdl
@@ -34,7 +34,7 @@ task Generate {
         String outputDir
         Reference? reference
 
-        String memory = "10G"
+        String memory = "9G"
         String javaXmx = "8G"
     }
 
diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl
index b90c5f4..d56ed57 100644
--- a/biopet/biopet.wdl
+++ b/biopet/biopet.wdl
@@ -104,7 +104,7 @@ task ExtractAdaptersFastqc {
         Float? adapterCutoff
         Boolean? outputAsFasta
 
-        String memory = "10G"
+        String memory = "9G"
         String javaXmx = "8G"
         String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1"
         Int timeMinutes = 5
@@ -210,55 +210,6 @@ task FastqSync {
     }
 }
 
-task ReorderGlobbedScatters {
-    input {
-        Array[File]+ scatters
-
-        # Should not be changed from the main pipeline. As it should not influence results.
-        # The 3.7-slim container is 143 mb on the filesystem. 3.7 is 927 mb.
-        # The slim container is sufficient for this small task.
-        String dockerImage = "python:3.7-slim"
-        Int timeMinutes = 5
-    }
-
-    command <<<
-       set -e
-       # Copy all the scatter files to the CWD so the output matches paths in
-       # the cwd.
-       for file in ~{sep=" " scatters}
-          do cp $file .
-       done
-       python << CODE
-       from os.path import basename
-       scatters = ['~{sep="','" scatters}']
-       splitext = [basename(x).split(".") for x in scatters]
-       splitnum = [x.split("-") + [y] for x,y in splitext]
-       ordered = sorted(splitnum, key=lambda x: int(x[1]))
-       merged = ["{}-{}.{}".format(x[0],x[1],x[2]) for x in ordered]
-       for x in merged:
-           print(x)
-       CODE
-    >>>
-
-    output {
-        Array[File] reorderedScatters = read_lines(stdout())
-    }
-
-    runtime {
-        docker: dockerImage
-        time_minutes = timeMinutes
-        # 4 gigs of memory to be able to build the docker image in singularity
-        memory: "4G"
-    }
-
-    parameter_meta {
-        scatters: {description: "The files which should be ordered.", category: "required"}
-        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
-        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
-                      category: "advanced"}
-    }
-}
-
 task ScatterRegions {
     input {
         File referenceFasta
diff --git a/biopet/sampleconfig.wdl b/biopet/sampleconfig.wdl
index 50f2631..2b36952 100644
--- a/biopet/sampleconfig.wdl
+++ b/biopet/sampleconfig.wdl
@@ -34,7 +34,7 @@ task SampleConfig {
         String? jsonOutputPath
         String? tsvOutputPath
 
-        String memory = "18G"
+        String memory = "17G"
         String javaXmx = "16G"
     }
 
diff --git a/biowdl.wdl b/biowdl.wdl
index 7661a59..838755d 100644
--- a/biowdl.wdl
+++ b/biowdl.wdl
@@ -52,7 +52,7 @@ task InputConverter {
     }
 
     runtime {
-        memory: "2G"
+        memory: "128M"
         time_minutes: timeMinutes
         docker: dockerImage
     }
diff --git a/bowtie.wdl b/bowtie.wdl
index 500afea..b3f3cea 100644
--- a/bowtie.wdl
+++ b/bowtie.wdl
@@ -38,7 +38,7 @@ task Bowtie {
 
         Int threads = 1
         Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads)
-        String memory = "10G"
+        String memory = "~{5 + ceil(size(indexFiles, "G"))}G"
         String picardXmx = "4G"
         # Image contains bowtie=1.2.2 and picard=2.9.2
         String dockerImage = "quay.io/biocontainers/mulled-v2-bfe71839265127576d3cd749c056e7b168308d56:1d8bec77b352cdcf3e9ff3d20af238b33ed96eae-0"
diff --git a/bwa.wdl b/bwa.wdl
index a39eb3e..01dae9b 100644
--- a/bwa.wdl
+++ b/bwa.wdl
@@ -29,7 +29,7 @@ task Mem {
         String? readgroup
 
         Int threads = 4
-        String memory = "20G"
+        String memory = "~{5 + ceil(size(indexFiles, "G"))}G"
         String picardXmx = "4G"
         Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads)
         # A mulled container is needed to have both picard and bwa in one container.
diff --git a/common.wdl b/common.wdl
index 88848df..f832552 100644
--- a/common.wdl
+++ b/common.wdl
@@ -214,7 +214,7 @@ task YamlToJson {
         String outputJson = basename(yaml, "\.ya?ml$") + ".json"
 
         Int timeMinutes = 1
-        String  memory = "1G"
+        String  memory = "128M"
         # biowdl-input-converter has python and pyyaml.
         String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0"
     }
diff --git a/gatk.wdl b/gatk.wdl
index ff30b2b..edafc4d 100644
--- a/gatk.wdl
+++ b/gatk.wdl
@@ -317,7 +317,7 @@ task CollectAllelicCounts {
         File referenceFastaDict
         File referenceFastaFai
 
-        String memory = "12G"
+        String memory = "11G"
         String javaXmx = "10G"
         Int timeMinutes = 120
         String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
@@ -373,8 +373,8 @@ task CollectReadCounts {
         File referenceFastaFai
         String intervalMergingRule = "OVERLAPPING_ONLY"
 
-        String memory = "5G"
-        String javaXmx = "4G"
+        String memory = "8G"
+        String javaXmx = "7G"
         Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5)
         String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
     }
@@ -557,8 +557,8 @@ task CreateReadCountPanelOfNormals {
         Array[File]+ readCountsFiles
         File? annotatedIntervals
 
-        String memory = "5G"
-        String javaXmx = "4G"
+        String memory = "8G"
+        String javaXmx = "7G"
         Int timeMinutes = 5
         String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer causes a spark related error for some reason...
     }
@@ -604,7 +604,7 @@ task DenoiseReadCounts {
         File readCounts
         String outputPrefix
 
-        String memory = "6G"
+        String memory = "5G"
         String javaXmx = "4G"
         Int timeMinutes = 5
         String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
@@ -662,7 +662,7 @@ task FilterMutectCalls {
         Int uniqueAltReadCount = 4
         File mutect2Stats
 
-        String memory = "14G"
+        String memory = "13G"
         String javaXmx = "12G"
         Int timeMinutes = 60
         String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
@@ -771,7 +771,7 @@ task GenomicsDBImport {
         String genomicsDBWorkspacePath = "genomics_db"
         String genomicsDBTarFile = "genomics_db.tar.gz"
         String? tmpDir
-        String memory = "6G"
+        String memory = "5G"
         String javaXmx = "4G"
         Int timeMinutes = 180
         String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
@@ -896,7 +896,7 @@ task GetPileupSummaries {
         File sitesForContaminationIndex
         String outputPrefix
 
-        String memory = "14G"
+        String memory = "13G"
         String javaXmx = "12G"
         Int timeMinutes = 120
         String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
@@ -1036,7 +1036,7 @@ task LearnReadOrientationModel {
     input {
         Array[File]+ f1r2TarGz
 
-        String memory = "14G"
+        String memory = "13G"
         String javaXmx = "12G"
         Int timeMinutes = 120
         String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
@@ -1075,7 +1075,7 @@ task MergeStats {
     input {
         Array[File]+ stats
 
-        String memory = "16G"
+        String memory = "15G"
         String javaXmx = "14G"
         Int timeMinutes = 30
         String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
@@ -1122,7 +1122,7 @@ task ModelSegments {
             else 30
         Int maximumNumberOfSmoothingIterations = 10
 
-        String memory = "12G"
+        String memory = "11G"
         String javaXmx = "10G"
         Int timeMinutes = 60
         String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
diff --git a/hisat2.wdl b/hisat2.wdl
index 3ea18ee..7d638f1 100644
--- a/hisat2.wdl
+++ b/hisat2.wdl
@@ -33,7 +33,7 @@ task Hisat2 {
         Boolean downstreamTranscriptomeAssembly = true
 
         Int threads = 4
-        String memory = "48G"
+        String memory = "~{threads + 5 + ceil(size(indexFiles, "G"))}G"
         Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads)
         # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1
         # is a combination of hisat2 and samtools
diff --git a/picard.wdl b/picard.wdl
index a63c1ba..9d40163 100644
--- a/picard.wdl
+++ b/picard.wdl
@@ -84,7 +84,7 @@ task CollectMultipleMetrics {
         Boolean collectSequencingArtifactMetrics = true
         Boolean collectQualityYieldMetrics = true
 
-        String memory = "10G"
+        String memory = "9G"
         String javaXmx = "8G"
         Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6)
         String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
@@ -200,7 +200,7 @@ task CollectRnaSeqMetrics {
         String basename
         String strandSpecificity = "NONE"
 
-        String memory = "10G"
+        String memory = "9G"
         String javaXmx =  "8G"
         Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6)
         String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
@@ -460,7 +460,7 @@ task MarkDuplicates {
         String outputBamPath
         String metricsPath
 
-        String memory = "10G"
+        String memory = "9G"
         String javaXmx = "8G"
         Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8)
         String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
@@ -581,7 +581,7 @@ task SamToFastq {
         File inputBamIndex
         Boolean paired = true
 
-        String memory = "18G"
+        String memory = "17G"
         String javaXmx = "16G" # High memory default to avoid crashes.
         String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
         File? NONE
@@ -653,7 +653,7 @@ task SortVcf {
         String outputVcfPath
         File? dict
 
-        String memory = "10G"
+        String memory = "9G"
         String javaXmx = "8G"
         Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5)
         String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
@@ -701,7 +701,7 @@ task RenameSample {
         File inputVcf
         String outputPath = "./picard/renamed.vcf"
         String newSampleName
-        String memory = "10G"
+        String memory = "9G"
         String javaXmx = "8G"
         Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2)
         String dockerImage = "quay.io/biocontainers/picard:2.19.0--0"
diff --git a/rtg.wdl b/rtg.wdl
index 03a3f5d..104a5ef 100644
--- a/rtg.wdl
+++ b/rtg.wdl
@@ -27,7 +27,7 @@ task Format {
         Array[File]+ inputFiles
         String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0"
         String rtgMem = "8G"
-        String memory = "10G"
+        String memory = "9G"
         Int timeMinutes = 1 + ceil(size(inputFiles) * 2)
     }
 
@@ -80,7 +80,7 @@ task VcfEval {
         String outputMode = "split"
         Int threads = 1  # tool default is number of cores in the system 😱
         String rtgMem = "8G"
-        String memory = "10G"
+        String memory = "9G"
         Int timeMinutes = 1 + ceil(size([baseline, calls], "G") * 5)
         String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0"
     }
diff --git a/star.wdl b/star.wdl
index 7824c76..11fde46 100644
--- a/star.wdl
+++ b/star.wdl
@@ -101,7 +101,7 @@ task Star {
         Int? limitBAMsortRAM
 
         Int runThreadN = 4
-        String memory = "48G"
+        String memory = "~{5 + ceil(size(indexFiles, "G"))}G"
         Int timeMinutes = 1 + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN)
         String dockerImage = "quay.io/biocontainers/star:2.7.3a--0"
     }
diff --git a/stringtie.wdl b/stringtie.wdl
index f1d994b..5ed62de 100644
--- a/stringtie.wdl
+++ b/stringtie.wdl
@@ -32,7 +32,7 @@ task Stringtie {
         String? geneAbundanceFile
 
         Int threads = 1
-        String memory = "10G"
+        String memory = "2G"
         Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads)
         String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0"
     }
-- 
GitLab