From fa1901c451dc3465e94d1b3b36be26a7260203b7 Mon Sep 17 00:00:00 2001 From: DavyCats <davycats.dc@gmail.com> Date: Thu, 14 May 2020 16:47:17 +0200 Subject: [PATCH] address comments --- CHANGELOG.md | 4 +++- bedtools.wdl | 9 +++++--- biopet/bamstats.wdl | 2 +- biopet/biopet.wdl | 51 +---------------------------------------- biopet/sampleconfig.wdl | 2 +- biowdl.wdl | 2 +- bowtie.wdl | 2 +- bwa.wdl | 2 +- common.wdl | 2 +- gatk.wdl | 24 +++++++++---------- hisat2.wdl | 2 +- picard.wdl | 12 +++++----- rtg.wdl | 4 ++-- star.wdl | 2 +- stringtie.wdl | 2 +- 15 files changed, 39 insertions(+), 83 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 05e79ac..540fbbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,10 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- ++ The struct `BowtieIndex` was removed, as it has become obsolete. ++ The task `ReorderGlobbedScatters` was removed, as it has become obsolete. + Adjusted the memory settings of many tools, especially java tools. - The should now more accurately represent actual memory usage (as + They should now more accurately represent actual memory usage (as opposed to virtual memory). + Added `-XX:ParallelGCThreads=1` to the java options of java tasks. 
+ Added `timeMinutes` input to many tasks, this indicates a maximum diff --git a/bedtools.wdl b/bedtools.wdl index a64cef1..c228d6c 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -25,7 +25,7 @@ task Complement { File faidx File inputBed String outputBed = basename(inputBed, "\.bed") + ".complement.bed" - String memory = "2G" + String memory = "~{512 + ceil(size([inputBed, faidx], "M"))}M" Int timeMinutes = 1 + ceil(size([inputBed, faidx], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -66,6 +66,7 @@ task Merge { input { File inputBed String outputBed = "merged.bed" + String memory = "~{512 + ceil(size(inputBed, "M"))}M" Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -79,6 +80,7 @@ task Merge { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -86,6 +88,7 @@ task Merge { parameter_meta { inputBed: {description: "The bed to merge.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -97,7 +100,7 @@ task MergeBedFiles { input { Array[File]+ bedFiles String outputBed = "merged.bed" - String memory = "2G" + String memory = "~{512 + ceil(size(bedFiles, "M"))}M" Int timeMinutes = 1 + ceil(size(bedFiles, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -174,7 +177,7 @@ task Intersect { # Giving a faidx file will set the sorted option. File? 
faidx String outputBed = "intersect.bed" - String memory = "2G" + String memory = "~{512 + ceil(size([regionsA, regionsB], "M"))}M" Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } diff --git a/biopet/bamstats.wdl b/biopet/bamstats.wdl index af01bb2..d71355d 100644 --- a/biopet/bamstats.wdl +++ b/biopet/bamstats.wdl @@ -34,7 +34,7 @@ task Generate { String outputDir Reference? reference - String memory = "10G" + String memory = "9G" String javaXmx = "8G" } diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index b90c5f4..d56ed57 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -104,7 +104,7 @@ task ExtractAdaptersFastqc { Float? adapterCutoff Boolean? outputAsFasta - String memory = "10G" + String memory = "9G" String javaXmx = "8G" String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1" Int timeMinutes = 5 @@ -210,55 +210,6 @@ task FastqSync { } } -task ReorderGlobbedScatters { - input { - Array[File]+ scatters - - # Should not be changed from the main pipeline. As it should not influence results. - # The 3.7-slim container is 143 mb on the filesystem. 3.7 is 927 mb. - # The slim container is sufficient for this small task. - String dockerImage = "python:3.7-slim" - Int timeMinutes = 5 - } - - command <<< - set -e - # Copy all the scatter files to the CWD so the output matches paths in - # the cwd. - for file in ~{sep=" " scatters} - do cp $file . 
- done - python << CODE - from os.path import basename - scatters = ['~{sep="','" scatters}'] - splitext = [basename(x).split(".") for x in scatters] - splitnum = [x.split("-") + [y] for x,y in splitext] - ordered = sorted(splitnum, key=lambda x: int(x[1])) - merged = ["{}-{}.{}".format(x[0],x[1],x[2]) for x in ordered] - for x in merged: - print(x) - CODE - >>> - - output { - Array[File] reorderedScatters = read_lines(stdout()) - } - - runtime { - docker: dockerImage - time_minutes = timeMinutes - # 4 gigs of memory to be able to build the docker image in singularity - memory: "4G" - } - - parameter_meta { - scatters: {description: "The files which should be ordered.", category: "required"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - task ScatterRegions { input { File referenceFasta diff --git a/biopet/sampleconfig.wdl b/biopet/sampleconfig.wdl index 50f2631..2b36952 100644 --- a/biopet/sampleconfig.wdl +++ b/biopet/sampleconfig.wdl @@ -34,7 +34,7 @@ task SampleConfig { String? jsonOutputPath String? 
tsvOutputPath - String memory = "18G" + String memory = "17G" String javaXmx = "16G" } diff --git a/biowdl.wdl b/biowdl.wdl index 7661a59..838755d 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -52,7 +52,7 @@ task InputConverter { } runtime { - memory: "2G" + memory: "128M" time_minutes: timeMinutes docker: dockerImage } diff --git a/bowtie.wdl b/bowtie.wdl index 500afea..b3f3cea 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -38,7 +38,7 @@ task Bowtie { Int threads = 1 Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) - String memory = "10G" + String memory = "~{5 + ceil(size(indexFiles, "G"))}G" String picardXmx = "4G" # Image contains bowtie=1.2.2 and picard=2.9.2 String dockerImage = "quay.io/biocontainers/mulled-v2-bfe71839265127576d3cd749c056e7b168308d56:1d8bec77b352cdcf3e9ff3d20af238b33ed96eae-0" diff --git a/bwa.wdl b/bwa.wdl index a39eb3e..01dae9b 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -29,7 +29,7 @@ task Mem { String? readgroup Int threads = 4 - String memory = "20G" + String memory = "~{5 + ceil(size(indexFiles, "G"))}G" String picardXmx = "4G" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads) # A mulled container is needed to have both picard and bwa in one container. diff --git a/common.wdl b/common.wdl index 88848df..f832552 100644 --- a/common.wdl +++ b/common.wdl @@ -214,7 +214,7 @@ task YamlToJson { String outputJson = basename(yaml, "\.ya?ml$") + ".json" Int timeMinutes = 1 - String memory = "1G" + String memory = "128M" # biowdl-input-converter has python and pyyaml. 
String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } diff --git a/gatk.wdl b/gatk.wdl index ff30b2b..edafc4d 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -317,7 +317,7 @@ task CollectAllelicCounts { File referenceFastaDict File referenceFastaFai - String memory = "12G" + String memory = "11G" String javaXmx = "10G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -373,8 +373,8 @@ task CollectReadCounts { File referenceFastaFai String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "5G" - String javaXmx = "4G" + String memory = "8G" + String javaXmx = "7G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -557,8 +557,8 @@ task CreateReadCountPanelOfNormals { Array[File]+ readCountsFiles File? annotatedIntervals - String memory = "5G" - String javaXmx = "4G" + String memory = "8G" + String javaXmx = "7G" Int timeMinutes = 5 String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer causes a spark related error for some reason... } @@ -604,7 +604,7 @@ task DenoiseReadCounts { File readCounts String outputPrefix - String memory = "6G" + String memory = "5G" String javaXmx = "4G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -662,7 +662,7 @@ task FilterMutectCalls { Int uniqueAltReadCount = 4 File mutect2Stats - String memory = "14G" + String memory = "13G" String javaXmx = "12G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -771,7 +771,7 @@ task GenomicsDBImport { String genomicsDBWorkspacePath = "genomics_db" String genomicsDBTarFile = "genomics_db.tar.gz" String? 
tmpDir - String memory = "6G" + String memory = "5G" String javaXmx = "4G" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -896,7 +896,7 @@ task GetPileupSummaries { File sitesForContaminationIndex String outputPrefix - String memory = "14G" + String memory = "13G" String javaXmx = "12G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -1036,7 +1036,7 @@ task LearnReadOrientationModel { input { Array[File]+ f1r2TarGz - String memory = "14G" + String memory = "13G" String javaXmx = "12G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -1075,7 +1075,7 @@ task MergeStats { input { Array[File]+ stats - String memory = "16G" + String memory = "15G" String javaXmx = "14G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -1122,7 +1122,7 @@ task ModelSegments { else 30 Int maximumNumberOfSmoothingIterations = 10 - String memory = "12G" + String memory = "11G" String javaXmx = "10G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" diff --git a/hisat2.wdl b/hisat2.wdl index 3ea18ee..7d638f1 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -33,7 +33,7 @@ task Hisat2 { Boolean downstreamTranscriptomeAssembly = true Int threads = 4 - String memory = "48G" + String memory = "~{threads + 5 + ceil(size(indexFiles, "G"))}G" Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 # is a combination of hisat2 and samtools diff --git a/picard.wdl b/picard.wdl index a63c1ba..9d40163 100644 --- a/picard.wdl +++ b/picard.wdl @@ -84,7 +84,7 @@ task CollectMultipleMetrics { Boolean collectSequencingArtifactMetrics = true Boolean collectQualityYieldMetrics = true - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = 
"quay.io/biocontainers/picard:2.20.5--0" @@ -200,7 +200,7 @@ task CollectRnaSeqMetrics { String basename String strandSpecificity = "NONE" - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" @@ -460,7 +460,7 @@ task MarkDuplicates { String outputBamPath String metricsPath - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" @@ -581,7 +581,7 @@ task SamToFastq { File inputBamIndex Boolean paired = true - String memory = "18G" + String memory = "17G" String javaXmx = "16G" # High memory default to avoid crashes. String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" File? NONE @@ -653,7 +653,7 @@ task SortVcf { String outputVcfPath File? dict - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" @@ -701,7 +701,7 @@ task RenameSample { File inputVcf String outputPath = "./picard/renamed.vcf" String newSampleName - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" diff --git a/rtg.wdl b/rtg.wdl index 03a3f5d..104a5ef 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -27,7 +27,7 @@ task Format { Array[File]+ inputFiles String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" String rtgMem = "8G" - String memory = "10G" + String memory = "9G" Int timeMinutes = 1 + ceil(size(inputFiles) * 2) } @@ -80,7 +80,7 @@ task VcfEval { String outputMode = "split" Int threads = 1 # tool default is number of cores in the system 😱 String rtgMem = "8G" - String memory = "10G" + String memory = "9G" Int timeMinutes = 1 + ceil(size([baseline, calls], "G") * 5) String 
dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } diff --git a/star.wdl b/star.wdl index 7824c76..11fde46 100644 --- a/star.wdl +++ b/star.wdl @@ -101,7 +101,7 @@ task Star { Int? limitBAMsortRAM Int runThreadN = 4 - String memory = "48G" + String memory = "~{5 + ceil(size(indexFiles, "G"))}G" Int timeMinutes = 1 + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } diff --git a/stringtie.wdl b/stringtie.wdl index f1d994b..5ed62de 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -32,7 +32,7 @@ task Stringtie { String? geneAbundanceFile Int threads = 1 - String memory = "10G" + String memory = "2G" Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0" } -- GitLab