From 93240a92ad1b5c171698b7fc00bb9ae0e4e54c14 Mon Sep 17 00:00:00 2001
From: DavyCats <davycats.dc@gmail.com>
Date: Tue, 3 Apr 2018 16:20:29 +0200
Subject: [PATCH] add runtime settings

Add a runtime section with a memory value to the tasks touched in
biopet.wdl, common.wdl, fastqc.wdl, gatk.wdl, htseq.wdl, mergecounts.wdl
and picard.wdl. BwaMem gains optional `threads` and `memory` inputs;
`threads` is passed to `bwa mem -t`. Star and Stringtie unwrap their
thread input with select_first() before handing it to the runtime
section, and Star additionally gets a memory value.
---
Review notes (everything between the "---" line above and the first
"diff --git" line is ignored by `git am`):

* This patch was restored from a copy whose newlines had been collapsed.
  Line breaks were rebuilt from the hunk headers and the diffstat, which
  all add up. Leading indentation could NOT be recovered: context lines
  use an assumed 4-space WDL indent, and the whitespace-only "-"/"+"
  pairs in common.wdl show an assumed 4 -> 8 space re-indent. Apply with
  `git apply --ignore-whitespace`, or regenerate the patch from the
  repository.
* bwa.wdl, star.wdl, stringtie.wdl: select_first([x]) with a single
  optional element fails at runtime when x is not supplied. A fallback
  of 1 was added as the last element. For star.wdl and stringtie.wdl the
  input declarations are not part of this patch, so that they are
  optional is an assumption; the fallback is never selected if they are
  always set.
* bwa.wdl: `if defined(memory) then memory else 8` mixes an Int? branch
  with an Int branch; replaced by select_first([memory, 8]).
* bwa.wdl, gatk.wdl: `runtime{` changed to `runtime {` to match the
  other runtime sections.
* The "index" lines are unchanged from the original; the post-image blob
  ids of bwa.wdl, gatk.wdl, star.wdl and stringtie.wdl are therefore
  stale.

 biopet.wdl      |  4 ++++
 bwa.wdl         | 10 +++++++++-
 common.wdl      | 48 ++++++++++++++++++++++++++++++++++++++++++------
 fastqc.wdl      |  6 ++++++
 gatk.wdl        | 32 ++++++++++++++++++++++++++++++++
 htseq.wdl       |  4 ++++
 mergecounts.wdl |  4 ++++
 picard.wdl      | 20 ++++++++++++++++++++
 star.wdl        |  3 ++-
 stringtie.wdl   |  2 +-
 10 files changed, 124 insertions(+), 9 deletions(-)

diff --git a/biopet.wdl b/biopet.wdl
index f2ca0a0..8bdf7fb 100644
--- a/biopet.wdl
+++ b/biopet.wdl
@@ -134,4 +134,8 @@ task BaseCounter {
         File transcriptIntronicSense = outputDir + "/" + prefix + ".base.transcript.intronic.sense.counts"
         File transcriptSense = outputDir + "/" + prefix + ".base.transcript.sense.counts"
     }
+
+    runtime {
+        memory: 16
+    }
 }
diff --git a/bwa.wdl b/bwa.wdl
index 6f316d2..774f932 100644
--- a/bwa.wdl
+++ b/bwa.wdl
@@ -6,15 +6,23 @@ task BwaMem {
     String outputPath
     String? readgroup
 
+    Int? threads
+    Int? memory
+
     command {
         set -e -o pipefail
         mkdir -p $(dirname ${outputPath})
         ${preCommand}
-        bwa mem ${"-R '" + readgroup + "'"} \
+        bwa mem ${"-t " + threads} \
+        ${"-R '" + readgroup + "'"} \
         ${referenceFasta} ${inputR1} ${inputR2} | samtools sort --output-fmt BAM - > ${outputPath}
     }
 
     output {
         File bamFile = outputPath
     }
+    runtime {
+        threads: select_first([threads, 1])
+        memory: select_first([memory, 8])
+    }
 }
diff --git a/common.wdl b/common.wdl
index 97731d4..a780747 100644
--- a/common.wdl
+++ b/common.wdl
@@ -1,31 +1,49 @@
 task objectMd5 {
     Object the_object
+
     command {
         cat ${write_object(the_object)} | md5sum - | sed -e 's/ -//'
     }
+
     output {
         String md5sum = read_string(stdout())
     }
+
+    runtime {
+        memory: 1
+    }
 }
 
 task mapMd5 {
     Map[String,String] map
+
     command {
-    cat ${write_map(map)} | md5sum - | sed -e 's/ -//'
+        cat ${write_map(map)} | md5sum - | sed -e 's/ -//'
     }
+
     output {
         String md5sum = read_string(stdout())
     }
+
+    runtime {
+        memory: 1
+    }
 }
 
 task stringArrayMd5 {
     Array[String] stringArray
+
     command {
-    set -eu -o pipefail
-    echo ${sep=',' stringArray} | md5sum - | sed -e 's/ -//'
+        set -eu -o pipefail
+        echo ${sep=',' stringArray} | md5sum - | sed -e 's/ -//'
     }
+
     output {
-    String md5sum = read_string(stdout())
+        String md5sum = read_string(stdout())
+    }
+
+    runtime {
+        memory: 1
     }
 }
 
@@ -33,37 +51,55 @@ task concatenateTextFiles {
     Array[File] fileList
     String combinedFilePath
     Boolean? unzip=false
+
     command {
         mkdir -p ${combinedFilePath}
         rm -d ${combinedFilePath}
         ${true='zcat' false= 'cat' unzip} ${sep=' ' fileList} \
         > ${combinedFilePath}
     }
+
     output {
         File combinedFile = combinedFilePath
     }
+
+    runtime {
+        memory: 1
+    }
 }
 
 # inspired by https://gatkforums.broadinstitute.org/wdl/discussion/9616/is-there-a-way-to-flatten-arrays
 task flattenStringArray {
     Array[Array[String]] arrayList
+
     command {
-    for line in $(echo ${sep=', ' arrayList}) ; \
-    do echo $line | tr -d '"[],' ; done
+        for line in $(echo ${sep=', ' arrayList}) ; \
+        do echo $line | tr -d '"[],' ; done
     }
+
     output {
         Array[String] flattenedArray = read_lines(stdout())
     }
+
+    runtime {
+        memory: 1
+    }
 }
 
 task appendToStringArray {
     Array[String] array
     String string
+
     command {
         echo "${sep='\n' array}
         ${string}"
     }
+
     output {
         Array[String] out_array = read_lines(stdout())
     }
+
+    runtime {
+        memory: 1
+    }
 }
\ No newline at end of file
diff --git a/fastqc.wdl b/fastqc.wdl
index cdbda5e..515e9a6 100644
--- a/fastqc.wdl
+++ b/fastqc.wdl
@@ -87,15 +87,21 @@ task extractAdapters {
 task getConfiguration {
     String? preCommand
     String? fastqcDirFile = "fastqcDir.txt"
+
     command {
         set -e -o pipefail
         ${preCommand}
         echo $(dirname $(readlink -f $(which fastqc))) > ${fastqcDirFile}
     }
+
     output {
         String fastqcDir = read_string(fastqcDirFile)
         File adapterList = fastqcDir + "/Configuration/adapter_list.txt"
         File contaminantList = fastqcDir + "/Configuration/contaminant_list.txt"
         File limits = fastqcDir + "/Configuration/limits.txt"
     }
+
+    runtime {
+        memory: 1
+    }
 }
\ No newline at end of file
diff --git a/gatk.wdl b/gatk.wdl
index 6a3d160..6edac93 100644
--- a/gatk.wdl
+++ b/gatk.wdl
@@ -24,9 +24,14 @@ task BaseRecalibrator {
         --known-sites ${sep=" --known-sites " known_indels_sites_VCFs} \
         -L ${sep=" -L " sequence_group_interval}
     }
+
     output {
         File recalibration_report = "${recalibration_report_filename}"
     }
+
+    runtime {
+        memory: 6
+    }
 }
 
 # Apply Base Quality Score Recalibration (BQSR) model
@@ -57,10 +62,15 @@ task ApplyBQSR {
         --static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \
         -L ${sep=" -L " sequence_group_interval}
     }
+
     output {
         File recalibrated_bam = "${output_bam_path}"
         File recalibrated_bam_checksum = "${output_bam_path}.md5"
     }
+
+    runtime {
+        memory: 6
+    }
 }
 
 # Combine multiple recalibration tables from scattered BaseRecalibrator runs
@@ -78,9 +88,14 @@ task GatherBqsrReports {
         -I ${sep=' -I ' input_bqsr_reports} \
         -O ${output_report_filepath}
     }
+
     output {
         File output_bqsr_report = "${output_report_filepath}"
     }
+
+    runtime {
+        memory: 4
+    }
 }
 
 # Call variants on a single sample with HaplotypeCaller to produce a GVCF
@@ -109,10 +124,15 @@ task HaplotypeCallerGvcf {
         -contamination ${default=0 contamination} \
         -ERC GVCF
     }
+
     output {
         File output_gvcf = "${gvcf_basename}.vcf.gz"
         File output_gvcf_index = "${gvcf_basename}.vcf.gz.tbi"
     }
+
+    runtime {
+        memory: 6
+    }
 }
 
 task GenotypeGVCFs {
@@ -154,6 +174,10 @@ task GenotypeGVCFs {
         File output_vcf = output_basename + ".vcf.gz"
         File output_vcf_index = output_basename + ".vcf.gz.tbi"
     }
+
+    runtime {
+        memory: 6
+    }
 }
 
 task CombineGVCFs {
@@ -193,6 +217,10 @@ task CombineGVCFs {
         File output_gvcf = output_basename + ".vcf.gz"
         File output_gvcf_index = output_basename + ".vcf.gz.tbi"
     }
+
+    runtime {
+        memory: 6
+    }
 }
 
 task SplitNCigarReads {
@@ -221,4 +249,8 @@ task SplitNCigarReads {
         File bam = output_bam
         File bam_index = output_bam + ".bai"
     }
+
+    runtime {
+        memory: 6
+    }
 }
diff --git a/htseq.wdl b/htseq.wdl
index b740b4f..62091ff 100644
--- a/htseq.wdl
+++ b/htseq.wdl
@@ -22,4 +22,8 @@ task HTSeqCount {
     output {
         File counts = outputTable
     }
+
+    runtime {
+        memory: 3
+    }
 }
\ No newline at end of file
diff --git a/mergecounts.wdl b/mergecounts.wdl
index ed5d5e7..c2373f7 100644
--- a/mergecounts.wdl
+++ b/mergecounts.wdl
@@ -32,4 +32,8 @@ task MergeCounts {
     output {
         File mergedCounts = outputFile
     }
+
+    runtime {
+        memory: 4 + (2*length(inputFiles))
+    }
 }
\ No newline at end of file
diff --git a/picard.wdl b/picard.wdl
index 4c6e167..24e8a51 100644
--- a/picard.wdl
+++ b/picard.wdl
@@ -17,10 +17,15 @@ task ScatterIntervalList {
         INPUT=${interval_list} \
         OUTPUT=scatter_list
     }
+
     output {
         Array[File] out = glob("scatter_list/*/*.interval_list")
         Int interval_count = read_int(stdout())
     }
+
+    runtime {
+        memory: 6
+    }
 }
 
 # Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs
@@ -41,11 +46,16 @@ task GatherBamFiles {
         CREATE_INDEX=true \
         CREATE_MD5_FILE=true
     }
+
     output {
         File output_bam = "${output_bam_path}"
         File output_bam_index = sub(output_bam_path, ".bam$", ".bai")
         File output_bam_md5 = "${output_bam_path}.md5"
     }
+
+    runtime {
+        memory: 6
+    }
 }
 
 # Mark duplicate reads to avoid counting non-independent observations
@@ -81,11 +91,16 @@ task MarkDuplicates {
         CREATE_INDEX=true \
         ADD_PG_TAG_TO_READS=false
     }
+
     output {
         File output_bam = output_bam_path
         File output_bam_index = sub(output_bam_path, ".bam$", ".bai")
         File duplicate_metrics = metrics_path
     }
+
+    runtime {
+        memory: 6
+    }
 }
 
 # Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs
@@ -107,8 +122,13 @@ task MergeVCFs {
         INPUT=${sep=' INPUT=' input_vcfs} \
         OUTPUT=${output_vcf_path}
     }
+
     output {
         File output_vcf = output_vcf_path
         File output_vcf_index = output_vcf_path + ".tbi"
     }
+
+    runtime {
+        memory: 6
+    }
 }
\ No newline at end of file
diff --git a/star.wdl b/star.wdl
index a8e2453..1ba1545 100644
--- a/star.wdl
+++ b/star.wdl
@@ -37,6 +37,7 @@ task Star {
     }
 
     runtime {
-        threads: runThreadN
+        threads: select_first([runThreadN, 1])
+        memory: 10
     }
 }
\ No newline at end of file
diff --git a/stringtie.wdl b/stringtie.wdl
index 33118d3..2f20133 100644
--- a/stringtie.wdl
+++ b/stringtie.wdl
@@ -28,6 +28,6 @@ task Stringtie {
     }
 
     runtime {
-        threads: threads
+        threads: select_first([threads, 1])
     }
 }
\ No newline at end of file
-- 
GitLab