diff --git a/biopet.wdl b/biopet.wdl index f2ca0a02aab35d6b60b9630bbe3b637117543759..89293a9a5cbffd680e72dcf4ccfe397f397bd159 100644 --- a/biopet.wdl +++ b/biopet.wdl @@ -32,11 +32,15 @@ task ScatterRegions { Int? scatterSize File? regions + Float? memory + Float? memoryMultiplier + + Int mem = ceil(select_first([memory, 4.0])) command { set -e -o pipefail ${preCommand} mkdir -p ${outputDirPath} - java -Xmx2G -jar ${tool_jar} \ + java -Xmx${mem}G -jar ${tool_jar} \ -R ${ref_fasta} \ -o ${outputDirPath} \ ${"-s " + scatterSize} \ @@ -46,6 +50,10 @@ task ScatterRegions { output { Array[File] scatters = glob(outputDirPath + "/scatter-*.bed") } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 2.0])) + } } task SampleConfig { @@ -58,11 +66,15 @@ task SampleConfig { String? jsonOutputPath String? tsvOutputPath + Float? memory + Float? memoryMultiplier + + Int mem = ceil(select_first([memory, 4.0])) command { set -e -o pipefail ${preCommand} - mkdir -p . $(dirname ${jsonOutputPath}) $(dirname ${tsvOutputPath}) - java -jar ${tool_jar} \ + mkdir -p . ${"$(dirname " + jsonOutputPath + ")"} ${"$(dirname " + tsvOutputPath + ")"} + java -Xmx${mem}G -jar ${tool_jar} \ -i ${sep="-i " inputFiles} \ ${"--sample " + sample} \ ${"--library " + library} \ @@ -77,6 +89,10 @@ task SampleConfig { File? tsvOutput = tsvOutputPath Object values = if (defined(tsvOutput) && size(tsvOutput) > 0) then read_map(tsvOutput) else { "": "" } } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 2.0])) + } } task BaseCounter { @@ -87,11 +103,15 @@ task BaseCounter { String outputDir String prefix + Float? memory + Float? 
memoryMultiplier + + Int mem = ceil(select_first([memory, 12.0])) command { set -e -o pipefail ${preCommand} mkdir -p ${outputDir} - java -jar ${tool_jar} \ + java -Xmx${mem}G -jar ${tool_jar} \ -b ${bam} \ -r ${refFlat} \ -o ${outputDir} \ @@ -134,4 +154,8 @@ task BaseCounter { File transcriptIntronicSense = outputDir + "/" + prefix + ".base.transcript.intronic.sense.counts" File transcriptSense = outputDir + "/" + prefix + ".base.transcript.sense.counts" } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + } } diff --git a/bwa.wdl b/bwa.wdl index 6f316d299251ac15cf733f3e210811f412c3f37b..0a8b37fa8ad3fec1d7d3400f6e9b4b5d342dba0a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -6,15 +6,23 @@ task BwaMem { String outputPath String? readgroup + Int? threads + Int? memory + command { set -e -o pipefail mkdir -p $(dirname ${outputPath}) ${preCommand} - bwa mem ${"-R '" + readgroup + "'"} \ + bwa mem ${"-t " + threads} \ + ${"-R '" + readgroup + "'"} \ ${referenceFasta} ${inputR1} ${inputR2} | samtools sort --output-fmt BAM - > ${outputPath} } output { File bamFile = outputPath } + runtime{ + cpu: select_first([threads, 1]) + memory: select_first([memory, 8]) + } } diff --git a/common.wdl b/common.wdl index 97731d40938e255cdac7f77da92fb398b3f6b716..79be3870be1f1d4d932564b42a8263bce1be4ba9 100644 --- a/common.wdl +++ b/common.wdl @@ -1,31 +1,49 @@ task objectMd5 { Object the_object + command { cat ${write_object(the_object)} | md5sum - | sed -e 's/ -//' } + output { String md5sum = read_string(stdout()) } + + runtime { + memory: 1 + } } task mapMd5 { Map[String,String] map + command { - cat ${write_map(map)} | md5sum - | sed -e 's/ -//' + cat ${write_map(map)} | md5sum - | sed -e 's/ -//' } + output { String md5sum = read_string(stdout()) } + + runtime { + memory: 1 + } } task stringArrayMd5 { Array[String] stringArray + command { - set -eu -o pipefail - echo ${sep=',' stringArray} | md5sum - | sed -e 's/ -//' + set -eu -o pipefail 
+ echo ${sep=',' stringArray} | md5sum - | sed -e 's/ -//' } + output { - String md5sum = read_string(stdout()) + String md5sum = read_string(stdout()) + } + + runtime { + memory: 1 } } @@ -33,37 +51,68 @@ task concatenateTextFiles { Array[File] fileList String combinedFilePath Boolean? unzip=false + command { mkdir -p ${combinedFilePath} rm -d ${combinedFilePath} ${true='zcat' false= 'cat' unzip} ${sep=' ' fileList} \ > ${combinedFilePath} } + output { File combinedFile = combinedFilePath } + + runtime { + memory: 1 + } } # inspired by https://gatkforums.broadinstitute.org/wdl/discussion/9616/is-there-a-way-to-flatten-arrays task flattenStringArray { Array[Array[String]] arrayList + command { - for line in $(echo ${sep=', ' arrayList}) ; \ - do echo $line | tr -d '"[],' ; done + for line in $(echo ${sep=', ' arrayList}) ; \ + do echo $line | tr -d '"[],' ; done } + output { Array[String] flattenedArray = read_lines(stdout()) } + + runtime { + memory: 1 + } } task appendToStringArray { Array[String] array String string + command { echo "${sep='\n' array} ${string}" } + output { Array[String] out_array = read_lines(stdout()) } + + runtime { + memory: 1 + } +} + +task createLink { + File inputFile + String outputPath + + command { + ln -sf ${inputFile} ${outputPath} + } + + output { + File link = outputPath + } } \ No newline at end of file diff --git a/fastqc.wdl b/fastqc.wdl index cdbda5ef48548ffacf6a8024806c1465ae1a7c7b..72b4a538358a5fac7c27f6d004ff7a9a026fe4e8 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -62,10 +62,15 @@ task extractAdapters { File? knownAdapterFile Float? adapterCutoff Boolean? outputAsFasta + + Float? memory + Float? 
memoryMultiplier + + Int mem = ceil(select_first([memory, 4.0])) command { set -e mkdir -p ${outputDir} - java -jar ${extractAdaptersFastqcJar} \ + java -Xmx${mem}G -jar ${extractAdaptersFastqcJar} \ --inputFile ${inputFile} \ ${"--adapterOutputFile " + adapterOutputFilePath } \ ${"--contamsOutputFile " + contamsOutputFilePath } \ @@ -82,20 +87,30 @@ task extractAdapters { Array[String] adapterList = read_lines(select_first([adapterOutputFilePath])) Array[String] contamsList = read_lines(select_first([contamsOutputFilePath])) } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 2.5])) + } } task getConfiguration { String? preCommand String? fastqcDirFile = "fastqcDir.txt" + command { set -e -o pipefail ${preCommand} echo $(dirname $(readlink -f $(which fastqc))) > ${fastqcDirFile} } + output { String fastqcDir = read_string(fastqcDirFile) File adapterList = fastqcDir + "/Configuration/adapter_list.txt" File contaminantList = fastqcDir + "/Configuration/contaminant_list.txt" File limits = fastqcDir + "/Configuration/limits.txt" } + + runtime { + memory: 1 + } } \ No newline at end of file diff --git a/gatk.wdl b/gatk.wdl index 6a3d160a7d8741931858391f8e3acef813a529d8..160849ad00e3d849bfb26a44ce717b73e2c4918f 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -12,10 +12,14 @@ task BaseRecalibrator { File ref_fasta File ref_fasta_index + Float? memory + Float? 
memoryMultiplier + + Int mem = ceil(select_first([memory, 4.0])) command { set -e -o pipefail ${preCommand} - java -Xms4G -jar ${gatk_jar} \ + java -Xms${mem}G -jar ${gatk_jar} \ BaseRecalibrator \ -R ${ref_fasta} \ -I ${input_bam} \ @@ -24,9 +28,14 @@ task BaseRecalibrator { --known-sites ${sep=" --known-sites " known_indels_sites_VCFs} \ -L ${sep=" -L " sequence_group_interval} } + output { File recalibration_report = "${recalibration_report_filename}" } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + } } # Apply Base Quality Score Recalibration (BQSR) model @@ -42,10 +51,15 @@ task ApplyBQSR { File ref_fasta_index Int? compression_level + Float? memory + Float? memoryMultiplier + + Int mem = ceil(select_first([memory, 4.0])) command { set -e -o pipefail ${preCommand} - java ${"-Dsamjdk.compression_level=" + compression_level} -Xms4G -jar ${gatk_jar} \ + java ${"-Dsamjdk.compression_level=" + compression_level} \ + -Xms${mem}G -jar ${gatk_jar} \ ApplyBQSR \ --create-output-bam-md5 \ --add-output-sam-program-record \ @@ -57,10 +71,15 @@ task ApplyBQSR { --static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \ -L ${sep=" -L " sequence_group_interval} } + output { File recalibrated_bam = "${output_bam_path}" File recalibrated_bam_checksum = "${output_bam_path}.md5" } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + } } # Combine multiple recalibration tables from scattered BaseRecalibrator runs @@ -70,17 +89,26 @@ task GatherBqsrReports { Array[File] input_bqsr_reports String output_report_filepath + Float? memory + Float? 
memoryMultiplier + + Int mem = ceil(select_first([memory, 4.0])) command { set -e -o pipefail ${preCommand} - java -Xms3G -jar ${gatk_jar} \ + java -Xms${mem}G -jar ${gatk_jar} \ GatherBQSRReports \ -I ${sep=' -I ' input_bqsr_reports} \ -O ${output_report_filepath} } + output { File output_bqsr_report = "${output_report_filepath}" } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + } } # Call variants on a single sample with HaplotypeCaller to produce a GVCF @@ -97,10 +125,15 @@ task HaplotypeCallerGvcf { Int? compression_level String gatk_jar + Float? memory + Float? memoryMultiplier + + Int mem = ceil(select_first([memory, 4.0])) command { set -e -o pipefail ${preCommand} - java ${"-Dsamjdk.compression_level=" + compression_level} -Xmx4G -jar ${gatk_jar} \ + java ${"-Dsamjdk.compression_level=" + compression_level} \ + -Xmx${mem}G -jar ${gatk_jar} \ HaplotypeCaller \ -R ${ref_fasta} \ -O ${gvcf_basename}.vcf.gz \ @@ -109,10 +142,15 @@ task HaplotypeCallerGvcf { -contamination ${default=0 contamination} \ -ERC GVCF } + output { File output_gvcf = "${gvcf_basename}.vcf.gz" File output_gvcf_index = "${gvcf_basename}.vcf.gz.tbi" } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + } } task GenotypeGVCFs { @@ -133,12 +171,16 @@ task GenotypeGVCFs { File dbsnp_vcf_index Int? compression_level + Float? memory + Float? 
memoryMultiplier + Int mem = ceil(select_first([memory, 4.0])) command { set -e -o pipefail ${preCommand} - java ${"-Dsamjdk.compression_level=" + compression_level} -Xmx4G -jar ${gatk_jar} \ + java ${"-Dsamjdk.compression_level=" + compression_level} \ + -Xmx${mem}G -jar ${gatk_jar} \ GenotypeGVCFs \ -R ${ref_fasta} \ -O ${output_basename + ".vcf.gz"} \ @@ -154,6 +196,10 @@ task GenotypeGVCFs { File output_vcf = output_basename + ".vcf.gz" File output_vcf_index = output_basename + ".vcf.gz.tbi" } + + runtime{ + memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + } } task CombineGVCFs { @@ -171,13 +217,17 @@ task CombineGVCFs { File ref_dict Int? compression_level + Float? memory + Float? memoryMultiplier + Int mem = ceil(select_first([memory, 4.0])) command { set -e -o pipefail ${preCommand} if [ ${length(gvcf_files)} -gt 1 ]; then - java ${"-Dsamjdk.compression_level=" + compression_level} -Xmx4G -jar ${gatk_jar} \ + java ${"-Dsamjdk.compression_level=" + compression_level} \ + -Xmx${mem}G -jar ${gatk_jar} \ CombineGVCFs \ -R ${ref_fasta} \ -O ${output_basename + ".vcf.gz"} \ @@ -193,6 +243,10 @@ task CombineGVCFs { File output_gvcf = output_basename + ".vcf.gz" File output_gvcf_index = output_basename + ".vcf.gz.tbi" } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + } } task SplitNCigarReads { @@ -206,11 +260,15 @@ task SplitNCigarReads { String gatk_jar Array[File]+ intervals + Float? memory + Float? 
memoryMultiplier + Int mem = ceil(select_first([memory, 4.0])) command { set -e -o pipefail ${preCommand} - java -Xms4G -jar ${gatk_jar} \ + java -Xms${mem}G -jar ${gatk_jar} \ + SplitNCigarReads \ -I ${input_bam} \ -R ${ref_fasta} \ - -O ${output_bam} # might have to be -o depending on GATK version \ + -O ${output_bam} \ @@ -221,4 +279,8 @@ task SplitNCigarReads { File bam = output_bam File bam_index = output_bam + ".bai" } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + } } diff --git a/htseq.wdl b/htseq.wdl index b740b4f0a0fb90c70eac43baa8d9688e9422327c..6376e3ebeac324848bc20fe2a73f6be9fa6a13b2 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -1,12 +1,14 @@ task HTSeqCount { String? preCommand Array[File] alignmentFiles - File gffFile + File gtfFile String outputTable String? format String? order String? stranded + Int? memory + command { set -e -o pipefail ${preCommand} @@ -15,11 +17,15 @@ task HTSeqCount { -r ${default="pos" order} \ -s ${default="no" stranded} \ ${sep=" " alignmentFiles} \ - ${gffFile} \ + ${gtfFile} \ > ${outputTable} } output { File counts = outputTable } + + runtime { + memory: select_first([memory, 3]) + } } \ No newline at end of file diff --git a/mergecounts.wdl b/mergecounts.wdl index ed5d5e75d2ac7ce60fd0b836d01ac7a34e57dd58..c2373f7f02596607f1c9999ecfb35ff49aa6c43a 100644 --- a/mergecounts.wdl +++ b/mergecounts.wdl @@ -32,4 +32,8 @@ task MergeCounts { output { File mergedCounts = outputFile } + + runtime { + memory: 4 + (2*length(inputFiles)) + } } \ No newline at end of file diff --git a/picard.wdl b/picard.wdl index 4c6e167b11b54c8e5d917ae67d5d2e0d5bb9e667..104261816f42dea6126dc5c645ab7871e618fe1e 100644 --- a/picard.wdl +++ b/picard.wdl @@ -4,11 +4,15 @@ task ScatterIntervalList { Int scatter_count String picard_jar + Float? memory + Float? 
memoryMultiplier + + Int mem = ceil(select_first([memory, 4.0])) command { set -e -o pipefail ${preCommand} mkdir scatter_list - java -Xmx4G -jar ${picard_jar} \ + java -Xmx${mem}G -jar ${picard_jar} \ IntervalListTools \ SCATTER_COUNT=${scatter_count} \ SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \ @@ -17,10 +21,15 @@ task ScatterIntervalList { INPUT=${interval_list} \ OUTPUT=scatter_list } + output { Array[File] out = glob("scatter_list/*/*.interval_list") Int interval_count = read_int(stdout()) } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + } } # Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs @@ -31,21 +40,31 @@ task GatherBamFiles { Int? compression_level String picard_jar + Float? memory + Float? memoryMultiplier + + Int mem = ceil(select_first([memory, 4.0])) command { set -e -o pipefail ${preCommand} - java ${"-Dsamjdk.compression_level=" + compression_level} -Xmx4G -jar ${picard_jar} \ + java ${"-Dsamjdk.compression_level=" + compression_level} \ + -Xmx${mem}G -jar ${picard_jar} \ GatherBamFiles \ INPUT=${sep=' INPUT=' input_bams} \ OUTPUT=${output_bam_path} \ CREATE_INDEX=true \ CREATE_MD5_FILE=true } + output { File output_bam = "${output_bam_path}" File output_bam_index = sub(output_bam_path, ".bam$", ".bai") File output_bam_md5 = "${output_bam_path}.md5" } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + } } # Mark duplicate reads to avoid counting non-independent observations @@ -57,6 +76,9 @@ task MarkDuplicates { Int? compression_level String picard_jar + Float? memory + Float? memoryMultiplier + # The program default for READ_NAME_REGEX is appropriate in nearly every case. 
# Sometimes we wish to supply "null" in order to turn off optical duplicate detection # This can be desirable if you don't mind the estimated library size being wrong and optical duplicate detection is taking >7 days and failing @@ -65,11 +87,13 @@ task MarkDuplicates { # Task is assuming query-sorted input so that the Secondary and Supplementary reads get marked correctly # This works because the output of BWA is query-grouped and therefore, so is the output of MergeBamAlignment. # While query-grouped isn't actually query-sorted, it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname" + Int mem = ceil(select_first([memory, 4.0])) command { set -e -o pipefail ${preCommand} mkdir -p $(dirname ${output_bam_path}) - java ${"-Dsamjdk.compression_level=" + compression_level} -Xmx4G -jar ${picard_jar} \ + java ${"-Dsamjdk.compression_level=" + compression_level} \ + -Xmx${mem}G -jar ${picard_jar} \ MarkDuplicates \ INPUT=${sep=' INPUT=' input_bams} \ OUTPUT=${output_bam_path} \ @@ -81,11 +105,16 @@ task MarkDuplicates { CREATE_INDEX=true \ ADD_PG_TAG_TO_READS=false } + output { File output_bam = output_bam_path File output_bam_index = sub(output_bam_path, ".bam$", ".bai") File duplicate_metrics = metrics_path } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + } } # Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs @@ -97,18 +126,28 @@ task MergeVCFs { Int? compression_level String picard_jar + Float? memory + Float? 
memoryMultiplier + # Using MergeVcfs instead of GatherVcfs so we can create indices # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket + Int mem = ceil(select_first([memory, 4.0])) command { set -e -o pipefail ${preCommand} - java ${"-Dsamjdk.compression_level=" + compression_level} -Xmx4G -jar ${picard_jar} \ + java ${"-Dsamjdk.compression_level=" + compression_level} \ + -Xmx${mem}G -jar ${picard_jar} \ MergeVcfs \ INPUT=${sep=' INPUT=' input_vcfs} \ OUTPUT=${output_vcf_path} } + output { File output_vcf = output_vcf_path File output_vcf_index = output_vcf_path + ".tbi" } + + runtime { + memory: ceil(mem * select_first([memoryMultiplier, 1.5])) + } } \ No newline at end of file diff --git a/samtools.wdl b/samtools.wdl index 4dd5e296a9e517eb310690feae0cff3638c30f49..249143ffa7d4215650b6b8dce6d0b2b216548d2b 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -21,7 +21,13 @@ task Merge { command { set -e -o pipefail ${preCommand} - samtools merge ${outputBamPath} ${sep=' ' bamFiles} + if [ ${length(bamFiles)} -gt 1 ] + then + samtools merge ${outputBamPath} ${sep=' ' bamFiles} + else + # single input: nothing to merge, so link it to the output path + ln -sf ${sep=' ' bamFiles} ${outputBamPath} + fi } output { diff --git a/star.wdl b/star.wdl index a8e2453800a756a127738dd122f1e804e3236b4c..d7d3b7b595953704ab0de936b82e1ba7405fe279 100644 --- a/star.wdl +++ b/star.wdl @@ -2,7 +2,7 @@ task Star { String? preCommand Array[File] inputR1 - Array[File]? inputR2 + Array[File?] inputR2 String genomeDir String outFileNamePrefix @@ -13,9 +13,14 @@ task Star { String? twopassMode Array[String]? outSAMattrRGline + Int? memory + #TODO needs to be extended for all possible output extensions Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam"} + # converts String? 
to String for use as key (for the Map above) in output + String key = select_first([outSAMtype, "BAM SortedByCoordinate"]) + command { set -e -o pipefail mkdir -p ${sub(outFileNamePrefix, basename(outFileNamePrefix) + "$", "")} @@ -33,10 +38,11 @@ task Star { } output { - File bamFile = outFileNamePrefix + "Aligned." + samOutputNames["${outSAMtype}"] + File bamFile = outFileNamePrefix + "Aligned." + samOutputNames[key] } runtime { - threads: runThreadN + cpu: select_first([runThreadN, 1]) + memory: select_first([memory, 10]) } } \ No newline at end of file diff --git a/stringtie.wdl b/stringtie.wdl index 33118d3ed7a3384658e160c7a846bc2547fae41c..5fdcd6ddedcac23e06eb4728b01289a95eaa665e 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -1,7 +1,7 @@ task Stringtie { String? preCommand File alignedReads - File? referenceGFF + File? referenceGtf Int? threads String assembledTranscriptsFile Boolean? firstStranded @@ -13,7 +13,7 @@ task Stringtie { ${preCommand} stringtie \ ${"-p " + threads} \ - ${"-G " + referenceGFF} \ + ${"-G " + referenceGtf} \ ${true="--rf" false="" firstStranded} \ - ${true="fr" false="" secondStranded} \ + ${true="--fr" false="" secondStranded} \ -o ${assembledTranscriptsFile} \ @@ -28,6 +28,6 @@ task Stringtie { } runtime { - threads: threads + cpu: select_first([threads, 1]) } } \ No newline at end of file