From 50c5d957408dbf8a6f1d6aa79c0a3b05ffdde664 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman <r.h.p.vorderman@lumc.nl> Date: Tue, 7 Jul 2020 12:41:49 +0200 Subject: [PATCH] Add samtools controls to hisat2 --- CHANGELOG.md | 3 +++ hisat2.wdl | 21 +++++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4acadc5..edfffb5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Hisat2 task has added controls for samtools. ++ Alignment tasks no longer produce BAM indexes as these are not needed + by the markduplicates step. + Picard SortSam added as a task. + Md5 files are no longer created by default on Picard tasks that generate BAM files. diff --git a/hisat2.wdl b/hisat2.wdl index 5937f86..77c370f 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -34,7 +34,10 @@ task Hisat2 { String summaryFilePath = basename(outputBam, ".bam") + ".summary.txt" Int threads = 4 - String memory = "~{threads + 5 + ceil(size(indexFiles, "G"))}G" + Int sortThreads = 1 + Int sortMemoryPerThreadGb = 2 + Int compressionLevel = 1 + Int memoryGb = 1 + threads + ceil(size(indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 # is a combination of hisat2 and samtools @@ -59,18 +62,21 @@ task Hisat2 { ~{true="--dta" false="" downstreamTranscriptomeAssembly} \ --new-summary \ --summary-file ~{summaryFilePath} \ - | samtools sort > ~{outputBam} - samtools index ~{outputBam} ~{bamIndexPath} + | samtools sort \ + ~{"-@ " + sortThreads} \ + -m ~{sortMemoryPerThreadGb}G \ + -l ~{compressionLevel} \ + - \ + -o ~{outputBam} } output { File bamFile = outputBam - File bamIndex = bamIndexPath File summaryFile = summaryFilePath } runtime { - memory: memory + memory: "~{memoryGb}G" cpu: threads + 1 time_minutes: timeMinutes docker: dockerImage @@ -88,9 +94,12 @@ task Hisat2 { downstreamTranscriptomeAssembly: {description: "Equivalent to hisat2's `--dta` flag.", category: "advanced"} summaryFilePath: {description: "Where the summary file should be written.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} } } \ No newline at end of file -- GitLab