diff --git a/CHANGELOG.md b/CHANGELOG.md
index b0b7c3e8a711acb890ed1cc0e218dc66daf4ac6e..b522c02c68e156359ac084b35abda805c53c010e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,9 @@ that users understand how the changes affect the new version.
 version 5.1.0-dev
 ---------------------------
 + Updated SURVIVOR version to 1.0.7
++ Add a combined samtools dict and samtools faidx task.
++ Add a BWA index task.
+
 + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G`
   previously. The WDL spec clearly distuingishes between SI and binary
   notations. Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and
diff --git a/bwa.wdl b/bwa.wdl
index d4f4495afbc008023ac8fb138791f2f93952cc14..66b8e8cc9581a897cf339234ac699ca86309e885 100644
--- a/bwa.wdl
+++ b/bwa.wdl
@@ -114,3 +114,53 @@ struct BwaIndex {
     File fastaFile
     Array[File] indexFiles
 }
+
+# Copy the reference fasta into the execution directory and run bwa index on
+# the copy, so the fasta and its index files end up in the same directory.
+task Index {
+    input {
+        File fasta
+        String dockerImage = "quay.io/biocontainers/bwa:0.7.17--hed695b0_7"
+        Int? timeMinutes = 5 + ceil(size(fasta, "G") * 5)
+    }
+    String indexedFile = basename(fasta)
+
+    command {
+        set -e
+        cp ~{fasta} ~{indexedFile}
+        bwa index ~{indexedFile}
+    }
+
+    output {
+        BwaIndex index = object {
+            fastaFile: indexedFile,
+            indexFiles: [
+                indexedFile + ".amb",
+                indexedFile + ".ann",
+                indexedFile + ".bwt",
+                indexedFile + ".pac",
+                indexedFile + ".sa"
+            ]
+        }
+    }
+
+    runtime {
+        docker: dockerImage
+        cpu: 1
+        # ceil() makes the request a whole number of GiB; size() returns a Float.
+        # NOTE(review): confirm the size-based memory and time defaults are enough
+        # for large references; see the memory notes in the bwa index manual.
+        memory: "~{ceil(size(fasta, 'G')) + 1}GiB"
+        time_minutes: timeMinutes
+    }
+
+    parameter_meta {
+        # inputs
+        fasta: {description: "Reference fasta file.", category: "required"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+        # outputs
+        index: {description: "The produced BWA index."}
+    }
+}
diff --git a/samtools.wdl b/samtools.wdl
index e1b081733e1398081281559a8735e017007972cc..df712e51ceb92f58da2764dd0855fdc12078a4a0 100644
--- a/samtools.wdl
+++ b/samtools.wdl
@@ -66,6 +66,55 @@ task BgzipAndIndex {
     }
 }
 
+# Copy the fasta into the execution directory and create its sequence
+# dictionary and fai index there, so all three files are co-located.
+task DictAndFaidx {
+    input {
+        File inputFile
+        String javaXmx = "2G"
+        String memory = "3GiB"
+        Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5)
+        String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0"
+    }
+
+    String outputFile = basename(inputFile)
+    # Strip a trailing fasta extension such as .fa, .fna or .fasta before adding .dict.
+    String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict"
+    # This executes both dict and faidx, so indexes are co-located in the same folder.
+    command <<<
+        set -e
+        cp ~{inputFile} ~{outputFile}
+        samtools dict -o ~{outputDict} ~{outputFile}
+        samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai
+    >>>
+
+    output {
+        File outputFasta = outputFile
+        File outputFastaDict = outputDict
+        File outputFastaFai = outputFile + ".fai"
+    }
+
+    runtime {
+        memory: memory
+        docker: dockerImage
+        time_minutes: timeMinutes
+        cpu: 1
+    }
+
+    parameter_meta {
+        # inputs
+        inputFile: {description: "The input fasta file.", category: "required"}
+        javaXmx: {description: "Currently not used by the command in this task.", category: "advanced"}
+        memory: {description: "The amount of memory available to the job.", category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+        # outputs
+        outputFasta: {description: "Fasta file that is co-located with the indexes"}
+        outputFastaFai: {description: "Fasta index file for the outputFasta file."}
+        outputFastaDict: {description: "Sequence dictionary for the outputFasta file."}
+    }
+}
+
 task Faidx {
     input {
         File inputFile
@@ -470,7 +519,7 @@ task Sort {
 task Tabix {
     input {
         File inputFile
-        String outputFilePath = "indexed.vcf.gz"
+        String outputFilePath = basename(inputFile)
         String type = "vcf"
 
         Int timeMinutes = 1 + ceil(size(inputFile, "GiB") * 2)