diff --git a/CHANGELOG.md b/CHANGELOG.md
index 58e31684d88063db1c9d6446967a9ab5eb102ed1..26641097973c23d9579a52cde2ed18e365a051aa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,10 @@ that users understand how the changes affect the new version.
 
 version 2.2.0-dev
 ---------------------------
++ Add common.TextToFile task.
++ Add bedtools.Intersect.
++ Add `-o pipefail` to bedtools.MergeBedFiles to prevent errors in BED files
+  from going unnoticed.
 + Centrifuge: Fix -1/-U options for single end data.
 + Add bedtools.Complement, bedtools.Merge, and add a task to combine multiple
   bed files called bedtools.MergeBedFiles. This task combines bedtools merge
diff --git a/bedtools.wdl b/bedtools.wdl
index 407250fe97127ddb23e643223335a5e0fc22bfff..4f39e2a8907b3b8a713373a562e466905f727587 100644
--- a/bedtools.wdl
+++ b/bedtools.wdl
@@ -102,6 +102,7 @@ task MergeBedFiles {
 
     # A sorted bed is needed for bedtools merge
     command {
+        set -e -o pipefail
         cat ~{sep=" " bedFiles} | bedtools sort | bedtools merge > ~{outputBed}
     }
 
@@ -163,3 +164,49 @@ task Sort {
         docker: dockerImage
     }
 }
+
+task Intersect {
+    input {
+        File regionsA
+        File regionsB
+        # Giving a faidx file will set the sorted option.
+        File? faidx
+        String outputBed = "intersect.bed"
+        String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
+    }
+    Boolean sorted = defined(faidx)
+
+    command {
+        set -e
+        ~{"cut -f1,2 " + faidx} ~{true="> sorted.genome" false ="" sorted}
+        bedtools intersect \
+        -a ~{regionsA} \
+        -b ~{regionsB} \
+        ~{true="-sorted" false="" sorted} \
+        ~{true="-g sorted.genome" false="" sorted} \
+        > ~{outputBed}
+    }
+
+    output {
+        File intersectedBed = outputBed
+    }
+
+    runtime {
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.",
+                category: "common"}
+        regionsA: {description: "Region file a to intersect",
+                   category: "required"}
+        regionsB: {description: "Region file b to intersect",
+                   category: "required"}
+        outputBed: {description: "The path to write the output to",
+                    category: "advanced"}
+        dockerImage: {
+            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+            category: "advanced"
+        }
+    }
+}
diff --git a/common.wdl b/common.wdl
index 73325bf4c726f0716b067e6ddc3f7f96b3cb5587..87dcce1391bc938848fee0f551cd230de05af3f5 100644
--- a/common.wdl
+++ b/common.wdl
@@ -158,6 +158,34 @@ task StringArrayMd5 {
     }
 }
 
+task TextToFile {
+
+    input {
+        String text
+        String outputFile = "out.txt"
+        String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa"
+    }
+
+    command <<<
+        echo $'~{text}' > ~{outputFile}
+    >>>
+
+    output {
+        File out = outputFile
+    }
+
+    parameter_meta {
+        text: {description: "The text to print", category: "required"}
+        outputFile: {description: "The name of the output file", category: "common"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+    runtime {
+        memory: "1G"
+        docker: dockerImage
+    }
+}
+
 task YamlToJson {
     input {
         File yaml
diff --git a/gatk.wdl b/gatk.wdl
index b44620aea45bc4be45d4c6a491224bf0a1c85bb9..0b4c71c701b6d7a2ef1013b6ddeef8f77ff6c612 100644
--- a/gatk.wdl
+++ b/gatk.wdl
@@ -342,8 +342,10 @@ task HaplotypeCallerGvcf {
     parameter_meta {
         inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"}
         inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"}
-        intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "required"}
+        intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"}
+        excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"}
         gvcfPath: {description: "The location to write the output GVCF to.", category: "required"}
+        ploidy: {description: "The ploidy with which the variants should be called.", category: "common"}
         referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
         referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",