diff --git a/CHANGELOG.md b/CHANGELOG.md index 8043dfe0998fd55195de132b0185a0093079d854..b186098cf5e7e4974ccd2b6c591e0111adbf08ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,16 @@ version 2.2.0-dev + PlotDenoisedCopyRatios + PlotModeledSegments + PreprocessIntervals ++ Add common.TextToFile task. ++ Add bedtools.Intersect. ++ Add `-o pipefail` to bedtools.MergeBedFiles to prevent errors in BED files + from going unnoticed. ++ Centrifuge: Fix -1/-U options for single end data. ++ Add bedtools.Complement, bedtools.Merge, and add a task to combine multiple + bed files called bedtools.MergeBedFiles. This task combines bedtools merge + and sort. ++ Change `g` parameter on bedtools.Sort to `genome`. ++ Add `ploidy` and `excludeIntervalList` to gatk.HaplotypeCallerGvcf. + Update centrifuge tasks. + Removed unused "cores" inputs from transcriptclean tasks. + Removed unused "cores" inputs from talon tasks. diff --git a/bedtools.wdl b/bedtools.wdl index f6748f3189589fb032ff662dad5da392e0287d97..4f39e2a8907b3b8a713373a562e466905f727587 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -20,6 +20,111 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +task Complement { + input { + File faidx + File inputBed + String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + String outputBed = basename(inputBed, "\.bed") + ".complement.bed" + } + + # Use a fasta index file to get the genome sizes. And convert that to the + # bedtools specific "genome" format. 
+ command { + set -e + cut -f1,2 ~{faidx} > sizes.genome + bedtools complement \ + -g sizes.genome \ + -i ~{inputBed} \ + > ~{outputBed} + } + + output { + File complementBed = outputBed + } + + runtime { + docker: dockerImage + } + + parameter_meta { + faidx: {description: "The fasta index (.fai) file from which to extract the genome sizes", + category: "required"} + inputBed: {description: "The inputBed to complement", + category: "required"} + outputBed: {description: "The path to write the output to", + category: "advanced"} + dockerImage: { + description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced" + } + } +} + +task Merge { + input { + File inputBed + String outputBed = "merged.bed" + String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + } + + command { + bedtools merge -i ~{inputBed} > ~{outputBed} + } + + output { + File mergedBed = outputBed + } + + runtime { + docker: dockerImage + } + + parameter_meta { + inputBed: {description: "The bed to merge", + category: "required"} + outputBed: {description: "The path to write the output to", + category: "advanced"} + dockerImage: { + description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced" + } + } +} + +# Use cat, bedtools sort and bedtools merge to merge bedfiles in a single task. 
+task MergeBedFiles { + input { + Array[File]+ bedFiles + String outputBed = "merged.bed" + String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + } + + # A sorted bed is needed for bedtools merge + command { + set -e -o pipefail + cat ~{sep=" " bedFiles} | bedtools sort | bedtools merge > ~{outputBed} + } + + output { + File mergedBed = outputBed + } + + runtime { + docker: dockerImage + } + parameter_meta { + bedFiles: {description: "The bed files to merge", + category: "required"} + outputBed: {description: "The path to write the output to", + category: "advanced"} + dockerImage: { + description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced" + } + } +} + task Sort { input { File inputBed @@ -29,7 +134,7 @@ task Sort { Boolean chrThenSizeD = false Boolean chrThenScoreA = false Boolean chrThenScoreD = false - File? g + File? genome File? faidx String outputBed = "output.sorted.bed" String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" @@ -46,16 +151,62 @@ task Sort { ~{true="-chrThenSizeD" false="" chrThenSizeD} \ ~{true="-chrThenScoreA" false="" chrThenScoreA} \ ~{true="-chrThenScoreD" false="" chrThenScoreD} \ - ~{"-g " + g} \ + ~{"-g " + genome} \ ~{"-faidx" + faidx} \ > ~{outputBed} } output { - File bedFile = outputBed + File sortedBed = outputBed } runtime { docker: dockerImage } } + +task Intersect { + input { + File regionsA + File regionsB + # Giving a faidx file will set the sorted option. + File? 
faidx + String outputBed = "intersect.bed" + String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + } + Boolean sorted = defined(faidx) + + command { + set -e + ~{"cut -f1,2 " + faidx} ~{true="> sorted.genome" false ="" sorted} + bedtools intersect \ + -a ~{regionsA} \ + -b ~{regionsB} \ + ~{true="-sorted" false="" sorted} \ + ~{true="-g sorted.genome" false="" sorted} \ + > ~{outputBed} + } + + output { + File intersectedBed = outputBed + } + + runtime { + docker: dockerImage + } + + parameter_meta { + faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.", + category: "common"} + regionsA: {description: "Region file a to intersect", + category: "required"} + regionsB: {description: "Region file b to intersect", + category: "required"} + outputBed: {description: "The path to write the output to", + category: "advanced"} + dockerImage: { + description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced" + } + } +} diff --git a/centrifuge.wdl b/centrifuge.wdl index 5110b8723e20a2a964032c88921bbb917d0fc949..b9eb7624a378355b1420c022e5e1e1e5b18044ab 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -91,8 +91,8 @@ task Classify { Array[File]+ read1 String outputPrefix String outputName = basename(outputPrefix) + Array[File] read2 = [] - Array[File]? read2 Int? trim5 Int? trim3 Int? 
reportMaxDistinct @@ -121,8 +121,8 @@ task Classify { ~{"--host-taxids " + hostTaxIDs} \ ~{"--exclude-taxids " + excludeTaxIDs} \ ~{"-x " + indexPrefix} \ - ~{true="-1 " false="-U " defined(read2)} ~{sep="," read1} \ - ~{"-2 "} ~{sep="," read2} \ + ~{true="-1" false="-U" length(read2) > 0} ~{sep="," read1} \ + ~{true="-2" false="" length(read2) > 0} ~{sep="," read2} \ ~{"-S " + outputPrefix + "/" + outputName + "_classification.tsv"} \ ~{"--report-file " + outputPrefix + "/" + outputName + "_output_report.tsv"} } diff --git a/common.wdl b/common.wdl index 73325bf4c726f0716b067e6ddc3f7f96b3cb5587..87dcce1391bc938848fee0f551cd230de05af3f5 100644 --- a/common.wdl +++ b/common.wdl @@ -158,6 +158,34 @@ task StringArrayMd5 { } } +task TextToFile { + + input { + String text + String outputFile = "out.txt" + String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" + } + + command <<< + echo $'~{text}' > ~{outputFile} + >>> + + output { + File out = outputFile + } + + parameter_meta { + text: {description: "The text to print", category: "required"} + outputFile: {description: "The name of the output file", category: "common"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } + runtime { + memory: "1G" + docker: dockerImage + } +} + task YamlToJson { input { File yaml diff --git a/gatk.wdl b/gatk.wdl index d6b77ffb4018a5d1b78e3c4ebf3bd82a4bd94351..eff98bf8b1260f14d9691c2ebb92235916ad4c21 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -841,7 +841,8 @@ task HaplotypeCallerGvcf { input { Array[File]+ inputBams Array[File]+ inputBamsIndex - Array[File]+ intervalList + Array[File]+? intervalList + Array[File]+? excludeIntervalList String gvcfPath File referenceFasta File referenceFastaIndex @@ -849,6 +850,7 @@ task HaplotypeCallerGvcf { Float contamination = 0.0 File? dbsnpVCF File? dbsnpVCFIndex + Int? 
ploidy String memory = "12G" String javaXmx = "4G" @@ -863,7 +865,9 @@ task HaplotypeCallerGvcf { -R ~{referenceFasta} \ -O ~{gvcfPath} \ -I ~{sep=" -I " inputBams} \ - -L ~{sep=' -L ' intervalList} \ + ~{"--sample-ploidy " + ploidy} \ + ~{true="-L" false="" defined(intervalList)} ~{sep=' -L ' intervalList} \ + ~{true="-XL" false="" defined(excludeIntervalList)} ~{sep=' -XL ' excludeIntervalList} \ ~{true="-D" false="" defined(dbsnpVCF)} ~{dbsnpVCF} \ -contamination ~{contamination} \ -ERC GVCF @@ -882,8 +886,10 @@ task HaplotypeCallerGvcf { parameter_meta { inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"} inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} - intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "required"} + intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} + excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"} gvcfPath: {description: "The location to write the output GVCF to.", category: "required"} + ploidy: {description: "The ploidy with which the variants should be called.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",