Merge remote-tracking branch 'origin/develop' into CNV_calling

1194e70c · Cats · 85ceead3 · 94e54514 · 1194e70c · 1194e70c
Commit 1194e70c authored 5 years ago by Cats
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,16 @@ version 2.2.0-dev
    + PlotDenoisedCopyRatios
    + PlotModeledSegments
    + PreprocessIntervals
+ Add common.TextToFile task.
+ Add bedtools.Intersect.
+ Add `-o pipefail` to bedtools.MergeBedFiles to prevent errors in BED files 
+  from going unnoticed.
+ Centrifuge: Fix -1/-U options for single end data.
+ Add bedtools.Complement and bedtools.Merge, and add bedtools.MergeBedFiles,
+  a task that combines multiple bed files into one using bedtools sort and
+  bedtools merge.
+ Change `g` parameter on bedtools.Sort to `genome`.
+ Add `ploidy` and `excludeIntervalList` to gatk.HaplotypeCallerGvcf.
 + Update centrifuge tasks.
 + Removed unused "cores" inputs from transcriptclean tasks.
 + Removed unused "cores" inputs from talon tasks.

--- a/bedtools.wdl
+++ b/bedtools.wdl
@@ -20,6 +20,111 @@ version 1.0
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

+# Run `bedtools complement` on inputBed, using chromosome sizes taken from the
+# first two columns of a fasta index, and write the result to outputBed.
+task Complement {
+    input {
+        File faidx
+        File inputBed
+        String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
+        # basename() removes a literal suffix (it is not a regular expression),
+        # so the dot must not be escaped.
+        String outputBed = basename(inputBed, ".bed") + ".complement.bed"
+    }
+
+    # Use a fasta index file to get the genome sizes, and convert that to the
+    # bedtools specific "genome" format (name and size columns only).
+    command {
+        set -e
+        cut -f1,2 ~{faidx} > sizes.genome
+        bedtools complement \
+        -g sizes.genome \
+        -i ~{inputBed} \
+        > ~{outputBed}
+    }
+
+    output {
+        File complementBed = outputBed
+    }
+
+    runtime {
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        faidx: {description: "The fasta index (.fai) file from which to extract the genome sizes",
+                category: "required"}
+        inputBed: {description: "The inputBed to complement",
+                category: "required"}
+        outputBed: {description: "The path to write the output to",
+                     category: "advanced"}
+        dockerImage: {
+            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+            category: "advanced"
+        }
+    }
+}
+
+# Run `bedtools merge` on a single bed file and write the result to outputBed.
+# The input is passed to bedtools as-is; no sorting is done in this task
+# (MergeBedFiles sorts before merging).
+task Merge {
+    input {
+        File inputBed
+        String outputBed = "merged.bed"
+        String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
+    }
+
+    # bedtools writes the merged intervals to stdout, which is redirected to
+    # outputBed.
+    command {
+        bedtools merge -i ~{inputBed} > ~{outputBed}
+    }
+
+    output {
+        File mergedBed = outputBed
+    }
+
+    runtime {
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        inputBed: {description: "The bed to merge",
+                   category: "required"}
+        outputBed: {description: "The path to write the output to",
+                    category: "advanced"}
+        dockerImage: {
+            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+            category: "advanced"
+        }
+    }
+}
+
+# Use cat, bedtools sort and bedtools merge to merge bedfiles in a single task.
+# All input files are concatenated, sorted and merged into one output bed file.
+task MergeBedFiles {
+    input {
+        # At least one bed file is required (non-empty array).
+        Array[File]+ bedFiles
+        String outputBed = "merged.bed"
+        String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
+    }
+
+    # A sorted bed is needed for bedtools merge, hence the bedtools sort step.
+    # pipefail makes the task fail when any command in the pipe fails, not
+    # only the last one, so errors in the bed files do not go unnoticed.
+    command {
+        set -e -o pipefail
+        cat ~{sep=" " bedFiles} | bedtools sort | bedtools merge > ~{outputBed}
+    }
+
+    output {
+        File mergedBed = outputBed
+    }
+
+    runtime {
+        docker: dockerImage
+    }
+    parameter_meta {
+        bedFiles: {description: "The bed files to merge",
+                category: "required"}
+        outputBed: {description: "The path to write the output to",
+                     category: "advanced"}
+        dockerImage: {
+            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+            category: "advanced"
+        }
+    }
+}
+
 task Sort {
    input {
        File inputBed
@@ -29,7 +134,7 @@ task Sort {
        Boolean chrThenSizeD = false
        Boolean chrThenScoreA = false
        Boolean chrThenScoreD = false
-        File? g
+        File? genome
        File? faidx
        String outputBed = "output.sorted.bed"
        String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
@@ -46,16 +151,62 @@ task Sort {
        ~{true="-chrThenSizeD" false="" chrThenSizeD} \
        ~{true="-chrThenScoreA" false="" chrThenScoreA} \
        ~{true="-chrThenScoreD" false="" chrThenScoreD} \
-        ~{"-g " + g} \
+        ~{"-g " + genome} \
        ~{"-faidx" + faidx} \
        > ~{outputBed}
    }

    output {
-        File bedFile = outputBed
+        File sortedBed = outputBed
    }

    runtime {
        docker: dockerImage
    }
 }
+
+# Run `bedtools intersect` on two region files and write the result to
+# outputBed. When a fasta index is given, a genome file is derived from it and
+# bedtools is run with -sorted and -g.
+task Intersect {
+    input {
+        File regionsA
+        File regionsB
+        # Giving a faidx file will set the sorted option.
+        File? faidx
+        String outputBed = "intersect.bed"
+        String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
+    }
+    Boolean sorted = defined(faidx)
+
+    # The first command line only renders to a command when faidx is defined;
+    # otherwise both placeholders evaluate to empty strings and the line is
+    # blank. The genome file holds the first two columns of the fasta index.
+    command {
+        set -e
+        ~{"cut -f1,2 " + faidx} ~{true="> sorted.genome" false="" sorted}
+        bedtools intersect \
+        -a ~{regionsA} \
+        -b ~{regionsB} \
+        ~{true="-sorted" false="" sorted} \
+        ~{true="-g sorted.genome" false="" sorted} \
+        > ~{outputBed}
+    }
+
+    output {
+        File intersectedBed = outputBed
+    }
+
+    runtime {
+        docker: dockerImage
+    }
+
+    parameter_meta {
+        faidx: {description: "The fasta index (.fai) file that is used to create the genome file describing the chromosome order of the sorted input files. Implies sorted option.",
+                category: "common"}
+        regionsA: {description: "Region file a to intersect",
+                   category: "required"}
+        regionsB: {description: "Region file b to intersect",
+                   category: "required"}
+        outputBed: {description: "The path to write the output to",
+                    category: "advanced"}
+        dockerImage: {
+            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+            category: "advanced"
+        }
+    }
+}
--- a/centrifuge.wdl
+++ b/centrifuge.wdl
@@ -91,8 +91,8 @@ task Classify {
        Array[File]+ read1
        String outputPrefix
        String outputName = basename(outputPrefix)
+        Array[File] read2 = []

-        Array[File]? read2
        Int? trim5
        Int? trim3
        Int? reportMaxDistinct
@@ -121,8 +121,8 @@ task Classify {
        ~{"--host-taxids " + hostTaxIDs} \
        ~{"--exclude-taxids " + excludeTaxIDs} \
        ~{"-x " + indexPrefix} \
-        ~{true="-1 " false="-U " defined(read2)} ~{sep="," read1} \
-        ~{"-2 "} ~{sep="," read2} \
+        ~{true="-1" false="-U" length(read2) > 0} ~{sep="," read1} \
+        ~{true="-2" false="" length(read2) > 0} ~{sep="," read2} \
        ~{"-S " + outputPrefix + "/" + outputName + "_classification.tsv"} \
        ~{"--report-file " + outputPrefix + "/" + outputName + "_output_report.tsv"}
    }

--- a/common.wdl
+++ b/common.wdl
@@ -158,6 +158,34 @@ task StringArrayMd5 {
    }
 }

+# Write a string to a file using echo and expose that file as the task output.
+task TextToFile {
+
+    input {
+        String text
+        String outputFile = "out.txt"
+        String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa"
+    }
+
+    # The text is placed inside $'...' quoting, so (assuming the command runs
+    # under bash) backslash escapes such as \n in the text are expanded.
+    # NOTE(review): a single quote inside text would end the quoted string and
+    # break the command; confirm callers never pass one.
+    command <<<
+        echo $'~{text}' > ~{outputFile}
+    >>>
+
+    output {
+        File out = outputFile
+    }
+
+    parameter_meta {
+        text: {description: "The text to print", category: "required"}
+        outputFile: {description: "The name of the output file", category: "common"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+    runtime {
+        memory: "1G"
+        docker: dockerImage
+    }
+}
+
 task YamlToJson {
    input {
        File yaml

--- a/gatk.wdl
+++ b/gatk.wdl
@@ -841,7 +841,8 @@ task HaplotypeCallerGvcf {
    input {
        Array[File]+ inputBams
        Array[File]+ inputBamsIndex
-        Array[File]+ intervalList
+        Array[File]+? intervalList
+        Array[File]+? excludeIntervalList
        String gvcfPath
        File referenceFasta
        File referenceFastaIndex
@@ -849,6 +850,7 @@ task HaplotypeCallerGvcf {
        Float contamination = 0.0
        File? dbsnpVCF
        File? dbsnpVCFIndex
+        Int? ploidy

        String memory = "12G"
        String javaXmx = "4G"
@@ -863,7 +865,9 @@ task HaplotypeCallerGvcf {
        -R ~{referenceFasta} \
        -O ~{gvcfPath} \
        -I ~{sep=" -I " inputBams} \
-        -L ~{sep=' -L ' intervalList} \
+        ~{"--sample-ploidy " + ploidy} \
+        ~{true="-L" false="" defined(intervalList)} ~{sep=' -L ' intervalList} \
+        ~{true="-XL" false="" defined(excludeIntervalList)} ~{sep=' -XL ' excludeIntervalList} \
        ~{true="-D" false="" defined(dbsnpVCF)} ~{dbsnpVCF} \
        -contamination ~{contamination} \
        -ERC GVCF
@@ -882,8 +886,10 @@ task HaplotypeCallerGvcf {
    parameter_meta {
        inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"}
        inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"}
-        intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "required"}
+        intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"}
+        excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"}
        gvcfPath: {description: "The location to write the output GVCF to.", category: "required"}
+        ploidy: {description: "The ploidy with which the variants should be called.", category: "common"}
        referenceFasta: {description: "The reference fasta file which was also used for mapping.",
                         category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",