# bedtools.wdl
version 1.0

# Copyright (c) 2017 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Run `bedtools complement` on inputBed, using genome sizes taken from a
# fasta index, and write the result to outputBed.
task Complement {
    input {
        File faidx
        File inputBed
        # basename() strips a literal suffix (it is not a regex), so the dot
        # must not be escaped.
        String outputBed = basename(inputBed, ".bed") + ".complement.bed"
        String memory = "2G"
        Int timeMinutes = 1 + ceil(size([inputBed, faidx], "G"))
        String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
    }

    # Use a fasta index file to get the genome sizes, and convert that to the
    # bedtools specific "genome" format by keeping only its first two columns.
    command {
        set -e
        # outputBed may contain a directory component that does not exist yet.
        mkdir -p "$(dirname ~{outputBed})"
        cut -f1,2 ~{faidx} > sizes.genome
        bedtools complement \
        -g sizes.genome \
        -i ~{inputBed} \
        > ~{outputBed}
    }

    output {
        File complementBed = outputBed
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        faidx: {description: "The fasta index (.fai) file from which to extract the genome sizes.", category: "required"}
        inputBed: {description: "The inputBed to complement.", category: "required"}
        outputBed: {description: "The path to write the output to.", category: "advanced"}
        memory: {description: "The amount of memory needed for the job.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}

# Run `bedtools merge` on a single bed file and write the result to outputBed.
# NOTE(review): bedtools merge expects sorted input (see MergeBedFiles in this
# file, which sorts first) — confirm callers pass a sorted bed.
task Merge {
    input {
        File inputBed
        String outputBed = "merged.bed"
        String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
    }

    command {
        set -e
        # outputBed may contain a directory component that does not exist yet.
        mkdir -p "$(dirname ~{outputBed})"
        bedtools merge -i ~{inputBed} > ~{outputBed}
    }

    output {
        File mergedBed = outputBed
    }

    runtime {
        docker: dockerImage
    }

    parameter_meta {
        inputBed: {description: "The bed to merge",
                   category: "required"}
        outputBed: {description: "The path to write the output to",
                    category: "advanced"}
        dockerImage: {
            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
            category: "advanced"
        }
    }
}

# Use cat, bedtools sort and bedtools merge to merge bedfiles in a single task.
task MergeBedFiles {
    input {
        Array[File]+ bedFiles
        String outputBed = "merged.bed"
        String memory = "2G"
        Int timeMinutes = 1 + ceil(size(bedFiles, "G"))
        String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
    }

    # A sorted bed is needed for bedtools merge, so the concatenated input is
    # piped through bedtools sort first. pipefail makes a failure in any stage
    # of the pipeline fail the task.
    command {
        set -e -o pipefail
        # outputBed may contain a directory component that does not exist yet.
        mkdir -p "$(dirname ~{outputBed})"
        cat ~{sep=" " bedFiles} | bedtools sort | bedtools merge > ~{outputBed}
    }

    output {
        File mergedBed = outputBed
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        bedFiles: {description: "The bed files to merge.", category: "required"}
        outputBed: {description: "The path to write the output to.", category: "advanced"}
        memory: {description: "The amount of memory needed for the job.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}

# Run `bedtools sort` on inputBed and write the result to outputBed.
# Each Boolean input toggles the bedtools flag of the same name.
# NOTE(review): bedtools appears to treat the ordering options as mutually
# exclusive — confirm against the bedtools sort documentation before setting
# more than one.
task Sort {
    input {
        File inputBed
        Boolean sizeA = false
        Boolean sizeD = false
        Boolean chrThenSizeA = false
        Boolean chrThenSizeD = false
        Boolean chrThenScoreA = false
        Boolean chrThenScoreD = false
        File? genome
        File? faidx
        String outputBed = "output.sorted.bed"
        String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
    }

    command {
        set -e
        mkdir -p "$(dirname ~{outputBed})"
        bedtools sort \
        -i ~{inputBed} \
        ~{true="-sizeA" false="" sizeA} \
        ~{true="-sizeD" false="" sizeD} \
        ~{true="-chrThenSizeA" false="" chrThenSizeA} \
        ~{true="-chrThenSizeD" false="" chrThenSizeD} \
        ~{true="-chrThenScoreA" false="" chrThenScoreA} \
        ~{true="-chrThenScoreD" false="" chrThenScoreD} \
        ~{"-g " + genome} \
        ~{"-faidx " + faidx} \
        > ~{outputBed}
    }

    output {
        File sortedBed = outputBed
    }

    runtime {
        docker: dockerImage
    }

    parameter_meta {
        inputBed: {description: "The bed to sort.", category: "required"}
        sizeA: {description: "Pass -sizeA to bedtools sort (sort by feature size, ascending).", category: "common"}
        sizeD: {description: "Pass -sizeD to bedtools sort (sort by feature size, descending).", category: "common"}
        chrThenSizeA: {description: "Pass -chrThenSizeA to bedtools sort (sort by chromosome, then by feature size ascending).", category: "common"}
        chrThenSizeD: {description: "Pass -chrThenSizeD to bedtools sort (sort by chromosome, then by feature size descending).", category: "common"}
        chrThenScoreA: {description: "Pass -chrThenScoreA to bedtools sort (sort by chromosome, then by score ascending).", category: "common"}
        chrThenScoreD: {description: "Pass -chrThenScoreD to bedtools sort (sort by chromosome, then by score descending).", category: "common"}
        genome: {description: "File passed to bedtools sort with -g to define the chromosome sort order.", category: "advanced"}
        faidx: {description: "File passed to bedtools sort with -faidx to define the chromosome sort order.", category: "advanced"}
        outputBed: {description: "The path to write the output to.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}

# Run `bedtools intersect` on regionsA and regionsB and write the result to
# outputBed. When faidx is supplied, a genome file is derived from it and
# bedtools is run with -sorted and -g.
task Intersect {
    input {
        File regionsA
        File regionsB
        # Giving a faidx file will set the sorted option.
        File? faidx
        String outputBed = "intersect.bed"
        String memory = "2G"
        Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G"))
        String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3"
    }
    # Derived flag: true only when the optional faidx input was provided.
    Boolean sorted = defined(faidx)

    command {
        set -e
        # outputBed may contain a directory component that does not exist yet.
        mkdir -p "$(dirname ~{outputBed})"
        # Both placeholders expand to nothing when faidx is absent, leaving an
        # empty line; otherwise this writes the genome file used by -g below.
        ~{"cut -f1,2 " + faidx} ~{true="> sorted.genome" false="" sorted}
        bedtools intersect \
        -a ~{regionsA} \
        -b ~{regionsB} \
        ~{true="-sorted" false="" sorted} \
        ~{true="-g sorted.genome" false="" sorted} \
        > ~{outputBed}
    }

    output {
        File intersectedBed = outputBed
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.",
                category: "common"}
        regionsA: {description: "Region file a to intersect", category: "required"}
        regionsB: {description: "Region file b to intersect", category: "required"}
        outputBed: {description: "The path to write the output to", category: "advanced"}
        memory: {description: "The amount of memory needed for the job.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}