version 1.0

# Copyright (c) 2018 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

task AnnotateIntervals {
    # Runs `gatk AnnotateIntervals` on the given intervals and writes the
    # annotated table to `annotatedIntervalsPath`.
    input {
        File referenceFasta
        # The dictionary and index are not referenced in the command itself;
        # presumably they are declared so that they are localized next to the
        # fasta (TODO confirm).
        File referenceFastaDict
        File referenceFastaFai
        String annotatedIntervalsPath = "intervals.annotated.tsv"
        File intervals
        String intervalMergingRule = "OVERLAPPING_ONLY"
        File? mappabilityTrack
        File? segmentalDuplicationTrack
        Int featureQueryLookahead = 1000000

        String memory = "3G"
        String javaXmx = "2G"
        Int timeMinutes = 5
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # The two track flags are only emitted when the matching optional input is
    # defined: a placeholder that concatenates with an undefined optional
    # evaluates to an empty string.
    command {
        set -e
        mkdir -p "$(dirname ~{annotatedIntervalsPath})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        AnnotateIntervals \
        -R ~{referenceFasta} \
        -L ~{intervals} \
        ~{"--mappability-track  " + mappabilityTrack} \
        ~{"--segmental-duplication-track " + segmentalDuplicationTrack} \
        --feature-query-lookahead ~{featureQueryLookahead} \
        --interval-merging-rule ~{intervalMergingRule} \
        -O ~{annotatedIntervalsPath}
    }

    output {
        File annotatedIntervals = annotatedIntervalsPath
    }

    runtime {
        docker: dockerImage
        time_minutes: timeMinutes
        memory: memory
    }

    parameter_meta {
        referenceFasta: {description: "The reference fasta file.", category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
        annotatedIntervalsPath: {description: "The location the output should be written to.", category: "advanced"}
        intervals: {description: "An interval list describing the intervals to annotate.", category: "required"}
        intervalMergingRule: {description: "Equivalent to gatk AnnotateIntervals' `--interval-merging-rule` option.", category: "advanced"}
        mappabilityTrack: {description: "Equivalent to gatk AnnotateIntervals' `--mappability-track` option.", category: "common"}
        segmentalDuplicationTrack: {description: "Equivalent to gatk AnnotateIntervals' `--segmental-duplication-track` option.", category: "common"}
        featureQueryLookahead: {description: "Equivalent to gatk AnnotateIntervals' `--feature-query-lookahead` option", category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}

# Apply Base Quality Score Recalibration (BQSR) model
task ApplyBQSR {
    # Runs `gatk ApplyBQSR` on `inputBam` using `recalibrationReport` and writes
    # the recalibrated BAM to `outputBamPath`.
    input {
        File inputBam
        File inputBamIndex
        String outputBamPath
        File recalibrationReport
        Array[File] sequenceGroupInterval = []
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai

        # The default memory request is the Java heap size plus 512 MB.
        Int memoryMb = javaXmxMb + 512
        Int javaXmxMb = 2048
        Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used.
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # The last line of the command expands to `-L a -L b ...` when intervals
    # are given and to nothing when `sequenceGroupInterval` is empty.
    command {
        set -e
        mkdir -p "$(dirname ~{outputBamPath})"
        gatk --java-options '-Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1' \
        ApplyBQSR \
        --create-output-bam-md5 \
        --add-output-sam-program-record \
        -R ~{referenceFasta} \
        -I ~{inputBam} \
        --use-original-qualities \
        -O ~{outputBamPath} \
        -bqsr ~{recalibrationReport} \
        --static-quantized-quals 10 \
        --static-quantized-quals 20 \
        --static-quantized-quals 30 \
        ~{true="-L" false="" length(sequenceGroupInterval) > 0} ~{sep=' -L ' sequenceGroupInterval}
    }

    output {
        File recalibratedBam = outputBamPath
        # The index path is the output path with a trailing `.bam` replaced by
        # `.bai`.
        File recalibratedBamIndex = sub(outputBamPath, "\.bam$", ".bai")
        # Presumably produced because of `--create-output-bam-md5` above.
        File recalibratedBamMd5 = outputBamPath + ".md5"
    }

    runtime {
        docker: dockerImage
        time_minutes: timeMinutes
        memory: "~{memoryMb}M"
    }

    parameter_meta {
        inputBam: {description: "The BAM file which should be recalibrated.", category: "required"}
        inputBamIndex: {description: "The input BAM file's index.", category: "required"}
        outputBamPath: {description: "The location the resulting BAM file should be written.", category: "required"}
        recalibrationReport: {description: "The BQSR report to be used for recalibration.", category: "required"}
        sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advanced"}
        referenceFasta: {description: "The reference fasta file which was also used for mapping.",
                         category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
                             category: "required"}
        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}

        memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"}
        javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}

# Generate Base Quality Score Recalibration (BQSR) model
task BaseRecalibrator {
    # Runs `gatk BaseRecalibrator` on `inputBam` and writes the recalibration
    # report to `recalibrationReportPath`.
    input {
        File inputBam
        File inputBamIndex
        String recalibrationReportPath
        Array[File] sequenceGroupInterval = []
        Array[File] knownIndelsSitesVCFs = []
        # The VCF index inputs are not referenced in the command; presumably
        # they are declared so they are localized next to their VCFs (TODO
        # confirm).
        Array[File] knownIndelsSitesVCFIndexes = []
        File? dbsnpVCF
        File? dbsnpVCFIndex
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai

        # The default memory request is the Java heap size plus 512 MB.
        Int memoryMb = javaXmxMb + 512
        Int javaXmxMb = 1024
        Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used.
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # `--known-sites` is passed once per entry of `knownIndelsSitesVCFs` and
    # once more for `dbsnpVCF` when it is defined. `-L` is passed once per
    # entry of `sequenceGroupInterval`. Empty arrays contribute nothing.
    command {
        set -e
        mkdir -p "$(dirname ~{recalibrationReportPath})"
        gatk --java-options '-Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1' \
        BaseRecalibrator \
        -R ~{referenceFasta} \
        -I ~{inputBam} \
        --use-original-qualities \
        -O ~{recalibrationReportPath} \
        ~{true="--known-sites" false="" length(knownIndelsSitesVCFs) > 0} ~{sep=" --known-sites " knownIndelsSitesVCFs} \
        ~{"--known-sites " + dbsnpVCF} \
        ~{true="-L" false="" length(sequenceGroupInterval) > 0} ~{sep=' -L ' sequenceGroupInterval}
    }

    output {
        File recalibrationReport = recalibrationReportPath
    }

    runtime {
        docker: dockerImage
        time_minutes: timeMinutes
        memory: "~{memoryMb}M"
    }

    parameter_meta {
        inputBam: {description: "The BAM file to generate a BQSR report for.", category: "required"}
        inputBamIndex: {description: "The index of the input BAM file.", category: "required"}
        recalibrationReportPath: {description: "The location to write the BQSR report to.", category: "required"}
        sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advanced"}
        knownIndelsSitesVCFs: {description: "VCF files with known indels.", category: "advanced"}
        knownIndelsSitesVCFIndexes: {description: "The indexes for the known variant VCFs.", category: "advanced"}
        dbsnpVCF: {description: "A dbSNP VCF.", category: "common"}
        dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"}
        referenceFasta: {description: "The reference fasta file which was also used for mapping.",
                         category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
                             category: "required"}
        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}

        memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"}
        javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}

task CalculateContamination {
    # Runs `gatk CalculateContamination` on a tumor pileup summary, optionally
    # matched against a normal pileup summary. Both outputs are written to
    # fixed, relative file names.
    input {
        # Pileup summaries.
        File tumorPileups
        File? normalPileups

        # Resources and container.
        String javaXmx = "12G"
        String memory = "13G"
        Int timeMinutes = 180
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # `-matched` is only emitted when `normalPileups` is defined.
    command {
        set -e
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        CalculateContamination \
        -I ~{tumorPileups} \
        ~{"-matched " + normalPileups} \
        -O "contamination.table" \
        --tumor-segmentation "segments.table"
    }

    output {
        File mafTumorSegments = "segments.table"
        File contaminationTable = "contamination.table"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        tumorPileups: {
            description: "The pileup summary of a tumor/case sample.",
            category: "required"
        }
        normalPileups: {
            description: "The pileup summary of the normal/control sample.",
            category: "common"
        }
        memory: {
            description: "The amount of memory this job will use.",
            category: "advanced"
        }
        javaXmx: {
            description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
            category: "advanced"
        }
        timeMinutes: {
            description: "The maximum amount of time the job will run in minutes.",
            category: "advanced"
        }
        dockerImage: {
            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
            category: "advanced"
        }
    }
}

task CallCopyRatioSegments {
    # Runs `gatk CallCopyRatioSegments` on a copy ratio segments file and
    # writes the result to `<outputPrefix>.called.seg`.
    input {
        File copyRatioSegments
        String outputPrefix

        # Resources and container.
        String javaXmx = "2G"
        String memory = "3G"
        Int timeMinutes = 2
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    command {
        set -e
        mkdir -p "$(dirname ~{outputPrefix})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        CallCopyRatioSegments \
        -I ~{copyRatioSegments} \
        -O ~{outputPrefix}.called.seg
    }

    output {
        File calledSegments = "~{outputPrefix}.called.seg"
        # Only the `.called.seg` path is passed to the tool; the IGV file is
        # presumably written next to it by GATK itself (TODO confirm).
        File calledSegmentsIgv = "~{outputPrefix}.called.igv.seg"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        outputPrefix: {
            description: "The prefix for the output files.",
            category: "required"
        }
        copyRatioSegments: {
            description: "The copy ratios file generated by gatk ModelSegments.",
            category: "required"
        }
        memory: {
            description: "The amount of memory this job will use.",
            category: "advanced"
        }
        javaXmx: {
            description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
            category: "advanced"
        }
        timeMinutes: {
            description: "The maximum amount of time the job will run in minutes.",
            category: "advanced"
        }
        dockerImage: {
            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
            category: "advanced"
        }
    }
}

task CollectAllelicCounts {
    # Runs `gatk CollectAllelicCounts` for `inputBam` at the sites given by
    # `commonVariantSites` and writes the counts to `allelicCountsPath`.
    input {
        # Data inputs. The index, dictionary and fai files are not referenced
        # in the command; presumably they are declared so they are localized
        # next to their primary files (TODO confirm).
        File inputBam
        File inputBamIndex
        File commonVariantSites
        File? commonVariantSitesIndex
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai
        String allelicCountsPath = "allelic_counts.tsv"

        # Resources and container.
        String javaXmx = "10G"
        String memory = "11G"
        Int timeMinutes = 120
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    command {
        set -e
        mkdir -p "$(dirname ~{allelicCountsPath})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        CollectAllelicCounts \
        -L ~{commonVariantSites} \
        -I ~{inputBam} \
        -R ~{referenceFasta} \
        -O ~{allelicCountsPath}
    }

    output {
        File allelicCounts = allelicCountsPath
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        allelicCountsPath: {
            description: "The path the output should be written to.",
            category: "advanced"
        }
        commonVariantSites: {
            description: "Interval list or vcf of common variant sites (to retrieve the allelic counts for).",
            category: "required"
        }
        commonVariantSitesIndex: {
            description: "The index for commonVariantSites.",
            category: "common"
        }
        inputBam: {
            description: "The BAM file to generate counts for.",
            category: "required"
        }
        inputBamIndex: {
            description: "The index of the input BAM file.",
            category: "required"
        }
        referenceFasta: {
            description: "The reference fasta file.",
            category: "required"
        }
        referenceFastaDict: {
            description: "The sequence dictionary associated with the reference fasta file.",
            category: "required"
        }
        referenceFastaFai: {
            description: "The index for the reference fasta file.",
            category: "required"
        }
        memory: {
            description: "The amount of memory this job will use.",
            category: "advanced"
        }
        javaXmx: {
            description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
            category: "advanced"
        }
        timeMinutes: {
            description: "The maximum amount of time the job will run in minutes.",
            category: "advanced"
        }
        dockerImage: {
            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
            category: "advanced"
        }
    }
}

task CollectReadCounts {
    # Runs `gatk CollectReadCounts` for `inputBam` over `intervals` and writes
    # the counts in HDF5 format to `countsPath`.
    input {
        # Data inputs.
        File inputBam
        File inputBamIndex
        File intervals
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai
        String intervalMergingRule = "OVERLAPPING_ONLY"
        String countsPath = "readcounts.hdf5"

        # Resources and container. The default time limit is one minute plus
        # five minutes per "G" of input BAM, rounded up.
        String javaXmx = "7G"
        String memory = "8G"
        Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5)
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    command {
        set -e
        mkdir -p "$(dirname ~{countsPath})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        CollectReadCounts \
        -L ~{intervals} \
        -I ~{inputBam} \
        -R ~{referenceFasta} \
        --format HDF5 \
        --interval-merging-rule ~{intervalMergingRule} \
        -O ~{countsPath}
    }

    output {
        File counts = countsPath
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        countsPath: {
            description: "The location the output should be written to.",
            category: "advanced"
        }
        intervals: {
            description: "The intervals to collect counts for.",
            category: "required"
        }
        inputBam: {
            description: "The BAM file to determine the coverage for.",
            category: "required"
        }
        inputBamIndex: {
            description: "The input BAM file's index.",
            category: "required"
        }
        referenceFasta: {
            description: "The reference fasta file.",
            category: "required"
        }
        referenceFastaDict: {
            description: "The sequence dictionary associated with the reference fasta file.",
            category: "required"
        }
        referenceFastaFai: {
            description: "The index for the reference fasta file.",
            category: "required"
        }
        intervalMergingRule: {
            description: "Equivalent to gatk CollectReadCounts' `--interval-merging-rule` option.",
            category: "advanced"
        }
        memory: {
            description: "The amount of memory this job will use.",
            category: "advanced"
        }
        javaXmx: {
            description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
            category: "advanced"
        }
        timeMinutes: {
            description: "The maximum amount of time the job will run in minutes.",
            category: "advanced"
        }
        dockerImage: {
            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
            category: "advanced"
        }
    }
}

task CombineGVCFs {
    # Runs `gatk CombineGVCFs` over all `gvcfFiles` and writes the combined
    # GVCF to `outputPath`.
    input {
        Array[File]+ gvcfFiles
        # Not referenced in the command; presumably declared so the indexes
        # are localized next to the GVCFs (TODO confirm).
        Array[File]+ gvcfFilesIndex
        Array[File] intervals = []
        String outputPath
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai

        String memory = "5G"
        String javaXmx = "4G"
        # The default time limit is one minute plus eight minutes per "G" of
        # input GVCFs, rounded up.
        Int timeMinutes = 1 + ceil(size(gvcfFiles, "G") * 8)
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # `-V` is passed once per GVCF. `-L` is passed once per interval file and
    # omitted entirely when `intervals` is empty.
    command {
        set -e
        mkdir -p "$(dirname ~{outputPath})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        CombineGVCFs \
        -R ~{referenceFasta} \
        -O ~{outputPath} \
        -V ~{sep=' -V ' gvcfFiles} \
        ~{true='-L' false='' length(intervals) > 0} ~{sep=' -L ' intervals}
    }

    output {
        File outputVcf = outputPath
        File outputVcfIndex = outputPath + ".tbi"
    }

    runtime {
        docker: dockerImage
        time_minutes: timeMinutes
        memory: memory
    }

    parameter_meta {
        gvcfFiles: {description: "The GVCF files to be combined.", category: "required"}
        gvcfFilesIndex: {description: "The indexes for the GVCF files.", category: "required"}
        intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"}
        outputPath: {description: "The location the combined GVCF should be written to.", category: "required"}
        referenceFasta: {description: "The reference fasta file which was also used for mapping.",
                         category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
                             category: "required"}
        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}

        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}

task CombineVariants {
    # Runs the GATK3 `CombineVariants` walker (via the GenomeAnalysisTK jar in
    # the docker image) and writes the merged VCF to `outputPath`.
    input {
        # `identifiers` and `variantVcfs` are paired by position: entry i of
        # one belongs to entry i of the other.
        Array[String]+ identifiers
        Array[File]+ variantVcfs
        Array[File]+ variantIndexes
        String outputPath
        File referenceFasta
        File referenceFastaFai
        File referenceFastaDict
        String genotypeMergeOption = "UNIQUIFY"
        String filteredRecordsMergeType = "KEEP_IF_ANY_UNFILTERED"

        # Resources and container.
        String javaXmx = "12G"
        String memory = "13G"
        Int timeMinutes = 180
        String dockerImage = "broadinstitute/gatk3:3.8-1"
    }

    # The heredoc form is used, so only `~{...}` is interpolated by WDL and the
    # bash `${...}` expansions reach the shell untouched. `$V_args` is left
    # unquoted on purpose so it splits into separate arguments.
    command <<<
        set -e
        mkdir -p "$(dirname ~{outputPath})"

        # build "-V:<ID> <file.vcf>" arguments according to IDs and VCFs to merge
        # Make sure commands are run in bash
        V_args=$(bash -c '
        set -eu
        ids=(~{sep=" " identifiers})
        vars=(~{sep=" " variantVcfs})
        for (( i = 0; i < ${#ids[@]}; ++i ))
          do
            printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}"
          done
        ')
        java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 -jar /usr/GenomeAnalysisTK.jar \
        -T CombineVariants \
        -R ~{referenceFasta} \
        --genotypemergeoption ~{genotypeMergeOption} \
        --filteredrecordsmergetype ~{filteredRecordsMergeType} \
        --out ~{outputPath} \
        $V_args
    >>>

    output {
        File combinedVcf = outputPath
        File combinedVcfIndex = "~{outputPath}.tbi"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        referenceFasta: {
            description: "The reference fasta file which was also used for mapping.",
            category: "required"
        }
        referenceFastaDict: {
            description: "The sequence dictionary associated with the reference fasta file.",
            category: "required"
        }
        referenceFastaFai: {
            description: "The index for the reference fasta file.",
            category: "required"
        }
        genotypeMergeOption: {
            description: "Equivalent to CombineVariants' `--genotypemergeoption` option.",
            category: "advanced"
        }
        filteredRecordsMergeType: {
            description: "Equivalent to CombineVariants' `--filteredrecordsmergetype` option.",
            category: "advanced"
        }
        identifiers: {
            description: "The sample identifiers in the same order as variantVcfs.",
            category: "required"
        }
        variantVcfs: {
            description: "The input VCF files in the same order as identifiers.",
            category: "required"
        }
        variantIndexes: {
            description: "The indexes of the input VCF files.",
            category: "required"
        }
        outputPath: {
            description: "The location the output should be written to",
            category: "required"
        }

        memory: {
            description: "The amount of memory this job will use.",
            category: "advanced"
        }
        javaXmx: {
            description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
            category: "advanced"
        }
        timeMinutes: {
            description: "The maximum amount of time the job will run in minutes.",
            category: "advanced"
        }
        dockerImage: {
            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
            category: "advanced"
        }
    }
}

task CreateReadCountPanelOfNormals {
    # Runs `gatk CreateReadCountPanelOfNormals` over all `readCountsFiles` and
    # writes the panel of normals to `PONpath`.
    input {
        String PONpath = "PON.hdf5"
        Array[File]+ readCountsFiles
        File? annotatedIntervals

        String memory = "8G"
        String javaXmx = "7G"
        Int timeMinutes = 5
        String dockerImage = "broadinstitute/gatk:4.1.8.0" # The biocontainer causes a spark related error for some reason...
    }

    # `-I` is passed once per read counts file. `--annotated-intervals` is only
    # emitted when `annotatedIntervals` is defined.
    command {
        set -e
        mkdir -p "$(dirname ~{PONpath})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        CreateReadCountPanelOfNormals \
        -I ~{sep=" -I " readCountsFiles} \
        ~{"--annotated-intervals " + annotatedIntervals} \
        -O ~{PONpath}
    }

    output {
        File PON = PONpath
    }

    runtime {
        docker: dockerImage
        time_minutes: timeMinutes
        memory: memory
    }

    parameter_meta {
        PONpath: {description: "The location the PON should be written to.", category: "common"}
        readCountsFiles: {description: "The read counts files as generated by CollectReadCounts.", category: "required"}
        annotatedIntervals: {description: "An annotated set of intervals as generated by AnnotateIntervals. If provided, explicit GC correction will be performed.",
                             category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}

task DenoiseReadCounts {
    # Runs `gatk DenoiseReadCounts` on a read counts file and writes the
    # standardized and denoised copy ratios under `outputPrefix`.
    input {
        # Data inputs.
        File readCounts
        File? PON
        File? annotatedIntervals
        String outputPrefix

        # Resources and container.
        String javaXmx = "4G"
        String memory = "5G"
        Int timeMinutes = 5
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # The panel-of-normals and annotated-intervals flags are only emitted when
    # the matching optional input is defined.
    command {
        set -e
        mkdir -p "$(dirname ~{outputPrefix})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        DenoiseReadCounts \
        -I ~{readCounts} \
        ~{"--count-panel-of-normals " + PON} \
        ~{"--annotated-intervals " + annotatedIntervals} \
        --standardized-copy-ratios ~{outputPrefix}.standardizedCR.tsv \
        --denoised-copy-ratios ~{outputPrefix}.denoisedCR.tsv
    }

    output {
        File standardizedCopyRatios = "~{outputPrefix}.standardizedCR.tsv"
        File denoisedCopyRatios = "~{outputPrefix}.denoisedCR.tsv"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        PON: {
            description: "A panel of normals as generated by CreateReadCountPanelOfNormals.",
            category: "advanced"
        }
        annotatedIntervals: {
            description: "An annotated set of intervals as generated by AnnotateIntervals. Will be ignored if PON is provided.",
            category: "advanced"
        }
        readCounts: {
            description: "The read counts file as generated by CollectReadCounts.",
            category: "required"
        }
        outputPrefix: {
            description: "The prefix for the output files.",
            category: "required"
        }
        memory: {
            description: "The amount of memory this job will use.",
            category: "advanced"
        }
        javaXmx: {
            description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
            category: "advanced"
        }
        timeMinutes: {
            description: "The maximum amount of time the job will run in minutes.",
            category: "advanced"
        }
        dockerImage: {
            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
            category: "advanced"
        }
    }
}

task FilterMutectCalls {
    # Runs `gatk FilterMutectCalls` on an unfiltered VCF and writes the
    # filtered VCF to `outputVcf`. Filtering statistics go to the fixed
    # relative file name "filtering.stats".
    input {
        # Data inputs.
        File unfilteredVcf
        File unfilteredVcfIndex
        File mutect2Stats
        String outputVcf
        File referenceFasta
        File referenceFastaFai
        File referenceFastaDict

        # Optional filtering inputs.
        File? contaminationTable
        File? mafTumorSegments
        File? artifactPriors
        Int uniqueAltReadCount = 4

        # Resources and container.
        String javaXmx = "12G"
        String memory = "13G"
        Int timeMinutes = 60
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # Flags built from the optional File inputs are only emitted when that
    # input is defined.
    command {
        set -e
        mkdir -p "$(dirname ~{outputVcf})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        FilterMutectCalls \
        -R ~{referenceFasta} \
        -V ~{unfilteredVcf} \
        -O ~{outputVcf} \
        ~{"--contamination-table " + contaminationTable} \
        ~{"--tumor-segmentation " + mafTumorSegments} \
        ~{"--ob-priors " + artifactPriors} \
        ~{"--unique-alt-read-count " + uniqueAltReadCount} \
        ~{"-stats " + mutect2Stats} \
        --filtering-stats "filtering.stats" \
        --showHidden
    }

    output {
        File filteredVcf = outputVcf
        File filteredVcfIndex = "~{outputVcf}.tbi"
        File filteringStats = "filtering.stats"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        referenceFasta: {
            description: "The reference fasta file which was also used for mapping.",
            category: "required"
        }
        referenceFastaDict: {
            description: "The sequence dictionary associated with the reference fasta file.",
            category: "required"
        }
        referenceFastaFai: {
            description: "The index for the reference fasta file.",
            category: "required"
        }
        unfilteredVcf: {
            description: "An unfiltered VCF file as produced by Mutect2.",
            category: "required"
        }
        unfilteredVcfIndex: {
            description: "The index of the unfiltered VCF file.",
            category: "required"
        }
        outputVcf: {
            description: "The location the filtered VCF file should be written.",
            category: "required"
        }
        contaminationTable: {
            description: "Equivalent to FilterMutectCalls' `--contamination-table` option.",
            category: "advanced"
        }
        mafTumorSegments: {
            description: "Equivalent to FilterMutectCalls' `--tumor-segmentation` option.",
            category: "advanced"
        }
        artifactPriors: {
            description: "Equivalent to FilterMutectCalls' `--ob-priors` option.",
            category: "advanced"
        }
        uniqueAltReadCount: {
            description: "Equivalent to FilterMutectCalls' `--unique-alt-read-count` option.",
            category: "advanced"
        }
        mutect2Stats: {
            description: "Equivalent to FilterMutectCalls' `-stats` option.",
            category: "advanced"
        }

        memory: {
            description: "The amount of memory this job will use.",
            category: "advanced"
        }
        javaXmx: {
            description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
            category: "advanced"
        }
        timeMinutes: {
            description: "The maximum amount of time the job will run in minutes.",
            category: "advanced"
        }
        dockerImage: {
            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
            category: "advanced"
        }
    }
}

# Combine multiple recalibration tables from scattered BaseRecalibrator runs
# into a single report using GATK GatherBQSRReports.
task GatherBqsrReports {
    input {
        Array[File] inputBQSRreports
        String outputReportPath

        # The container memory defaults to the JVM heap size (javaXmxMb) plus
        # 256 MB of headroom.
        Int memoryMb = 256 + javaXmxMb
        Int javaXmxMb = 256
        Int timeMinutes = 1
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # The parent directory of the report is created first; every input report
    # is handed to GATK with its own -I flag.
    command {
        set -e
        mkdir -p "$(dirname ~{outputReportPath})"
        gatk --java-options '-Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1' \
        GatherBQSRReports \
        -I ~{sep=' -I ' inputBQSRreports} \
        -O ~{outputReportPath}
    }

    output {
        File outputBQSRreport = outputReportPath
    }

    runtime {
        docker: dockerImage
        time_minutes: timeMinutes
        # memoryMb is an Int, so the unit suffix is appended here.
        memory: "~{memoryMb}M"
    }

    parameter_meta {
        inputBQSRreports: {description: "The BQSR reports to be merged.", category: "required"}
        outputReportPath: {description: "The location of the combined BQSR report.", category: "required"}

        memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"}
        javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.",
                    category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}

# Runs GATK GenomicsDBImport on a set of GVCF files and then packs the
# contents of the resulting workspace directory into a gzipped tar archive.
task GenomicsDBImport {
    input {
        # Data inputs. The index files are not referenced in the command.
        Array[File] gvcfFiles
        Array[File] gvcfFilesIndex
        Array[File]+ intervals

        # Output locations and optional scratch directory.
        String genomicsDBWorkspacePath = "genomics_db"
        String genomicsDBTarFile = "genomics_db.tar.gz"
        String? tmpDir

        # Resource settings.
        String javaXmx = "4G"
        String memory = "5G"
        Int timeMinutes = 180
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # Only the parent of the workspace path is created up front. The --tmp-dir
    # flag is emitted only when tmpDir is set.
    command <<<
        set -e
        mkdir -p "$(dirname ~{genomicsDBWorkspacePath})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        GenomicsDBImport \
        -V ~{sep=" -V " gvcfFiles} \
        --genomicsdb-workspace-path ~{genomicsDBWorkspacePath} \
        ~{"--tmp-dir " + tmpDir} \
        -L ~{sep=" -L " intervals}
        bash -c 'tar -cvzf ~{genomicsDBTarFile} ~{genomicsDBWorkspacePath}/*'
    >>>

    output {
        File genomicsDbTarArchive = genomicsDBTarFile
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        # data inputs
        gvcfFiles: {description: "The gvcfFiles to be merged.", category: "required"}
        gvcfFilesIndex: {description: "Indexes for the gvcfFiles.", category: "required"}
        intervals: {description: "intervals over which to operate.", category: "required"}

        # output locations
        genomicsDBWorkspacePath: {description: "Where the genomicsDB files should be stored", category: "advanced"}
        genomicsDBTarFile: {description: "Where the .tar file containing the genomicsDB should be stored", category: "advanced"}
        tmpDir: {description: "Alternate temporary directory in case there is not enough space. Must be mounted when using containers", category: "advanced"}

        # resources
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
    }
}

# Runs GATK GenotypeGVCFs on a single GVCF file, restricted to the given
# intervals, and exposes the resulting VCF together with its ".tbi" index.
task GenotypeGVCFs {
    input {
        # Variant input and the regions to genotype.
        File gvcfFile
        File gvcfFileIndex
        Array[File]+ intervals
        String outputPath

        # Reference. The dictionary and fai index are not referenced in the
        # command.
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai

        # Optional annotation resources.
        Array[String] annotationGroups = ["StandardAnnotation"]
        File? dbsnpVCF
        File? dbsnpVCFIndex
        File? pedigree

        # Resource settings.
        String javaXmx = "6G"
        String memory = "7G"
        Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used.
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # The leading -G is only emitted when annotationGroups is non-empty; the
    # remaining groups are joined with " -G ". -D and --pedigree are emitted
    # only when their inputs are set.
    command <<<
        set -e
        mkdir -p "$(dirname ~{outputPath})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        GenotypeGVCFs \
        -R ~{referenceFasta} \
        -O ~{outputPath} \
        ~{"-D " + dbsnpVCF} \
        ~{"--pedigree " + pedigree} \
        ~{true="-G" false="" length(annotationGroups) > 0} ~{sep=" -G " annotationGroups} \
        --only-output-calls-starting-in-intervals \
        -V ~{gvcfFile} \
        -L ~{sep=' -L ' intervals}
    >>>

    output {
        File outputVCF = outputPath
        File outputVCFIndex = outputPath + ".tbi"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        # variant input and regions
        gvcfFile: {description: "The GVCF file to be genotyped.", category: "required"}
        gvcfFileIndex: {description: "The index of the input GVCF file.", category: "required"}
        intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "required"}
        outputPath: {description: "The location to write the output VCF file to.", category: "required"}

        # reference
        referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}

        # annotation resources
        annotationGroups: {description: "Which annotation groups will be used for the annotation", category: "advanced"}
        dbsnpVCF: {description: "A dbSNP VCF.", category: "common"}
        dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"}
        pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"}

        # resources
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
    }
}

# Runs GATK GetPileupSummaries on a BAM file and writes the result to
# "<outputPrefix>-pileups.table".
task GetPileupSummaries {
    input {
        File sampleBam
        File sampleBamIndex
        File variantsForContamination
        File variantsForContaminationIndex
        File sitesForContamination
        File sitesForContaminationIndex
        String outputPrefix

        String memory = "13G"
        String javaXmx = "12G"
        Int timeMinutes = 120
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # The parent directory of the prefix is created first so that a prefix
    # containing a directory component (e.g. "results/sample") works, matching
    # the other tasks in this file that write to a caller-supplied path.
    command {
        set -e
        mkdir -p "$(dirname ~{outputPrefix})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        GetPileupSummaries \
        -I ~{sampleBam} \
        -V ~{variantsForContamination} \
        -L ~{sitesForContamination} \
        -O ~{outputPrefix + "-pileups.table"}
    }

    output {
        File pileups = outputPrefix + "-pileups.table"
    }

    runtime {
        docker: dockerImage
        time_minutes: timeMinutes
        memory: memory
    }

    parameter_meta {
        sampleBam: {description: "A BAM file for which a pileup should be created.", category: "required"}
        sampleBamIndex: {description: "The index of the input BAM file.", category: "required"}
        variantsForContamination: {description: "A VCF file with common variants.", category: "required"}
        variantsForContaminationIndex: {description: "The index for the common variants VCF file.", category: "required"}
        sitesForContamination: {description: "A bed file describing regions to operate on.", category: "required"}
        sitesForContaminationIndex: {description: "The index for the bed file.", category: "required"}
        outputPrefix: {description: "The prefix for the output.", category: "required"}

        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}


# Runs GATK HaplotypeCaller on one or more BAM files and exposes the resulting
# VCF together with its ".tbi" index.
task HaplotypeCaller {
    input {
        Array[File]+ inputBams
        Array[File]+ inputBamsIndex
        Array[File]+? intervalList
        Array[File]+? excludeIntervalList
        String outputPath
        File referenceFasta
        File referenceFastaIndex
        File referenceFastaDict
        Float? contamination
        File? dbsnpVCF
        File? dbsnpVCFIndex
        File? pedigree
        Int? ploidy
        String? outputMode
        Boolean gvcf = false
        # Derived from `gvcf` unless the caller overrides it explicitly.
        String emitRefConfidence = if gvcf then "GVCF" else "NONE"
        Boolean dontUseSoftClippedBases = false
        Float? standardMinConfidenceThresholdForCalling

        # Container memory defaults to the JVM heap size plus 512 MB of headroom.
        Int memoryMb = javaXmxMb + 512
        # Memory increases with time used. 4G should cover most use cases.
        Int javaXmxMb = 4096
        Int timeMinutes = 400 # This will likely be used with intervals, as such size based estimation can't be used.
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # Optional flags are only emitted when the corresponding input is set.
    # `contamination` is a Float, so it is passed through
    # --contamination-fraction-to-filter (short name: -contamination) rather
    # than --contamination-fraction-per-sample-file, which expects a file.
    command {
        set -e
        mkdir -p "$(dirname ~{outputPath})"
        gatk --java-options '-Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1' \
        HaplotypeCaller \
        -R ~{referenceFasta} \
        -O ~{outputPath} \
        -I ~{sep=" -I " inputBams} \
        ~{"--sample-ploidy " + ploidy} \
        ~{true="-L" false="" defined(intervalList)} ~{sep=' -L ' intervalList} \
        ~{true="-XL" false="" defined(excludeIntervalList)} ~{sep=' -XL ' excludeIntervalList} \
        ~{"-D " + dbsnpVCF} \
        ~{"--pedigree " + pedigree} \
        ~{"--contamination-fraction-to-filter " + contamination} \
        ~{"--output-mode " + outputMode} \
        --emit-ref-confidence ~{emitRefConfidence} \
        ~{true="--dont-use-soft-clipped-bases" false="" dontUseSoftClippedBases} \
        ~{"--standard-min-confidence-threshold-for-calling " + standardMinConfidenceThresholdForCalling}
    }

    output {
        File outputVCF = outputPath
        File outputVCFIndex = outputPath + ".tbi"
    }

    runtime {
        docker: dockerImage
        time_minutes: timeMinutes
        # memoryMb is an Int, so the unit suffix is appended here.
        memory: "~{memoryMb}M"
    }

    parameter_meta {
        inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"}
        inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"}
        intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"}
        excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"}
        outputPath: {description: "The location to write the output to.", category: "required"}
        ploidy: {description: "The ploidy with which the variants should be called.", category: "common"}
        gvcf: {description: "Whether the output should be a gvcf", category: "common"}
        referenceFasta: {description: "The reference fasta file which was also used for mapping.",
                         category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
                             category: "required"}
        referenceFastaIndex: {description: "The index for the reference fasta file.", category: "required"}
        contamination: {description: "Equivalent to HaplotypeCaller's `-contamination` option.", category: "advanced"}
        outputMode: {description: "Specifies which type of calls we should output. Same as HaplotypeCaller's `--output-mode` option.",
                     category: "advanced"}
        emitRefConfidence: {description: "Whether to include reference calls. Three modes: 'NONE', 'BP_RESOLUTION' and 'GVCF'",
                            category: "advanced"}
        dontUseSoftClippedBases: {description: "Do not use soft-clipped bases. Should be 'true' for RNA variant calling.", category: "common"}
        standardMinConfidenceThresholdForCalling: {description: "Confidence threshold used for calling variants.", category: "advanced"}
        dbsnpVCF: {description: "A dbSNP VCF.", category: "common"}
        dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"}
        pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"}
        memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"}
        javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.",
                    category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}


# Runs GATK LearnReadOrientationModel on one or more f1r2 archives and writes
# the result to the fixed file name "artifact-priors.tar.gz".
task LearnReadOrientationModel {
    input {
        Array[File]+ f1r2TarGz

        # Resource settings.
        String javaXmx = "12G"
        String memory = "13G"
        Int timeMinutes = 120
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # Every archive is passed with its own -I flag.
    command <<<
        set -e
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        LearnReadOrientationModel \
        -I ~{sep=" -I " f1r2TarGz} \
        -O "artifact-priors.tar.gz"
    >>>

    output {
        File artifactPriorsTable = "artifact-priors.tar.gz"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        # inputs
        f1r2TarGz: {description: "A f1r2TarGz file outputed by mutect2.", category: "required"}

        # resources
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
    }
}

# Runs GATK MergeMutectStats on one or more stats files and writes the result
# to the fixed file name "merged.stats".
task MergeStats {
    input {
        Array[File]+ stats

        # Resource settings.
        String javaXmx = "14G"
        String memory = "15G"
        Int timeMinutes = 30
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # Every stats file is passed with its own -stats flag.
    command <<<
        set -e
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        MergeMutectStats \
        -stats ~{sep=" -stats " stats} \
        -O "merged.stats"
    >>>

    output {
        File mergedStats = "merged.stats"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        # inputs
        stats: {description: "Statistics files to be merged.", category: "required"}

        # resources
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
    }
}

# Runs GATK ModelSegments on denoised copy ratios and allelic counts, writing
# all result files to outputDir with the given outputPrefix.
task ModelSegments {
    input {
        String outputDir = "."
        String outputPrefix
        File denoisedCopyRatios
        File allelicCounts
        File? normalAllelicCounts
        # Defaults to 0 when a matched normal is supplied and to 30 otherwise.
        Int minimumTotalAlleleCountCase = if defined(normalAllelicCounts)
            then 0
            else 30
        Int maximumNumberOfSmoothingIterations = 10

        String memory = "11G"
        String javaXmx = "10G"
        Int timeMinutes = 60
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # --normal-allelic-counts is only emitted when normalAllelicCounts is set.
    command {
        set -e
        mkdir -p ~{outputDir}
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        ModelSegments \
        --denoised-copy-ratios ~{denoisedCopyRatios} \
        --allelic-counts ~{allelicCounts} \
        ~{"--normal-allelic-counts " + normalAllelicCounts} \
        --minimum-total-allele-count-case ~{minimumTotalAlleleCountCase} \
        --maximum-number-of-smoothing-iterations ~{maximumNumberOfSmoothingIterations} \
        --output ~{outputDir} \
        --output-prefix ~{outputPrefix}
    }

    output {
        File hetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.tsv"
        # Declared optional; presumably only produced when normalAllelicCounts
        # is supplied - confirm against the ModelSegments documentation.
        File? normalHetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.normal.tsv"
        File copyRatioSegments = outputDir + "/" + outputPrefix + ".cr.seg"
        File copyRatioCBS = outputDir + "/" + outputPrefix + ".cr.igv.seg"
        File alleleFractionCBS = outputDir + "/" + outputPrefix + ".af.igv.seg"
        File unsmoothedModeledSegments = outputDir + "/" + outputPrefix + ".modelBegin.seg"
        File unsmoothedCopyRatioParameters = outputDir + "/" + outputPrefix + ".modelBegin.cr.param"
        File unsmoothedAlleleFractionParameters = outputDir + "/" + outputPrefix + ".modelBegin.af.param"
        File modeledSegments = outputDir + "/" + outputPrefix + ".modelFinal.seg"
        File copyRatioParameters = outputDir + "/" + outputPrefix + ".modelFinal.cr.param"
        File alleleFractionParameters = outputDir + "/" + outputPrefix + ".modelFinal.af.param"
    }

    runtime {
        docker: dockerImage
        # Key must be `time_minutes`, as in every other task in this file.
        time_minutes: timeMinutes
        memory: memory
    }

    parameter_meta {
        outputDir: {description: "The directory to write the output to.", category: "common"}
        outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"}
        denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"}
        allelicCounts: {description: "The allelicCounts as generated by CollectAllelicCounts.", category: "required"}
        normalAllelicCounts: {description: "The allelicCounts as generated by CollectAllelicCounts for a matched normal.", category: "common"}
        minimumTotalAlleleCountCase: {description: "Equivalent to gatk ModelSegments' `--minimum-total-allele-count-case` option.", category: "advanced"}
        maximumNumberOfSmoothingIterations: {description: "Equivalent to gatk ModelSegments' `--maximum-number-of-smoothing-iterations` option.", category: "advanced"}

        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}

# Runs GATK Mutect2 on one or more BAM files, restricted to the given
# intervals, and exposes the VCF, its ".tbi" index, the f1r2 archive and the
# stats file.
task MuTect2 {
    input {
        # Reads to call variants on.
        Array[File]+ inputBams
        Array[File]+ inputBamsIndex

        # Reference. The dictionary and fai index are not referenced in the
        # command.
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai

        # Sample names and optional resources.
        String tumorSample
        String? normalSample
        File? germlineResource
        File? germlineResourceIndex
        File? panelOfNormals
        File? panelOfNormalsIndex
        Array[File]+ intervals

        # Output locations.
        String outputVcf
        String f1r2TarGz = "f1r2.tar.gz"
        # NOTE(review): outputStats is not passed to Mutect2 in the command
        # below; it is only used to locate the `stats` output. Confirm that
        # overriding the default is actually supported.
        String outputStats = outputVcf + ".stats"

        # Resource settings.
        String javaXmx = "4G"
        String memory = "5G"
        Int timeMinutes = 240
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # -normal, --germline-resource and --panel-of-normals are emitted only when
    # their inputs are set.
    command <<<
        set -e
        mkdir -p "$(dirname ~{outputVcf})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        Mutect2 \
        -R ~{referenceFasta} \
        -I ~{sep=" -I " inputBams} \
        -tumor ~{tumorSample} \
        ~{"-normal " + normalSample} \
        ~{"--germline-resource " + germlineResource} \
        ~{"--panel-of-normals " + panelOfNormals} \
        ~{"--f1r2-tar-gz " + f1r2TarGz} \
        -O ~{outputVcf} \
        -L ~{sep=" -L " intervals}
    >>>

    output {
        File vcfFile = outputVcf
        File vcfFileIndex = outputVcf + ".tbi"
        File f1r2File = f1r2TarGz
        File stats = outputStats
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        # reads
        inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"}
        inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"}

        # reference
        referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}

        # samples and resources
        tumorSample: {description: "The name of the tumor/case sample.", category: "required"}
        normalSample: {description: "The name of the normal/control sample.", category: "common"}
        germlineResource: {description: "Equivalent to Mutect2's `--germline-resource` option.", category: "advanced"}
        germlineResourceIndex: {description: "The index for the germline resource.", category: "advanced"}
        panelOfNormals: {description: "Equivalent to Mutect2's `--panel-of-normals` option.", category: "advanced"}
        panelOfNormalsIndex: {description: "The index for the panel of normals.", category: "advanced"}
        intervals: {description: "Bed files describing the regiosn to operate on.", category: "required"}

        # output locations
        outputVcf: {description: "The location to write the output VCF file to.", category: "required"}
        f1r2TarGz: {description: "Equivalent to Mutect2's `--f1r2-tar-gz` option.", category: "advanced"}
        outputStats: {description: "The location the output statistics should be written to.", category: "advanced"}

        # resources
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
    }
}

# Runs GATK PlotDenoisedCopyRatios and exposes the plots and the median
# absolute deviation text files written to outputDir with the given prefix.
task PlotDenoisedCopyRatios {
    input {
        # Data inputs.
        File referenceFastaDict
        File standardizedCopyRatios
        File denoisedCopyRatios
        Int? minimumContigLength

        # Output location.
        String outputDir = "."
        String outputPrefix

        # Resource settings.
        # NOTE(review): this task uses the broadinstitute image rather than
        # the biocontainers image used by most tasks in this file - presumably
        # for plotting dependencies; confirm before changing.
        String javaXmx = "3G"
        String memory = "4G"
        Int timeMinutes = 2
        String dockerImage = "broadinstitute/gatk:4.1.8.0"
    }

    # --minimum-contig-length is only emitted when minimumContigLength is set.
    command <<<
        set -e
        mkdir -p ~{outputDir}
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        PlotDenoisedCopyRatios \
        --standardized-copy-ratios ~{standardizedCopyRatios} \
        --denoised-copy-ratios ~{denoisedCopyRatios} \
        --sequence-dictionary ~{referenceFastaDict} \
        ~{"--minimum-contig-length " + minimumContigLength} \
        --output ~{outputDir} \
        --output-prefix ~{outputPrefix}
    >>>

    output {
        # Plots; the "Limit4" plot is declared optional.
        File denoisedCopyRatiosPlot = outputDir + "/" + outputPrefix + ".denoised.png"
        File? denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png"
        # Median absolute deviation summaries.
        File standardizedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".standardizedMAD.txt"
        File denoisedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".denoisedMAD.txt"
        File deltaMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".deltaMAD.txt"
        File deltaScaledMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".scaledDeltaMAD.txt"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        # data inputs
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file used for the analyses.", category: "required"}
        standardizedCopyRatios: {description: "The standardized copy ratios as generated by DenoiseReadCounts.", category: "required"}
        denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"}
        minimumContigLength: {description: "The minimum length for a contig to be included in the plots.", category: "advanced"}

        # output location
        outputDir: {description: "The directory to write the ouput to.", category: "common"}
        outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"}

        # resources
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
    }
}

# Runs GATK PlotModeledSegments and exposes the "<outputPrefix>.modeled.png"
# plot written to outputDir.
task PlotModeledSegments {
    input {
        # Data inputs.
        File referenceFastaDict
        File denoisedCopyRatios
        File segments
        File allelicCounts
        Int? minimumContigLength

        # Output location.
        String outputDir = "."
        String outputPrefix

        # Resource settings.
        # NOTE(review): this task uses the broadinstitute image rather than
        # the biocontainers image used by most tasks in this file - presumably
        # for plotting dependencies; confirm before changing.
        String javaXmx = "3G"
        String memory = "4G"
        Int timeMinutes = 2
        String dockerImage = "broadinstitute/gatk:4.1.8.0"
    }

    # --minimum-contig-length is only emitted when minimumContigLength is set.
    command <<<
        set -e
        mkdir -p ~{outputDir}
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        PlotModeledSegments \
        --denoised-copy-ratios ~{denoisedCopyRatios} \
        --allelic-counts ~{allelicCounts} \
        --segments ~{segments} \
        --sequence-dictionary ~{referenceFastaDict} \
        ~{"--minimum-contig-length " + minimumContigLength} \
        --output ~{outputDir} \
        --output-prefix ~{outputPrefix}
    >>>

    output {
        File modeledSegmentsPlot = outputDir + "/" + outputPrefix + ".modeled.png"
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        # data inputs
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file used for the analyses.", category: "required"}
        denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"}
        segments: {description: "The modeled segments as generated by ModelSegments.", category: "required"}
        allelicCounts: {description: "The hetrozygous allelic counts as generated by ModelSegments.", category: "required"}
        minimumContigLength: {description: "The minimum length for a contig to be included in the plots.", category: "advanced"}

        # output location
        outputDir: {description: "The directory to write the ouput to.", category: "common"}
        outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"}

        # resources
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
    }
}

# Runs GATK PreprocessIntervals against a reference and writes the resulting
# interval list to outputIntervalList.
task PreprocessIntervals {
    input {
        # Reference. The fai index is not referenced in the command.
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai

        # Optional regions plus binning settings. When intervals are given the
        # defaults are binLength 0 / padding 250; without intervals they are
        # binLength 1000 / padding 0.
        File? intervals
        Int binLength = if defined(intervals) then 0 else 1000
        Int padding = if defined(intervals) then 250 else 0
        String intervalMergingRule = "OVERLAPPING_ONLY"
        String outputIntervalList = "bins.interval_list"

        # Resource settings. The time limit defaults to 1 minute plus 6 minutes
        # per gigabyte of reference fasta, rounded up.
        String javaXmx = "3G"
        String memory = "4G"
        Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6)
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # -L is only emitted when intervals is set.
    command <<<
        set -e
        mkdir -p "$(dirname ~{outputIntervalList})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        PreprocessIntervals \
        -R ~{referenceFasta} \
        --sequence-dictionary ~{referenceFastaDict} \
        --bin-length ~{binLength} \
        --padding ~{padding} \
        ~{"-L " + intervals} \
        --interval-merging-rule ~{intervalMergingRule} \
        -O ~{outputIntervalList}
    >>>

    output {
        File intervalList = outputIntervalList
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        # reference
        referenceFasta: {description: "The reference fasta file..", category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}

        # regions and binning
        intervals: {description: "Bed files describing the regiosn to operate on.", category: "common"}
        binLength: {description: "The size of the bins to be created. Should be 0 for targeted/exome sequencing.", category: "advanced"}
        padding: {description: "The padding to be added to the bins. Should be 0 if contiguos binning is used, eg with WGS.", category: "advanced"}
        intervalMergingRule: {description: "Equivalent to gatk PreprocessIntervals' `--interval-merging-rule` option.", category: "advanced"}
        outputIntervalList: {description: "The location the output should be written to.", category: "advanced"}

        # resources
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
    }
}

# Runs `gatk SelectVariants` on inputVcf, optionally restricted to a variant
# type and/or a set of intervals, and writes the result to outputPath.
task SelectVariants {
    input {
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai
        File inputVcf
        File inputVcfIndex
        String outputPath = "output.vcf.gz"
        String? selectTypeToInclude
        Array[File] intervals = []

        String memory = "5G"
        String javaXmx = "4G"
        Int timeMinutes = 60
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # The `-L` line emits nothing when `intervals` is empty; otherwise every
    # interval file is passed with its own `-L` flag.
    command {
        set -e
        mkdir -p "$(dirname ~{outputPath})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        SelectVariants \
        -R ~{referenceFasta} \
        -V ~{inputVcf} \
        ~{"--select-type-to-include " + selectTypeToInclude} \
        ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \
        -O ~{outputPath}
    }

    output {
        File outputVcf = outputPath
        # The index is expected at outputPath with ".tbi" appended.
        File outputVcfIndex = outputPath + ".tbi"
    }

    runtime {
        docker: dockerImage
        # Key must be `time_minutes` (as in the other tasks of this file) for
        # the timeMinutes input to take effect.
        time_minutes: timeMinutes
        memory: memory
    }

    parameter_meta {
        inputVcf: {description: "The VCF input file.", category: "required"}
        inputVcfIndex: {description: "The input VCF file's index.", category: "required"}
        referenceFasta: {description: "The reference fasta file which was also used for mapping.",
                         category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
                             category: "required"}
        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
        selectTypeToInclude: {description: "Select only a certain type of variants from the input file", category: "common"}
        outputPath: {description: "The location the output VCF file should be written.", category: "advanced"}
        intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"}

        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}

# Runs `gatk SplitNCigarReads` on inputBam, optionally restricted to a set of
# intervals, and writes the resulting BAM to outputBam.
task SplitNCigarReads {
    input {
        # Reads to process and where to write the result.
        File inputBam
        File inputBamIndex
        String outputBam

        # Reference fasta with its dictionary and index.
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai

        # Optional regions; an empty array means no `-L` flag is emitted.
        Array[File] intervals = []

        # Resource settings.
        String javaXmx = "4G"
        String memory = "5G"
        # This will likely be used with intervals, as such size based
        # estimation can't be used.
        Int timeMinutes = 120
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    command {
        set -e
        mkdir -p "$(dirname ~{outputBam})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        SplitNCigarReads \
        -I ~{inputBam} \
        -R ~{referenceFasta} \
        -O ~{outputBam} \
        ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals}
    }

    output {
        # The index path is outputBam with a trailing ".bam" replaced by ".bai".
        File bamIndex = sub(outputBam, "\.bam$", ".bai")
        File bam = outputBam
    }

    runtime {
        memory: memory
        time_minutes: timeMinutes
        docker: dockerImage
    }

    parameter_meta {
        # inputs
        inputBam: {
            description: "The BAM file for which spliced reads should be split.",
            category: "required"
        }
        inputBamIndex: {
            description: "The input BAM file's index.",
            category: "required"
        }
        referenceFasta: {
            description: "The reference fasta file which was also used for mapping.",
            category: "required"
        }
        referenceFastaDict: {
            description: "The sequence dictionary associated with the reference fasta file.",
            category: "required"
        }
        referenceFastaFai: {
            description: "The index for the reference fasta file.",
            category: "required"
        }
        outputBam: {
            description: "The location the output BAM file should be written.",
            category: "required"
        }
        intervals: {
            description: "Bed files or interval lists describing the regions to operate on.",
            category: "advanced"
        }

        # resources
        memory: {
            description: "The amount of memory this job will use.",
            category: "advanced"
        }
        javaXmx: {
            description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
            category: "advanced"
        }
        timeMinutes: {
            description: "The maximum amount of time the job will run in minutes.",
            category: "advanced"
        }
        dockerImage: {
            description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
            category: "advanced"
        }
    }
}

# Runs `gatk VariantEval` on one or more eval VCFs, optionally against
# comparison VCFs and a dbSNP VCF, and writes the report to outputPath.
task VariantEval {
    input {
        Array[File] evalVcfs
        Array[File] evalVcfsIndex
        Array[File] comparisonVcfs = []
        Array[File] comparisonVcfsIndex = []
        File? referenceFasta
        File? referenceFastaDict
        File? referenceFastaFai
        File? dbsnpVCF
        File? dbsnpVCFIndex
        Array[File] intervals = []
        String outputPath = "eval.table"
        Boolean doNotUseAllStandardModules = false
        Boolean doNotUseAllStandardStratifications = false
        Array[String] evalModules = []
        Array[String] stratificationModules = []
        Array[String] samples = []
        Boolean mergeEvals = false

        String memory = "5G"
        String javaXmx = "4G"
        # TODO: Refine estimate. For now 20 minutes per GB of input
        # (eval VCFs, comparison VCFs, reference fasta and dbSNP VCF combined).
        Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 20)
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # Each array-valued option emits its flag once per element and nothing at
    # all when the array is empty; boolean options emit their flag only when true.
    command {
        set -e
        mkdir -p "$(dirname ~{outputPath})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        VariantEval \
        --output ~{outputPath} \
        ~{true="--eval" false="" length(evalVcfs) > 0} ~{sep=" --eval " evalVcfs} \
        ~{true="--comparison" false="" length(comparisonVcfs) > 0} ~{sep=" --comparison " comparisonVcfs} \
        ~{"-R " + referenceFasta} \
        ~{"--dbsnp " + dbsnpVCF } \
        ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \
        ~{true="--sample" false="" length(samples) > 0} ~{sep=' --sample ' samples} \
        ~{true="--do-not-use-all-standard-modules" false="" doNotUseAllStandardModules} \
        ~{true="--do-not-use-all-standard-stratifications" false="" doNotUseAllStandardStratifications} \
        ~{true="-EV" false="" length(evalModules) > 0} ~{sep=" -EV " evalModules} \
        ~{true="-ST" false="" length(stratificationModules) > 0} ~{sep=" -ST " stratificationModules} \
        ~{true="--merge-evals" false="" mergeEvals}
    }

    output {
        File table = outputPath
    }

    runtime {
        cpu: 1
        docker: dockerImage
        memory: memory
        time_minutes: timeMinutes
    }

    parameter_meta {
        evalVcfs: {description: "Variant sets to evaluate.", category: "required"}
        evalVcfsIndex: {description: "Indexes for the variant sets.", category: "required"}
        comparisonVcfs: {description: "Compare set vcfs.", category: "advanced"}
        comparisonVcfsIndex: {description: "Indexes for the compare sets.", category: "advanced"}
        evalModules: {description: "One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless doNotUseAllStandardModules=true)", category: "common"}
        stratificationModules: {description: "One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless doNotUseAllStandardStratifications=true)", category: "common"}
        samples: {description: "Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context." , category: "advanced"}  # Advanced because this description is impossible to understand...
        mergeEvals: {description: "If provided, all evalVcf tracks will be merged into a single eval track", category: "common"}
        doNotUseAllStandardModules: {description: "Do not use the standard modules by default (instead, only those that are specified with the evalModules option).", category: "common"}
        doNotUseAllStandardStratifications: {description: "Do not use the standard stratification modules by default (instead, only those that are specified with the stratificationModules option).", category: "common"}
        referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "common"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "common"}
        referenceFastaFai: {description: "The index for the reference fasta file.", category: "common"}
        dbsnpVCF: {description: "A dbSNP VCF.", category: "common"}
        dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"}
        intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"}
        outputPath: {description: "The location the output table should be written.", category: "advanced"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}
# Runs `gatk VariantFiltration` on inputVcf with the caller-supplied filter
# arguments, optionally restricted to a set of intervals, and writes the
# result to outputPath.
task VariantFiltration {
    input {
        File inputVcf
        File inputVcfIndex
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai
        String outputPath = "filtered.vcf.gz"
        Array[String]+ filterArguments
        Array[File] intervals = []

        String memory = "5G"
        String javaXmx = "4G"
        Int timeMinutes = 120
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
    }

    # The input VCF is passed with `-V` (the variant input flag), matching
    # SelectVariants in this file; `-I` is GATK's reads input.
    # filterArguments are joined with single spaces and inserted into the
    # shell command as-is, without any added quoting.
    command {
        set -e
        mkdir -p "$(dirname ~{outputPath})"
        gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \
        VariantFiltration \
        -V ~{inputVcf} \
        -R ~{referenceFasta} \
        -O ~{outputPath} \
        ~{sep=" " filterArguments} \
        ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals}
    }

    output {
        File filteredVcf = outputPath
        # The index is expected at outputPath with ".tbi" appended.
        File filteredVcfIndex = outputPath + ".tbi"
    }

    runtime {
        docker: dockerImage
        time_minutes: timeMinutes
        memory: memory
    }

    parameter_meta {
        inputVcf: {description: "The VCF to be filtered.", category: "required"}
        inputVcfIndex: {description: "The input VCF file's index.", category: "required"}
        referenceFasta: {description: "The reference fasta file which was also used for mapping.",
                         category: "required"}
        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
                             category: "required"}
        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
        outputPath: {description: "The location the output VCF file should be written.", category: "common"}
        intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"}
        filterArguments: {description: "Arguments that should be used for the filter. For example: ['--filter-name', 'my_filter', '--filter-expression', 'AB<0.2']",
                          category: "required"}
        memory: {description: "The amount of memory this job will use.", category: "advanced"}
        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
                  category: "advanced"}
        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                      category: "advanced"}
    }
}