diff --git a/gatk.wdl b/gatk.wdl
index f6d42e825356ebd6fb7d3a7de14afc11940f3833..f54934a049d8307d721f36431dcb6c2bee2cd9cf 100644
--- a/gatk.wdl
+++ b/gatk.wdl
@@ -251,6 +251,96 @@ task CalculateContamination {
     }
 }
 
+task CallCopyRatioSegments {
+    input {
+        String outputPrefix
+        File copyRatioSegments
+
+        String memory = "21G"
+        String javaXmx = "6G"
+        String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
+    }
+
+    command {
+        set -e
+        mkdir -p "$(dirname ~{outputPrefix})"
+        gatk --java-options -Xmx~{javaXmx} \
+        CallCopyRatioSegments \
+        -I ~{copyRatioSegments} \
+        -O ~{outputPrefix}.called.seg
+    }
+
+    output {
+        File calledSegments = outputPrefix + ".called.seg"
+        File calledSegmentsIgv = outputPrefix + ".called.igv.seg"
+    }
+
+    runtime {
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+        outputPrefix: {description: "The prefix for the output files.", category: "required"}
+        copyRatioSegments: {description: "The copy ratios file generated by gatk ModelSegments.", category: "required"}
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
+task CollectAllelicCounts {
+    input {
+        String allelicCountsPath = "allelic_counts.tsv"
+        File commonVariantSites
+        File inputBam
+        File inputBamIndex
+        File referenceFasta
+        File referenceFastaDict
+        File referenceFastaFai
+        String memory = "90G"
+        String javaXmx = "30G"
+        String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
+    }
+
+    command {
+        set -e
+        mkdir -p "$(dirname ~{allelicCountsPath})"
+        gatk --java-options -Xmx~{javaXmx} \
+        CollectAllelicCounts \
+        -L ~{commonVariantSites} \
+        -I ~{inputBam} \
+        -R ~{referenceFasta} \
+        -O ~{allelicCountsPath}
+    }
+
+    output {
+        File allelicCounts = allelicCountsPath
+    }
+
+    runtime {
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+        allelicCountsPath: {description: "The path the output should be written to.", category: "advanced"}
+        commonVariantSites: {description: "Interval list of common variant sites (to retrieve the allelic counts for).", category: "required"}
+        inputBam: {description: "The BAM file to generate counts for.", category: "required"}
+        inputBamIndex: {description: "The index of the input BAM file.", category: "required"}
+        referenceFasta: {description: "The reference fasta file.", category: "required"}
+        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
+        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
 task CollectReadCounts {
     input {
         String countsPath = "readcounts.hdf5"
@@ -290,7 +380,7 @@ task CollectReadCounts {
     }
 
     parameter_meta {
-        countsPath: {description: "The location the output should be written to.", category: "required"}
+        countsPath: {description: "The location the output should be written to.", category: "advanced"}
         intervals: {description: "The intervals to collect counts for.", category: "required"}
         inputBam: {description: "The BAM file to determine the coverage for.", category: "required"}
         inputBamIndex: {description: "The input BAM file's index.", category: "required"}
@@ -463,7 +553,7 @@ task CreateReadCountPanelOfNormals {
 
     parameter_meta {
         PONpath: {description: "The location the PON should be written to.", category: "common"}
-        readCountsFiles: {description: "The read counts files as generated by CollectReadCounts.", category: "advanced"}
+        readCountsFiles: {description: "The read counts files as generated by CollectReadCounts.", category: "required"}
         annotatedIntervals: {description: "An annotation set of intervals as generated by AnnotateIntervals. If provided, explicit GC correction will be performed.",
                              category: "advanced"}
         memory: {description: "The amount of memory this job will use.", category: "advanced"}
@@ -474,6 +564,54 @@ task CreateReadCountPanelOfNormals {
     }
 }
 
+task DenoiseReadCounts {
+    input {
+        File? PON
+        File? annotatedIntervals
+        File readCounts
+        String outputPrefix
+
+        String memory = "39G"
+        String javaXmx = "13G"
+        String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
+    }
+
+    command {
+        set -e
+        mkdir -p "$(dirname ~{outputPrefix})"
+        gatk --java-options -Xmx~{javaXmx} \
+        DenoiseReadCounts \
+        -I ~{readCounts} \
+        ~{"--count-panel-of-normals " + PON} \
+        ~{"--annotated-intervals " + annotatedIntervals} \
+        --standardized-copy-ratios ~{outputPrefix}.standardizedCR.tsv \
+        --denoised-copy-ratios ~{outputPrefix}.denoisedCR.tsv
+    }
+
+    output {
+        File standardizedCopyRatios = outputPrefix + ".standardizedCR.tsv"
+        File denoisedCopyRatios = outputPrefix + ".denoisedCR.tsv"
+    }
+
+    runtime {
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+        PON: {description: "A panel of normals as generated by CreateReadCountPanelOfNormals.", category: "advanced"}
+        annotatedIntervals: {description: "An annotated set of intervals as generated by AnnotateIntervals. Will be ignored if PON is provided.",
+                             category: "advanced"}
+        readCounts: {description: "The read counts file as generated by CollectReadCounts.", category: "required"}
+        outputPrefix: {description: "The prefix for the output files.", category: "required"}
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
 task FilterMutectCalls {
     input {
         File referenceFasta
@@ -836,6 +974,73 @@ task MergeStats {
     }
 }
 
+task ModelSegments {
+    input {
+        String outputDir = "."
+        String outputPrefix
+        File denoisedCopyRatios
+        File allelicCounts
+        File? normalAllelicCounts
+        Int minimumTotalAlleleCountCase = if defined(normalAllelicCounts)
+            then 0
+            else 30
+        Int maximumNumberOfSmoothingIterations = 10
+
+        String memory = "64G"
+        String javaXmx = "10G"
+        String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
+    }
+
+    command {
+        set -e
+        mkdir -p ~{outputDir}
+        gatk --java-options -Xmx~{javaXmx} \
+        ModelSegments \
+        --denoised-copy-ratios ~{denoisedCopyRatios} \
+        --allelic-counts ~{allelicCounts} \
+        ~{"--normal-allelic-counts " + normalAllelicCounts} \
+        --minimum-total-allele-count-case ~{minimumTotalAlleleCountCase} \
+        --maximum-number-of-smoothing-iterations ~{maximumNumberOfSmoothingIterations} \
+        --output ~{outputDir} \
+        --output-prefix ~{outputPrefix}
+    }
+
+    output {
+        File hetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.tsv"
+        File? normalHetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.normal.tsv"
+        File copyRatioSegments = outputDir + "/" + outputPrefix + ".cr.seg"
+        File copyRatioCBS = outputDir + "/" + outputPrefix + ".cr.igv.seg"
+        File alleleFractionCBS = outputDir + "/" + outputPrefix + ".af.igv.seg"
+        File unsmoothedModeledSegments = outputDir + "/" + outputPrefix + ".modelBegin.seg"
+        File unsmoothedCopyRatioParameters = outputDir + "/" + outputPrefix + ".modelBegin.cr.param"
+        File unsmoothedAlleleFractionParameters = outputDir + "/" + outputPrefix + ".modelBegin.af.param"
+        File modeledSegments = outputDir + "/" + outputPrefix + ".modelFinal.seg"
+        File copyRatioParameters = outputDir + "/" + outputPrefix + ".modelFinal.cr.param"
+        File alleleFractionParameters = outputDir + "/" + outputPrefix + ".modelFinal.af.param"
+    }
+
+    runtime {
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+        outputDir: {description: "The directory to write the output to.", category: "common"}
+        outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"}
+        denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"}
+        allelicCounts: {description: "The allelicCounts as generated by CollectAllelicCounts.", category: "required" }
+        normalAllelicCounts: {description: "The allelicCounts as generated by CollectAllelicCounts for a matched normal.", category: "common"}
+        minimumTotalAlleleCountCase: {description: "Equivalent to gatk ModelSegments' `--minimum-total-allele-count-case` option.", category: "advanced"}
+        maximumNumberOfSmoothingIterations: {description: "Equivalent to gatk ModelSegments' `--maximum-number-of-smoothing-iterations` option.", category: "advanced"}
+
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
 task MuTect2 {
     input {
         Array[File]+ inputBams
@@ -911,6 +1116,110 @@ task MuTect2 {
     }
 }
 
+task PlotDenoisedCopyRatios {
+    input {
+        File referenceFastaDict
+        String outputDir = "."
+        String outputPrefix
+        File standardizedCopyRatios
+        File denoisedCopyRatios
+
+        String memory = "21G"
+        String javaXmx = "7G"
+        String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
+    }
+
+    command {
+        set -e
+        mkdir -p ~{outputDir}
+        gatk --java-options -Xmx~{javaXmx} \
+        PlotDenoisedCopyRatios \
+        --standardized-copy-ratios ~{standardizedCopyRatios} \
+        --denoised-copy-ratios ~{denoisedCopyRatios} \
+        --sequence-dictionary ~{referenceFastaDict} \
+        --output ~{outputDir} \
+        --output-prefix ~{outputPrefix}
+    }
+
+    output {
+        File denoisedCopyRatiosPlot = outputDir + "/" + outputPrefix + ".denoised.png"
+        File denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png"
+        File standardizedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".standardizedMAD.txt"
+        File denoisedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".denoisedMAD.txt"
+        File deltaMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".deltaMAD.txt"
+        File deltaScaledMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".scaledDeltaMAD.txt"
+    }
+
+    runtime {
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file used for the analyses.", category: "required"}
+        outputDir: {description: "The directory to write the output to.", category: "common"}
+        outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"}
+        denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"}
+        standardizedCopyRatios: {description: "The standardized copy ratios as generated by DenoiseReadCounts.", category: "required"}
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
+task PlotModeledSegments {
+    input {
+        File referenceFastaDict
+        String outputDir = "."
+        String outputPrefix
+        File denoisedCopyRatios
+        File segments
+        File allelicCounts
+
+        String memory = "21G"
+        String javaXmx = "7G"
+        String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
+    }
+
+    command {
+        set -e
+        mkdir -p ~{outputDir}
+        gatk --java-options -Xmx~{javaXmx} \
+        PlotModeledSegments \
+        --denoised-copy-ratios ~{denoisedCopyRatios} \
+        --allelic-counts ~{allelicCounts} \
+        --segments ~{segments} \
+        --sequence-dictionary ~{referenceFastaDict} \
+        --output ~{outputDir} \
+        --output-prefix ~{outputPrefix}
+    }
+
+    output {
+        File modeledSegmentsPlot = outputDir + "/" + outputPrefix + ".modeled.png"
+    }
+
+    runtime {
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file used for the analyses.", category: "required"}
+        outputDir: {description: "The directory to write the output to.", category: "common"}
+        outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"}
+        denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"}
+        segments: {description: "The modeled segments as generated by ModelSegments.", category: "required"}
+        allelicCounts: {description: "The heterozygous allelic counts as generated by ModelSegments.", category: "required"}
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
 task PreprocessIntervals {
     input {
         File referenceFasta