# gatk.wdl (13.91 KiB) - authored by António Paulo
version 1.0
# Apply a Base Quality Score Recalibration (BQSR) report to a BAM file.
#
# Runs `gatk ApplyBQSR` restricted to the supplied intervals and exposes the
# recalibrated BAM together with its index and md5 file.
task ApplyBQSR {
    input {
        # Alignment to recalibrate. The index is never referenced in the
        # command; presumably it is declared so the engine localizes it next
        # to the BAM (TODO confirm).
        File inputBam
        File inputBamIndex

        # Recalibration table passed to -bqsr.
        File recalibrationReport

        # One -L argument is emitted per interval file.
        Array[File]+ sequenceGroupInterval

        # Destination of the recalibrated BAM; its parent directory is created.
        String outputBamPath

        # Reference passed to -R. The .dict and .fai files are not referenced
        # in the command.
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai

        # `memory` is the JVM heap in gigabytes (-Xmx). The runtime request is
        # memory * memoryMultiplier, rounded up.
        Int memory = 4
        Float memoryMultiplier = 3.0
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
    }

    command {
        set -e
        mkdir -p $(dirname ~{outputBamPath})
        gatk --java-options -Xmx~{memory}G \
        ApplyBQSR \
        --create-output-bam-md5 \
        --add-output-sam-program-record \
        -R ~{referenceFasta} \
        -I ~{inputBam} \
        --use-original-qualities \
        -O ~{outputBamPath} \
        -bqsr ~{recalibrationReport} \
        --static-quantized-quals 10 \
        --static-quantized-quals 20 \
        --static-quantized-quals 30 \
        -L ~{sep=' -L ' sequenceGroupInterval}
    }

    output {
        File recalibratedBam = outputBamPath
        # The index path is the BAM path with a trailing ".bam" swapped for ".bai".
        File recalibratedBamIndex = sub(outputBamPath, "\.bam$", ".bai")
        # Requested via --create-output-bam-md5.
        File recalibratedBamMd5 = outputBamPath + ".md5"
    }

    runtime {
        memory: ceil(memory * memoryMultiplier)
        docker: dockerImage
    }
}
# Generate a Base Quality Score Recalibration (BQSR) model.
#
# Runs `gatk BaseRecalibrator` on the given intervals. The known-sites list is
# built from `knownIndelsSitesVCFs` and `dbsnpVCF`; both are optional
# individually, but at least one known-sites VCF must be supplied because the
# combined list is declared non-empty.
task BaseRecalibrator {
    input {
        # Alignment to model. The index is never referenced in the command;
        # presumably it is declared so the engine localizes it next to the BAM
        # (TODO confirm).
        File inputBam
        File inputBamIndex

        # Destination of the recalibration table; its parent directory is created.
        String recalibrationReportPath

        # One -L argument is emitted per interval file.
        Array[File]+ sequenceGroupInterval

        # Optional known-sites resources. The index inputs are not referenced
        # in the command.
        Array[File]? knownIndelsSitesVCFs
        Array[File]? knownIndelsSitesVCFIndexes
        File? dbsnpVCF
        File? dbsnpVCFIndex

        # Reference passed to -R. The .dict and .fai files are not referenced
        # in the command.
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai

        # `memory` is the JVM heap in gigabytes (-Xmx). The runtime request is
        # memory * memoryMultiplier, rounded up.
        Int memory = 4
        Float memoryMultiplier = 3.0
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
    }

    # Merge all known-sites resources into one list. select_all() drops an
    # undefined dbsnpVCF instead of aborting evaluation, so the task also works
    # when only knownIndelsSitesVCFs is provided.
    Array[File]+ knownIndelsSitesVCFsArg = flatten([
        select_first([knownIndelsSitesVCFs, []]),
        select_all([dbsnpVCF])
    ])

    command {
        set -e
        mkdir -p $(dirname ~{recalibrationReportPath})
        gatk --java-options -Xmx~{memory}G \
        BaseRecalibrator \
        -R ~{referenceFasta} \
        -I ~{inputBam} \
        --use-original-qualities \
        -O ~{recalibrationReportPath} \
        --known-sites ~{sep=" --known-sites " knownIndelsSitesVCFsArg} \
        -L ~{sep=" -L " sequenceGroupInterval}
    }

    output {
        File recalibrationReport = recalibrationReportPath
    }

    runtime {
        docker: dockerImage
        memory: ceil(memory * memoryMultiplier)
    }
}
# Merge several GVCF files into one with `gatk CombineGVCFs`, restricted to
# the supplied intervals.
task CombineGVCFs {
    input {
        # GVCFs to merge; one -V argument is emitted per file. The index files
        # are not referenced in the command.
        Array[File]+ gvcfFiles
        Array[File]+ gvcfFilesIndex

        # One -L argument is emitted per interval file.
        Array[File]+ intervals

        # Destination of the combined file; its parent directory is created.
        String outputPath

        # Reference passed to -R. The .dict and .fai files are not referenced
        # in the command.
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai

        # `memory` is the JVM heap in gigabytes (-Xmx). The runtime request is
        # memory * memoryMultiplier, rounded up.
        Int memory = 4
        Float memoryMultiplier = 3.0
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
    }

    command {
        set -e
        mkdir -p $(dirname ~{outputPath})
        gatk --java-options -Xmx~{memory}G \
        CombineGVCFs \
        -R ~{referenceFasta} \
        -O ~{outputPath} \
        -V ~{sep=" -V " gvcfFiles} \
        -L ~{sep=" -L " intervals}
    }

    output {
        File outputVcf = outputPath
        # The index is expected at the output path plus ".tbi".
        File outputVcfIndex = outputPath + ".tbi"
    }

    runtime {
        memory: ceil(memory * memoryMultiplier)
        docker: dockerImage
    }
}
# Merge the recalibration tables produced by scattered BaseRecalibrator runs
# into a single report using `gatk GatherBQSRReports`.
task GatherBqsrReports {
    input {
        # Reports to merge; one -I argument is emitted per file.
        Array[File] inputBQSRreports

        # Destination of the merged report; its parent directory is created.
        String outputReportPath

        # `memory` is the JVM heap in gigabytes (-Xmx). The runtime request is
        # memory * memoryMultiplier, rounded up.
        Int memory = 4
        Float memoryMultiplier = 3.0
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
    }

    command {
        set -e
        mkdir -p $(dirname ~{outputReportPath})
        gatk --java-options -Xmx~{memory}G \
        GatherBQSRReports \
        -I ~{sep=" -I " inputBQSRreports} \
        -O ~{outputReportPath}
    }

    output {
        File outputBQSRreport = outputReportPath
    }

    runtime {
        memory: ceil(memory * memoryMultiplier)
        docker: dockerImage
    }
}
# Genotype one or more GVCF files with `gatk GenotypeGVCFs`, restricted to the
# supplied intervals.
task GenotypeGVCFs {
    input {
        # GVCFs to genotype; one -V argument is emitted per file. The index
        # files are not referenced in the command.
        Array[File]+ gvcfFiles
        Array[File]+ gvcfFilesIndex

        # One -L argument is emitted per interval file.
        Array[File]+ intervals

        # Destination of the genotyped VCF; its parent directory is created.
        String outputPath

        # Reference passed to -R. The .dict and .fai files are not referenced
        # in the command.
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai

        # Optional dbSNP resource passed with -D. The index is not referenced
        # in the command.
        File? dbsnpVCF
        File? dbsnpVCFIndex

        # `memory` is the JVM heap in gigabytes (-Xmx). The runtime request is
        # memory * memoryMultiplier, rounded up.
        Int memory = 6
        Float memoryMultiplier = 2.0
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
    }

    # The "-D " + dbsnpVCF placeholder expands to nothing when dbsnpVCF is
    # undefined, so the flag and its value always appear (or vanish) together.
    command {
        set -e
        mkdir -p $(dirname ~{outputPath})
        gatk --java-options -Xmx~{memory}G \
        GenotypeGVCFs \
        -R ~{referenceFasta} \
        -O ~{outputPath} \
        ~{"-D " + dbsnpVCF} \
        -G StandardAnnotation \
        --only-output-calls-starting-in-intervals \
        -new-qual \
        -V ~{sep=' -V ' gvcfFiles} \
        -L ~{sep=' -L ' intervals}
    }

    output {
        File outputVCF = outputPath
        # The index is expected at the output path plus ".tbi".
        File outputVCFIndex = outputPath + ".tbi"
    }

    runtime {
        docker: dockerImage
        memory: ceil(memory * memoryMultiplier)
    }
}
# Call variants with `gatk HaplotypeCaller` in GVCF mode (-ERC GVCF),
# restricted to the supplied intervals.
task HaplotypeCallerGvcf {
    input {
        # Alignments to call on; one -I argument is emitted per BAM. The index
        # files are not referenced in the command.
        Array[File]+ inputBams
        Array[File]+ inputBamsIndex

        # One -L argument is emitted per interval file.
        Array[File]+ intervalList

        # Destination of the GVCF; its parent directory is created.
        String gvcfPath

        # Reference passed to -R. The index and .dict files are not referenced
        # in the command.
        File referenceFasta
        File referenceFastaIndex
        File referenceFastaDict

        # Value forwarded to -contamination.
        Float contamination = 0.0

        # Optional dbSNP resource passed with -D. The index is not referenced
        # in the command.
        File? dbsnpVCF
        File? dbsnpVCFIndex

        # `memory` is the JVM heap in gigabytes (-Xmx). The runtime request is
        # memory * memoryMultiplier, rounded up.
        Int memory = 4
        Float memoryMultiplier = 3
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
    }

    # The "-D " + dbsnpVCF placeholder expands to nothing when dbsnpVCF is
    # undefined, so the flag and its value always appear (or vanish) together.
    command {
        set -e
        mkdir -p $(dirname ~{gvcfPath})
        gatk --java-options -Xmx~{memory}G \
        HaplotypeCaller \
        -R ~{referenceFasta} \
        -O ~{gvcfPath} \
        -I ~{sep=" -I " inputBams} \
        -L ~{sep=' -L ' intervalList} \
        ~{"-D " + dbsnpVCF} \
        -contamination ~{contamination} \
        -ERC GVCF
    }

    output {
        File outputGVCF = gvcfPath
        # The index is expected at the GVCF path plus ".tbi".
        File outputGVCFIndex = gvcfPath + ".tbi"
    }

    runtime {
        docker: dockerImage
        memory: ceil(memory * memoryMultiplier)
    }
}
# Call variants with `gatk Mutect2` on the supplied intervals, optionally with
# a matched normal, a germline resource and a panel of normals.
task MuTect2 {
    input {
        # Alignments to call on; one -I argument is emitted per BAM. The index
        # files are not referenced in the command.
        Array[File]+ inputBams
        Array[File]+ inputBamsIndex

        # One -L argument is emitted per interval file.
        Array[File]+ intervals

        # Sample name passed to -tumor, and the optional one passed to -normal.
        String tumorSample
        String? normalSample

        # Reference passed to -R. The .dict and .fai files are not referenced
        # in the command.
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai

        # Optional resources. Each flag is emitted only when its file is
        # defined. The index files are not referenced in the command.
        File? germlineResource
        File? germlineResourceIndex
        File? panelOfNormals
        File? panelOfNormalsIndex

        # Destination of the VCF; its parent directory is created.
        String outputVcf
        # Only used to locate the stats output. It is never passed to the
        # command, so it should stay at outputVcf + ".stats" unless the tool
        # writes elsewhere (NOTE(review): confirm).
        String outputStats = outputVcf + ".stats"
        # Path handed to --f1r2-tar-gz.
        String f1r2TarGz = "f1r2.tar.gz"

        # `memory` is the JVM heap in gigabytes (-Xmx). The runtime request is
        # memory * memoryMultiplier, rounded up.
        Int memory = 4
        Float memoryMultiplier = 3
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
    }

    command {
        set -e
        mkdir -p $(dirname ~{outputVcf})
        gatk --java-options -Xmx~{memory}G \
        Mutect2 \
        -R ~{referenceFasta} \
        -I ~{sep=' -I ' inputBams} \
        -tumor ~{tumorSample} \
        ~{"-normal " + normalSample} \
        ~{"--germline-resource " + germlineResource} \
        ~{"--panel-of-normals " + panelOfNormals} \
        ~{"--f1r2-tar-gz " + f1r2TarGz} \
        -O ~{outputVcf} \
        -L ~{sep=' -L ' intervals}
    }

    output {
        File vcfFile = outputVcf
        # The index is expected at the VCF path plus ".tbi".
        File vcfFileIndex = outputVcf + ".tbi"
        File f1r2File = f1r2TarGz
        File stats = outputStats
    }

    runtime {
        memory: ceil(memory * memoryMultiplier)
        docker: dockerImage
    }
}
# Run `gatk LearnReadOrientationModel` on one or more f1r2 archives and write
# the result to "artifact-priors.tar.gz" in the working directory.
task LearnReadOrientationModel {
    input {
        # f1r2 archives; one -I argument is emitted per file.
        Array[File]+ f1r2TarGz

        # `memory` is the JVM heap in gigabytes (-Xmx). The runtime request is
        # memory * memoryMultiplier, rounded up.
        Int memory = 8
        Float memoryMultiplier = 1.5
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
    }

    command {
        set -e
        gatk --java-options -Xmx~{memory}G \
        LearnReadOrientationModel \
        -I ~{sep=' -I ' f1r2TarGz} \
        -O "artifact-priors.tar.gz"
    }

    output {
        # Fixed file name, matching the -O argument above.
        File artifactPriorsTable = "artifact-priors.tar.gz"
    }

    runtime {
        memory: ceil(memory * memoryMultiplier)
        docker: dockerImage
    }
}
# Merge several stats files with `gatk MergeMutectStats` into "merged.stats"
# in the working directory.
task MergeStats {
    input {
        # Stats files to merge; one -stats argument is emitted per file.
        Array[File]+ stats

        # `memory` is the JVM heap in gigabytes (-Xmx). The runtime request is
        # memory * memoryMultiplier, rounded up.
        Int memory = 2
        Float memoryMultiplier = 1.5
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
    }

    command {
        set -e
        gatk --java-options -Xmx~{memory}G \
        MergeMutectStats \
        -stats ~{sep=' -stats ' stats} \
        -O "merged.stats"
    }

    output {
        # Fixed file name, matching the -O argument above.
        File mergedStats = "merged.stats"
    }

    runtime {
        memory: ceil(memory * memoryMultiplier)
        docker: dockerImage
    }
}
# Run `gatk GetPileupSummaries` on a BAM and write the table to
# "<outputPrefix>-pileups.table".
task GetPileupSummaries {
    input {
        # Alignment passed to -I. The index is not referenced in the command;
        # presumably it is declared so the engine localizes it next to the BAM
        # (TODO confirm).
        File sampleBam
        File sampleBamIndex

        # File passed to -V and file passed to -L. Their index files are not
        # referenced in the command.
        File variantsForContamination
        File variantsForContaminationIndex
        File sitesForContamination
        File sitesForContaminationIndex

        # Prefix of the output table; may include a directory, which is
        # created if it does not exist.
        String outputPrefix

        # `memory` is the JVM heap in gigabytes (-Xmx). The runtime request is
        # memory * memoryMultiplier, rounded up.
        Int memory = 4
        Float memoryMultiplier = 1.5
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
    }

    # Create the parent directory of the output first, as the other tasks in
    # this file that take a caller-supplied output path do.
    command {
        set -e
        mkdir -p "$(dirname ~{outputPrefix})"
        gatk --java-options -Xmx~{memory}G \
        GetPileupSummaries \
        -I ~{sampleBam} \
        -V ~{variantsForContamination} \
        -L ~{sitesForContamination} \
        -O ~{outputPrefix + "-pileups.table"}
    }

    output {
        File pileups = outputPrefix + "-pileups.table"
    }

    runtime {
        docker: dockerImage
        memory: ceil(memory * memoryMultiplier)
    }
}
# Run `gatk CalculateContamination` on a tumor pileup table, optionally with a
# matched normal, writing "contamination.table" and "segments.table" to the
# working directory.
task CalculateContamination {
    input {
        # Pileup table passed to -I.
        File tumorPileups
        # Optional pileup table; -matched is emitted only when it is defined.
        File? normalPileups

        # `memory` is the JVM heap in gigabytes (-Xmx). The runtime request is
        # memory * memoryMultiplier, rounded up.
        Int memory = 4
        Float memoryMultiplier = 1.5
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
    }

    command {
        set -e
        gatk --java-options -Xmx~{memory}G \
        CalculateContamination \
        -I ~{tumorPileups} \
        ~{"-matched " + normalPileups} \
        -O "contamination.table" \
        --tumor-segmentation "segments.table"
    }

    output {
        # Fixed file names, matching the -O and --tumor-segmentation arguments.
        File contaminationTable = "contamination.table"
        File mafTumorSegments = "segments.table"
    }

    runtime {
        memory: ceil(memory * memoryMultiplier)
        docker: dockerImage
    }
}
# Filter an unfiltered VCF with `gatk FilterMutectCalls`, writing the filtered
# VCF to `outputVcf` and the filtering statistics to "filtering.stats".
task FilterMutectCalls {
    input {
        # VCF passed to -V. The index is not referenced in the command.
        File unfilteredVcf
        File unfilteredVcfIndex

        # Stats file passed to -stats.
        File mutect2Stats

        # Reference passed to -R. The .fai and .dict files are not referenced
        # in the command.
        File referenceFasta
        File referenceFastaFai
        File referenceFastaDict

        # Destination of the filtered VCF; its parent directory is created.
        String outputVcf

        # Optional inputs. Each flag is emitted only when its file is defined.
        File? contaminationTable
        File? mafTumorSegments
        File? artifactPriors

        # Value forwarded to --unique-alt-read-count.
        Int uniqueAltReadCount = 4

        # Free-form text appended verbatim to the end of the command line.
        String? extraArgs

        # `memory` is the JVM heap in gigabytes (-Xmx). The runtime request is
        # memory * memoryMultiplier, rounded up.
        Int memory = 4
        Float memoryMultiplier = 1.5
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1"
    }

    command {
        set -e
        mkdir -p $(dirname ~{outputVcf})
        gatk --java-options -Xmx~{memory}G \
        FilterMutectCalls \
        -R ~{referenceFasta} \
        -V ~{unfilteredVcf} \
        -O ~{outputVcf} \
        ~{"--contamination-table " + contaminationTable} \
        ~{"--tumor-segmentation " + mafTumorSegments} \
        ~{"--ob-priors " + artifactPriors} \
        ~{"--unique-alt-read-count " + uniqueAltReadCount} \
        ~{"-stats " + mutect2Stats} \
        --filtering-stats "filtering.stats" \
        --showHidden \
        ~{extraArgs}
    }

    output {
        File filteredVcf = outputVcf
        # The index is expected at the VCF path plus ".tbi".
        File filteredVcfIndex = outputVcf + ".tbi"
        # Fixed file name, matching the --filtering-stats argument.
        File filteringStats = "filtering.stats"
    }

    runtime {
        memory: ceil(memory * memoryMultiplier)
        docker: dockerImage
    }
}
# Run `gatk SplitNCigarReads` on a BAM, restricted to the supplied intervals.
task SplitNCigarReads {
    input {
        # Alignment passed to -I. The index is not referenced in the command;
        # presumably it is declared so the engine localizes it next to the BAM
        # (TODO confirm).
        File inputBam
        File inputBamIndex

        # One -L argument is emitted per interval file.
        Array[File]+ intervals

        # Reference passed to -R. The .dict and .fai files are not referenced
        # in the command.
        File referenceFasta
        File referenceFastaDict
        File referenceFastaFai

        # Destination of the output BAM; its parent directory is created.
        String outputBam

        # `memory` is the JVM heap in gigabytes (-Xmx). The runtime request is
        # memory * memoryMultiplier, rounded up.
        Int memory = 4
        Float memoryMultiplier = 4
        String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0"
    }

    command {
        set -e
        mkdir -p $(dirname ~{outputBam})
        gatk --java-options -Xmx~{memory}G \
        SplitNCigarReads \
        -I ~{inputBam} \
        -R ~{referenceFasta} \
        -O ~{outputBam} \
        -L ~{sep=" -L " intervals}
    }

    output {
        File bam = outputBam
        # The index path is the BAM path with a trailing ".bam" swapped for ".bai".
        File bamIndex = sub(outputBam, "\.bam$", ".bai")
    }

    runtime {
        memory: ceil(memory * memoryMultiplier)
        docker: dockerImage
    }
}
# Merge several VCF files with GATK3 `CombineVariants`.
#
# Each VCF is passed as "-V:<identifier> <file>", pairing `identifiers` and
# `variantVcfs` by position. The task fails early when the two arrays differ
# in length, instead of silently dropping VCFs or emitting an empty file
# argument.
task CombineVariants {
    input {
        String installDir = "/usr" # .jar location in the docker image

        # Reference passed to -R. The .fai and .dict files are not referenced
        # in the command.
        File referenceFasta
        File referenceFastaFai
        File referenceFastaDict

        # Values forwarded to --genotypemergeoption and
        # --filteredrecordsmergetype.
        String genotypeMergeOption = "UNIQUIFY"
        String filteredRecordsMergeType = "KEEP_IF_ANY_UNFILTERED"

        Array[String]+ identifiers
        Array[File]+ variantVcfs # follow "identifiers" array order
        # Not referenced in the command.
        Array[File]+ variantIndexes

        # Destination of the combined VCF; its parent directory is created.
        String outputPath

        # `memory` is the JVM heap in gigabytes (-Xmx). The runtime request is
        # memory * memoryMultiplier, rounded up.
        Int memory = 4
        Float memoryMultiplier = 1.5
        String dockerImage = "broadinstitute/gatk3:3.8-1"
    }

    command <<<
        set -e -o pipefail
        mkdir -p $(dirname "~{outputPath}")

        # build "-V:<ID> <file.vcf>" arguments according to IDs and VCFs to merge
        ids=(~{sep=" " identifiers})
        vars=(~{sep=" " variantVcfs})

        # The two arrays are paired by index, so they must have equal length.
        if [ "${#ids[@]}" -ne "${#vars[@]}" ]; then
            echo "CombineVariants: ${#ids[@]} identifiers given for ${#vars[@]} VCF files" >&2
            exit 1
        fi

        V_lines=$(for ((i=0;i<${#ids[@]};++i)); do printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}"; done)
        java -Xmx~{memory}G -jar ~{installDir}/GenomeAnalysisTK.jar \
        -T CombineVariants \
        -R ~{referenceFasta} \
        --genotypemergeoption ~{genotypeMergeOption} \
        --filteredrecordsmergetype ~{filteredRecordsMergeType} \
        --out ~{outputPath} \
        $V_lines
    >>>

    output {
        File combinedVcf = outputPath
        # The index is expected at the output path plus ".tbi".
        File combinedVcfIndex = outputPath + ".tbi"
    }

    runtime {
        docker: dockerImage
        memory: ceil(memory * memoryMultiplier)
    }
}