-
Peter van 't Hof authoredPeter van 't Hof authored
gatk.wdl 8.78 KiB
version 1.0
import "common.wdl"
# Run GATK ApplyBQSR on a BAM file, using a previously generated
# recalibration report, restricted to the given intervals.
task ApplyBQSR {
    input {
        # Optional shell snippet executed before the GATK call.
        String? preCommand
        # Optional GATK jar; when absent the `gatk` executable is used instead.
        File? gatkJar
        IndexedBamFile inputBam
        # Path the recalibrated BAM is written to (-O).
        String outputBamPath
        # Recalibration table passed to -bqsr.
        File recalibrationReport
        # Interval files, each passed with its own -L flag.
        Array[File]+ sequenceGroupInterval
        Reference reference
        # Java heap size in gigabytes (rendered as -Xmx<memory>G).
        Int memory = 4
        # Factor applied to `memory` to compute the runtime memory attribute.
        Float memoryMultiplier = 3.0
    }

    # Heap flag shared by both ways of launching GATK.
    String javaHeap = "-Xmx" + memory + "G"

    # Launch through the supplied jar when there is one, otherwise through
    # the `gatk` wrapper found on the PATH.
    String toolCommand = if (defined(gatkJar))
        then "java " + javaHeap + " -jar " + gatkJar
        else "gatk --java-options " + javaHeap

    command {
        set -e -o pipefail
        ~{preCommand}
        ~{toolCommand} \
        ApplyBQSR \
        --create-output-bam-md5 \
        --add-output-sam-program-record \
        -R ~{reference.fasta} \
        -I ~{inputBam.file} \
        --use-original-qualities \
        -O ~{outputBamPath} \
        -bqsr ~{recalibrationReport} \
        --static-quantized-quals 10 \
        --static-quantized-quals 20 \
        --static-quantized-quals 30 \
        -L ~{sep=' -L ' sequenceGroupInterval}
    }

    output {
        # The index path is the BAM path with a trailing ".bam" replaced by
        # ".bai"; the md5 path is the BAM path with ".md5" appended.
        IndexedBamFile recalibratedBam = {
            "file": outputBamPath,
            "index": sub(outputBamPath, "\.bam$", ".bai"),
            "md5": outputBamPath + ".md5"
        }
    }

    runtime {
        # Rounded up to a whole number.
        memory: ceil(memory * memoryMultiplier)
    }
}
# Run GATK BaseRecalibrator to generate a Base Quality Score Recalibration
# (BQSR) report for a BAM file, restricted to the given intervals.
task BaseRecalibrator {
    input {
        # Optional shell snippet executed before the GATK call.
        String? preCommand
        # Optional GATK jar; when absent the `gatk` executable is used instead.
        File? gatkJar
        IndexedBamFile inputBam
        # Path the recalibration report is written to (-O).
        String recalibrationReportPath
        # Interval files, each passed with its own -L flag.
        Array[File]+ sequenceGroupInterval
        # Optional known-sites VCFs, each passed with its own --known-sites flag.
        Array[File]? knownIndelsSitesVCFs
        # Not referenced in the command; presumably declared so the indexes
        # are localized next to their VCFs -- confirm against callers.
        Array[File]? knownIndelsSitesVCFIndexes
        # Optional dbSNP VCF; its `file` member is added to the known sites.
        IndexedVcfFile? dbsnpVCF
        Reference reference
        # Java heap size in gigabytes (rendered as -Xmx<memory>G).
        Int memory = 4
        # Factor applied to `memory` to compute the runtime memory attribute.
        Float memoryMultiplier = 3.0
    }

    # dbsnpVCF is optional, so only unwrap it when it is actually defined.
    # Calling select_first([dbsnpVCF]) unconditionally fails the task when
    # dbSNP is omitted, even if knownIndelsSitesVCFs is supplied.
    Array[File] dbsnpSites = if defined(dbsnpVCF)
        then [select_first([dbsnpVCF]).file]
        else []

    # All known-sites files combined. The non-empty array type (+) makes the
    # task fail at evaluation when neither source provides a file, instead
    # of rendering a dangling --known-sites flag.
    Array[File]+ knownIndelsSitesVCFsArg = flatten([
        select_first([knownIndelsSitesVCFs, []]),
        dbsnpSites
    ])

    # Launch through the supplied jar when there is one, otherwise through
    # the `gatk` wrapper found on the PATH.
    String toolCommand = if defined(gatkJar)
        then "java -Xmx" + memory + "G -jar " + gatkJar
        else "gatk --java-options -Xmx" + memory + "G"

    command {
        set -e -o pipefail
        ~{preCommand}
        ~{toolCommand} \
        BaseRecalibrator \
        -R ~{reference.fasta} \
        -I ~{inputBam.file} \
        --use-original-qualities \
        -O ~{recalibrationReportPath} \
        --known-sites ~{sep=" --known-sites " knownIndelsSitesVCFsArg} \
        -L ~{sep=" -L " sequenceGroupInterval}
    }

    output {
        File recalibrationReport = recalibrationReportPath
    }

    runtime {
        # Rounded up to a whole number.
        memory: ceil(memory * memoryMultiplier)
    }
}
# Combine several GVCF files into one with GATK CombineGVCFs. When only a
# single GVCF is given, GATK is skipped and the output paths are symlinked
# to that GVCF and its index.
task CombineGVCFs {
    input {
        # Optional shell snippet executed before the main command.
        String? preCommand
        # GVCFs to combine, each passed with its own -V flag.
        Array[File]+ gvcfFiles
        # Index files for gvcfFiles; element 0 is used as the output index in
        # the single-GVCF case. Assumed to be in the same order as gvcfFiles
        # -- confirm against callers.
        Array[File]+ gvcfFilesIndex
        # Interval files, each passed with its own -L flag.
        Array[File]+ intervals
        # Path the combined GVCF is written to (-O); the index is expected at
        # this path with ".tbi" appended.
        String outputPath
        # Optional path to a GATK jar (declared as String here); when absent
        # the `gatk` executable is used instead.
        String? gatkJar
        Reference reference
        # Java heap size in gigabytes (rendered as -Xmx<memory>G).
        Int memory = 4
        # Factor applied to `memory` to compute the runtime memory attribute.
        Float memoryMultiplier = 3.0
    }

    # Launch through the supplied jar when there is one, otherwise through
    # the `gatk` wrapper found on the PATH.
    String toolCommand = if defined(gatkJar)
        then "java -Xmx" + memory + "G -jar " + gatkJar
        else "gatk --java-options -Xmx" + memory + "G"

    command {
        set -e -o pipefail
        ~{preCommand}
        if [ ~{length(gvcfFiles)} -gt 1 ]; then
            ~{toolCommand} \
            CombineGVCFs \
            -R ~{reference.fasta} \
            -O ~{outputPath} \
            -V ~{sep=' -V ' gvcfFiles} \
            -L ~{sep=' -L ' intervals}
        else # TODO this should be handled in wdl
            ln -sf ~{gvcfFiles[0]} ~{outputPath}
            # Link the index file, not the GVCF itself, as the output index.
            ln -sf ~{gvcfFilesIndex[0]} ~{outputPath}.tbi
        fi
    }

    output {
        IndexedVcfFile outputVCF = {
            "file": outputPath,
            "index": outputPath + ".tbi"
        }
    }

    runtime {
        # Rounded up to a whole number.
        memory: ceil(memory * memoryMultiplier)
    }
}
# Merge several BQSR recalibration reports into a single report with GATK
# GatherBQSRReports.
task GatherBqsrReports {
    input {
        # Optional shell snippet executed before the GATK call.
        String? preCommand
        # Optional path to a GATK jar (declared as String here); when absent
        # the `gatk` executable is used instead.
        String? gatkJar
        # Reports to merge, each passed with its own -I flag.
        Array[File] inputBQSRreports
        # Path the merged report is written to (-O).
        String outputReportPath
        # Java heap size in gigabytes (rendered as -Xmx<memory>G).
        Int memory = 4
        # Factor applied to `memory` to compute the runtime memory attribute.
        Float memoryMultiplier = 3.0
    }

    # Heap flag shared by both ways of launching GATK.
    String javaHeap = "-Xmx" + memory + "G"

    # Launch through the supplied jar when there is one, otherwise through
    # the `gatk` wrapper found on the PATH.
    String toolCommand = if (defined(gatkJar))
        then "java " + javaHeap + " -jar " + gatkJar
        else "gatk --java-options " + javaHeap

    command {
        set -e -o pipefail
        ~{preCommand}
        ~{toolCommand} \
        GatherBQSRReports \
        -I ~{sep=" -I " inputBQSRreports} \
        -O ~{outputReportPath}
    }

    output {
        File outputBQSRreport = outputReportPath
    }

    runtime {
        # Rounded up to a whole number.
        memory: ceil(memory * memoryMultiplier)
    }
}
# Run GATK GenotypeGVCFs on one or more GVCF files over the given intervals,
# optionally passing a dbSNP VCF with -D.
task GenotypeGVCFs {
    input {
        # Optional shell snippet executed before the GATK call.
        String? preCommand
        # Input GVCFs, each passed with its own -V flag.
        Array[File]+ gvcfFiles
        # Not referenced in the command; presumably declared so the indexes
        # are localized next to their GVCFs -- confirm against callers.
        Array[File]+ gvcfFilesIndex
        # Interval files, each passed with its own -L flag.
        Array[File]+ intervals
        # Path the VCF is written to (-O); the index is expected at this path
        # with ".tbi" appended.
        String outputPath
        # Optional path to a GATK jar (declared as String here); when absent
        # the `gatk` executable is used instead.
        String? gatkJar
        Reference reference
        # Optional dbSNP VCF; its `file` member is passed with -D.
        IndexedVcfFile? dbsnpVCF
        # Java heap size in gigabytes (rendered as -Xmx<memory>G).
        Int memory = 6
        # Factor applied to `memory` to compute the runtime memory attribute.
        Float memoryMultiplier = 2.0
    }

    # Renders to an empty string when no dbSNP VCF is supplied.
    String dbsnpArg = if (defined(dbsnpVCF))
        then "-D " + select_first([dbsnpVCF]).file
        else ""

    # Heap flag shared by both ways of launching GATK.
    String javaHeap = "-Xmx" + memory + "G"

    # Launch through the supplied jar when there is one, otherwise through
    # the `gatk` wrapper found on the PATH.
    String toolCommand = if (defined(gatkJar))
        then "java " + javaHeap + " -jar " + gatkJar
        else "gatk --java-options " + javaHeap

    command {
        set -e -o pipefail
        ~{preCommand}
        ~{toolCommand} \
        GenotypeGVCFs \
        -R ~{reference.fasta} \
        -O ~{outputPath} \
        ~{dbsnpArg} \
        -G StandardAnnotation \
        --only-output-calls-starting-in-intervals \
        -new-qual \
        -V ~{sep=" -V " gvcfFiles} \
        -L ~{sep=" -L " intervals}
    }

    output {
        IndexedVcfFile outputVCF = {
            "file": outputPath,
            "index": outputPath + ".tbi"
        }
    }

    runtime {
        # Rounded up to a whole number.
        memory: ceil(memory * memoryMultiplier)
    }
}
# Run GATK HaplotypeCaller in GVCF mode (-ERC GVCF) on the given BAM files
# over the given intervals, optionally passing a dbSNP VCF with -D.
task HaplotypeCallerGvcf {
    input {
        # Optional shell snippet executed before the GATK call.
        String? preCommand
        # Input BAMs, each passed with its own -I flag.
        Array[File]+ inputBams
        # Not referenced in the command; presumably declared so the indexes
        # are localized next to their BAMs -- confirm against callers.
        Array[File]+ inputBamsIndex
        # Interval files, each passed with its own -L flag.
        Array[File]+ intervalList
        # Path the GVCF is written to (-O); the index is expected at this
        # path with ".tbi" appended.
        String gvcfPath
        Reference reference
        # Value passed to -contamination.
        Float contamination = 0.0
        # Optional path to a GATK jar (declared as String here); when absent
        # the `gatk` executable is used instead.
        String? gatkJar
        # Optional dbSNP VCF; its `file` member is passed with -D.
        IndexedVcfFile? dbsnpVCF
        # Java heap size in gigabytes (rendered as -Xmx<memory>G).
        Int memory = 4
        # Factor applied to `memory` to compute the runtime memory attribute.
        Float memoryMultiplier = 3
    }

    # Renders to an empty string when no dbSNP VCF is supplied.
    String dbsnpArg = if defined(dbsnpVCF)
        then "-D " + select_first([dbsnpVCF]).file
        else ""

    # Heap flag shared by both ways of launching GATK.
    String javaHeap = "-Xmx" + memory + "G"

    # Launch through the supplied jar when there is one, otherwise through
    # the `gatk` wrapper found on the PATH.
    String toolCommand = if (defined(gatkJar))
        then "java " + javaHeap + " -jar " + gatkJar
        else "gatk --java-options " + javaHeap

    command {
        set -e -o pipefail
        ~{preCommand}
        ~{toolCommand} \
        HaplotypeCaller \
        -R ~{reference.fasta} \
        -O ~{gvcfPath} \
        -I ~{sep=' -I ' inputBams} \
        -L ~{sep=" -L " intervalList} \
        ~{dbsnpArg} \
        -contamination ~{contamination} \
        -ERC GVCF
    }

    output {
        IndexedVcfFile outputGVCF = {
            "file": gvcfPath,
            "index": gvcfPath + ".tbi"
        }
    }

    runtime {
        # Rounded up to a whole number.
        memory: ceil(memory * memoryMultiplier)
    }
}
# Run GATK Mutect2 on the given BAM files over the given intervals, with a
# required tumor sample name and an optional normal sample name.
task MuTect2 {
    input {
        # Optional shell snippet executed before the GATK call.
        String? preCommand
        # Input BAMs, each passed with its own -I flag.
        Array[File]+ inputBams
        # Not referenced in the command; presumably declared so the indexes
        # are localized next to their BAMs -- confirm against callers.
        Array[File]+ inputBamsIndex
        Reference reference
        # Path the VCF is written to (-O); the index is expected at this path
        # with ".tbi" appended.
        String outputVcf
        # Sample name passed to -tumor.
        String tumorSample
        # Sample name passed to -normal; the flag is omitted when undefined.
        String? normalSample
        # Interval files, each passed with its own -L flag.
        Array[File]+ intervals
        # Optional path to a GATK jar (declared as String here); when absent
        # the `gatk` executable is used instead.
        String? gatkJar
        # Java heap size in gigabytes (rendered as -Xmx<memory>G).
        Int memory = 4
        # Factor applied to `memory` to compute the runtime memory attribute.
        Float memoryMultiplier = 3
    }

    # Heap flag shared by both ways of launching GATK.
    String javaHeap = "-Xmx" + memory + "G"

    # Launch through the supplied jar when there is one, otherwise through
    # the `gatk` wrapper found on the PATH.
    String toolCommand = if (defined(gatkJar))
        then "java " + javaHeap + " -jar " + gatkJar
        else "gatk --java-options " + javaHeap

    command {
        set -e -o pipefail
        ~{preCommand}
        ~{toolCommand} \
        Mutect2 \
        -R ~{reference.fasta} \
        -I ~{sep=' -I ' inputBams} \
        -tumor ~{tumorSample} \
        ~{"-normal " + normalSample} \
        -O ~{outputVcf} \
        -L ~{sep=' -L ' intervals}
    }

    output {
        IndexedVcfFile vcfFile = {
            "file": outputVcf,
            "index": outputVcf + ".tbi"
        }
    }

    runtime {
        # Rounded up to a whole number.
        memory: ceil(memory * memoryMultiplier)
    }
}
# Run GATK SplitNCigarReads on a BAM file over the given intervals.
task SplitNCigarReads {
    input {
        # Optional shell snippet executed before the GATK call.
        String? preCommand
        IndexedBamFile inputBam
        Reference reference
        # Path the output BAM is written to (-O).
        String outputBam
        # Optional path to a GATK jar (declared as String here); when absent
        # the `gatk` executable is used instead.
        String? gatkJar
        # Interval files, each passed with its own -L flag.
        Array[File]+ intervals
        # Java heap size in gigabytes (rendered as -Xmx<memory>G).
        Int memory = 4
        # Factor applied to `memory` to compute the runtime memory attribute.
        Float memoryMultiplier = 3
    }

    # Heap flag shared by both ways of launching GATK.
    String javaHeap = "-Xmx" + memory + "G"

    # Launch through the supplied jar when there is one, otherwise through
    # the `gatk` wrapper found on the PATH.
    String toolCommand = if (defined(gatkJar))
        then "java " + javaHeap + " -jar " + gatkJar
        else "gatk --java-options " + javaHeap

    command {
        set -e -o pipefail
        ~{preCommand}
        ~{toolCommand} \
        SplitNCigarReads \
        -I ~{inputBam.file} \
        -R ~{reference.fasta} \
        -O ~{outputBam} \
        -L ~{sep=" -L " intervals}
    }

    output {
        # The index path is the BAM path with a trailing ".bam" replaced by
        # ".bai".
        IndexedBamFile bam = {
            "file": outputBam,
            "index": sub(outputBam, "\.bam$", ".bai")
        }
    }

    runtime {
        # Rounded up to a whole number.
        memory: ceil(memory * memoryMultiplier)
    }
}