From 0e6bde497f1a239692783c5e90c69bc38442f7de Mon Sep 17 00:00:00 2001 From: Ruben Vorderman <r.h.p.vorderman@lumc.nl> Date: Mon, 23 Jul 2018 13:29:14 +0200 Subject: [PATCH] gatk to 1.0 --- gatk.wdl | 296 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 155 insertions(+), 141 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index fbbc813..e6c530e 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1,19 +1,22 @@ +version 1.0 # Apply Base Quality Score Recalibration (BQSR) model task ApplyBQSR { - String? preCommand - File? gatkJar - File inputBam - File inputBamIndex - String outputBamPath - File recalibrationReport - Array[File]+ sequenceGroupInterval - File refDict - File refFasta - File refFastaIndex - Int? compressionLevel - - Float? memory - Float? memoryMultiplier + input { + String? preCommand + File? gatkJar + File inputBam + File inputBamIndex + String outputBamPath + File recalibrationReport + Array[File]+ sequenceGroupInterval + File refDict + File refFasta + File refFastaIndex + Int? compressionLevel + + Float? memory + Float? memoryMultiplier + } Int mem = ceil(select_first([memory, 4.0])) @@ -23,18 +26,18 @@ task ApplyBQSR { command { set -e -o pipefail - ${preCommand} - ${toolCommand} \ + ~{preCommand} + ~{toolCommand} \ ApplyBQSR \ --create-output-bam-md5 \ --add-output-sam-program-record \ - -R ${refFasta} \ - -I ${inputBam} \ + -R ~{refFasta} \ + -I ~{inputBam} \ --use-original-qualities \ - -O ${outputBamPath} \ - -bqsr ${recalibrationReport} \ + -O ~{outputBamPath} \ + -bqsr ~{recalibrationReport} \ --static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \ - -L ${sep=" -L " sequenceGroupInterval} + -L ~{sep=" -L " sequenceGroupInterval} } output { @@ -49,19 +52,23 @@ task ApplyBQSR { # Generate Base Quality Score Recalibration (BQSR) model task BaseRecalibrator { - String? preCommand - File? gatkJar - File inputBam - File inputBamIndex - String recalibrationReportPath - Array[File]+ sequenceGroupInterval - Array[File]? knownIndelsSitesVCFs - Array[File]? knownIndelsSitesIndices - File? dbsnpVCF - File? dbsnpVCFindex - File refDict - File refFasta - File refFastaIndex + input { + String? preCommand + File? gatkJar + File inputBam + File inputBamIndex + String recalibrationReportPath + Array[File]+ sequenceGroupInterval + Array[File]? knownIndelsSitesVCFs + Array[File]? knownIndelsSitesIndices + File? dbsnpVCF + File? dbsnpVCFindex + File refDict + File refFasta + File refFastaIndex + Float? memory + Float? memoryMultiplier + } Array[File]+ knownIndelsSitesVCFsArg = flatten([ select_first([knownIndelsSitesVCFs, []]), @@ -72,9 +79,6 @@ task BaseRecalibrator { select_all([dbsnpVCFindex]) ]) - Float? memory - Float? memoryMultiplier - Int mem = ceil(select_first([memory, 4.0])) String toolCommand = if defined(gatkJar) @@ -83,15 +87,15 @@ task BaseRecalibrator { command { set -e -o pipefail - ${preCommand} - ${toolCommand} \ + ~{preCommand} + ~{toolCommand} \ BaseRecalibrator \ - -R ${refFasta} \ - -I ${inputBam} \ + -R ~{refFasta} \ + -I ~{inputBam} \ --use-original-qualities \ - -O ${recalibrationReportPath} \ - --known-sites ${sep=" --known-sites " knownIndelsSitesVCFsArg} \ - -L ${sep=" -L " sequenceGroupInterval} + -O ~{recalibrationReportPath} \ + --known-sites ~{sep=" --known-sites " knownIndelsSitesVCFsArg} \ + -L ~{sep=" -L " sequenceGroupInterval} } output { @@ -104,22 +108,24 @@ task BaseRecalibrator { } task CombineGVCFs { - String? preCommand - Array[File]+ gvcfFiles - Array[File]+ gvcfFileIndexes - Array[File]+ intervals + input { + String? preCommand + Array[File]+ gvcfFiles + Array[File]+ gvcfFileIndexes + Array[File]+ intervals - String outputPath + String outputPath - String? gatkJar + String? gatkJar - File refFasta - File refFastaIndex - File refDict + File refFasta + File refFastaIndex + File refDict - Int? compressionLevel - Float? memory - Float? memoryMultiplier + Int? compressionLevel + Float? memory + Float? memoryMultiplier + } Int mem = ceil(select_first([memory, 4.0])) @@ -129,18 +135,18 @@ task CombineGVCFs { command { set -e -o pipefail - ${preCommand} + ~{preCommand} - if [ ${length(gvcfFiles)} -gt 1 ]; then - ${toolCommand} \ + if [ ~{length(gvcfFiles)} -gt 1 ]; then + ~{toolCommand} \ CombineGVCFs \ - -R ${refFasta} \ - -O ${outputPath} \ - -V ${sep=' -V ' gvcfFiles} \ - -L ${sep=' -L ' intervals} + -R ~{refFasta} \ + -O ~{outputPath} \ + -V ~{sep=' -V ' gvcfFiles} \ + -L ~{sep=' -L ' intervals} else # TODO this should be handeled in wdl - ln -sf ${select_first(gvcfFiles)} ${outputPath} - ln -sf ${select_first(gvcfFileIndexes)} ${outputPath}.tbi + ln -sf ~{select_first(gvcfFiles)} ~{outputPath} + ln -sf ~{select_first(gvcfFileIndexes)} ~{outputPath}.tbi fi } @@ -156,13 +162,15 @@ task CombineGVCFs { # Combine multiple recalibration tables from scattered BaseRecalibrator runs task GatherBqsrReports { - String? preCommand - String? gatkJar - Array[File] inputBQSRreports - String outputReportPath - - Float? memory - Float? memoryMultiplier + input { + String? preCommand + String? gatkJar + Array[File] inputBQSRreports + String outputReportPath + + Float? memory + Float? memoryMultiplier + } Int mem = ceil(select_first([memory, 4.0])) @@ -172,11 +180,11 @@ task GatherBqsrReports { command { set -e -o pipefail - ${preCommand} - ${toolCommand} \ + ~{preCommand} + ~{toolCommand} \ GatherBQSRReports \ - -I ${sep=' -I ' inputBQSRreports} \ - -O ${outputReportPath} + -I ~{sep=' -I ' inputBQSRreports} \ + -O ~{outputReportPath} } output { @@ -189,25 +197,27 @@ task GatherBqsrReports { } task GenotypeGVCFs { - String? preCommand - File gvcfFiles - File gvcfFileIndexes - Array[File]+ intervals + input { + String? preCommand + File gvcfFiles + File gvcfFileIndexes + Array[File]+ intervals - String outputPath + String outputPath - String? gatkJar + String? gatkJar - File refFasta - File refFastaIndex - File refDict + File refFasta + File refFastaIndex + File refDict - File? dbsnpVCF - File? dbsnpVCFindex + File? dbsnpVCF + File? dbsnpVCFindex - Int? compressionLevel - Float? memory - Float? memoryMultiplier + Int? compressionLevel + Float? memory + Float? memoryMultiplier + } Int mem = ceil(select_first([memory, 4.0])) @@ -217,18 +227,18 @@ task GenotypeGVCFs { command { set -e -o pipefail - ${preCommand} + ~{preCommand} - ${toolCommand} \ + ~{toolCommand} \ GenotypeGVCFs \ - -R ${refFasta} \ - -O ${outputPath} \ - ${"-D " + dbsnpVCF} \ + -R ~{refFasta} \ + -O ~{outputPath} \ + ~{"-D " + dbsnpVCF} \ -G StandardAnnotation \ --only-output-calls-starting-in-intervals \ -new-qual \ - -V ${gvcfFiles} \ - -L ${sep=' -L ' intervals} + -V ~{gvcfFiles} \ + -L ~{sep=' -L ' intervals} } output { @@ -243,23 +253,25 @@ task GenotypeGVCFs { # Call variants on a single sample with HaplotypeCaller to produce a GVCF task HaplotypeCallerGvcf { - String? preCommand - Array[File]+ inputBams - Array[File]+ inputBamsIndex - Array[File]+ intervalList - String gvcfPath - File refDict - File refFasta - File refFastaIndex - Float? contamination - Int? compressionLevel - String? gatkJar - - File? dbsnpVCF - File? dbsnpVCFindex - - Float? memory - Float? memoryMultiplier + input { + String? preCommand + Array[File]+ inputBams + Array[File]+ inputBamsIndex + Array[File]+ intervalList + String gvcfPath + File refDict + File refFasta + File refFastaIndex + Float? contamination + Int? compressionLevel + String? gatkJar + + File? dbsnpVCF + File? dbsnpVCFindex + + Float? memory + Float? memoryMultiplier + } Int mem = ceil(select_first([memory, 4.0])) String toolCommand = if defined(gatkJar) @@ -268,15 +280,15 @@ task HaplotypeCallerGvcf { command { set -e -o pipefail - ${preCommand} - ${toolCommand} \ + ~{preCommand} + ~{toolCommand} \ HaplotypeCaller \ - -R ${refFasta} \ - -O ${gvcfPath} \ - -I ${sep=" -I " inputBams} \ - -L ${sep=' -L ' intervalList} \ - ${"-D " + dbsnpVCF} \ - -contamination ${default=0 contamination} \ + -R ~{refFasta} \ + -O ~{gvcfPath} \ + -I ~{sep=" -I " inputBams} \ + -L ~{sep=' -L ' intervalList} \ + ~{"-D " + dbsnpVCF} \ + -contamination ~{default=0 contamination} \ -ERC GVCF } @@ -291,19 +303,21 @@ task HaplotypeCallerGvcf { } task SplitNCigarReads { - String? preCommand - - File inputBam - File inputBamIndex - File refFasta - File refFastaIndex - File refDict - String outputBam - String? gatkJar - Array[File]+ intervals - - Float? memory - Float? memoryMultiplier + input { + String? preCommand + + File inputBam + File inputBamIndex + File refFasta + File refFastaIndex + File refDict + String outputBam + String? gatkJar + Array[File]+ intervals + + Float? memory + Float? memoryMultiplier + } Int mem = ceil(select_first([memory, 4.0])) String toolCommand = if defined(gatkJar) @@ -312,13 +326,13 @@ task SplitNCigarReads { command { set -e -o pipefail - ${preCommand} - ${toolCommand} \ + ~{preCommand} + ~{toolCommand} \ SplitNCigarReads \ - -I ${inputBam} \ - -R ${refFasta} \ - -O ${outputBam} \ - -L ${sep=' -L ' intervals} + -I ~{inputBam} \ + -R ~{refFasta} \ + -O ~{outputBam} \ + -L ~{sep=' -L ' intervals} } output { -- GitLab