From 0e6bde497f1a239692783c5e90c69bc38442f7de Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Mon, 23 Jul 2018 13:29:14 +0200
Subject: [PATCH] Convert gatk.wdl tasks to WDL version 1.0

---
 gatk.wdl | 296 +++++++++++++++++++++++++++++--------------------------
 1 file changed, 155 insertions(+), 141 deletions(-)

diff --git a/gatk.wdl b/gatk.wdl
index fbbc813..e6c530e 100644
--- a/gatk.wdl
+++ b/gatk.wdl
@@ -1,19 +1,22 @@
+version 1.0
 # Apply Base Quality Score Recalibration (BQSR) model
 task ApplyBQSR {
-    String? preCommand
-    File? gatkJar
-    File inputBam
-    File inputBamIndex
-    String outputBamPath
-    File recalibrationReport
-    Array[File]+ sequenceGroupInterval
-    File refDict
-    File refFasta
-    File refFastaIndex
-    Int? compressionLevel
-
-    Float? memory
-    Float? memoryMultiplier
+    input {
+        String? preCommand
+        File? gatkJar
+        File inputBam
+        File inputBamIndex
+        String outputBamPath
+        File recalibrationReport
+        Array[File]+ sequenceGroupInterval
+        File refDict
+        File refFasta
+        File refFastaIndex
+        Int? compressionLevel
+
+        Float? memory
+        Float? memoryMultiplier
+    }
 
     Int mem = ceil(select_first([memory, 4.0]))
 
@@ -23,18 +26,18 @@ task ApplyBQSR {
 
     command {
         set -e -o pipefail
-        ${preCommand}
-        ${toolCommand} \
+        ~{preCommand}
+        ~{toolCommand} \
           ApplyBQSR \
           --create-output-bam-md5 \
           --add-output-sam-program-record \
-          -R ${refFasta} \
-          -I ${inputBam} \
+          -R ~{refFasta} \
+          -I ~{inputBam} \
           --use-original-qualities \
-          -O ${outputBamPath} \
-          -bqsr ${recalibrationReport} \
+          -O ~{outputBamPath} \
+          -bqsr ~{recalibrationReport} \
           --static-quantized-quals 10 --static-quantized-quals 20 --static-quantized-quals 30 \
-          -L ${sep=" -L " sequenceGroupInterval}
+          -L ~{sep=" -L " sequenceGroupInterval}
     }
 
     output {
@@ -49,19 +52,23 @@ task ApplyBQSR {
 
 # Generate Base Quality Score Recalibration (BQSR) model
 task BaseRecalibrator {
-    String? preCommand
-    File? gatkJar
-    File inputBam
-    File inputBamIndex
-    String recalibrationReportPath
-    Array[File]+ sequenceGroupInterval
-    Array[File]? knownIndelsSitesVCFs
-    Array[File]? knownIndelsSitesIndices
-    File? dbsnpVCF
-    File? dbsnpVCFindex
-    File refDict
-    File refFasta
-    File refFastaIndex
+    input {
+        String? preCommand
+        File? gatkJar
+        File inputBam
+        File inputBamIndex
+        String recalibrationReportPath
+        Array[File]+ sequenceGroupInterval
+        Array[File]? knownIndelsSitesVCFs
+        Array[File]? knownIndelsSitesIndices
+        File? dbsnpVCF
+        File? dbsnpVCFindex
+        File refDict
+        File refFasta
+        File refFastaIndex
+        Float? memory
+        Float? memoryMultiplier
+    }
 
     Array[File]+ knownIndelsSitesVCFsArg = flatten([
         select_first([knownIndelsSitesVCFs, []]),
@@ -72,9 +79,6 @@ task BaseRecalibrator {
         select_all([dbsnpVCFindex])
     ])
 
-    Float? memory
-    Float? memoryMultiplier
-
     Int mem = ceil(select_first([memory, 4.0]))
 
     String toolCommand = if defined(gatkJar)
@@ -83,15 +87,15 @@ task BaseRecalibrator {
 
     command {
         set -e -o pipefail
-        ${preCommand}
-        ${toolCommand} \
+        ~{preCommand}
+        ~{toolCommand} \
           BaseRecalibrator \
-          -R ${refFasta} \
-          -I ${inputBam} \
+          -R ~{refFasta} \
+          -I ~{inputBam} \
           --use-original-qualities \
-          -O ${recalibrationReportPath} \
-          --known-sites ${sep=" --known-sites " knownIndelsSitesVCFsArg} \
-          -L ${sep=" -L " sequenceGroupInterval}
+          -O ~{recalibrationReportPath} \
+          --known-sites ~{sep=" --known-sites " knownIndelsSitesVCFsArg} \
+          -L ~{sep=" -L " sequenceGroupInterval}
     }
 
     output {
@@ -104,22 +108,24 @@ task BaseRecalibrator {
 }
 
 task CombineGVCFs {
-    String? preCommand
-    Array[File]+ gvcfFiles
-    Array[File]+ gvcfFileIndexes
-    Array[File]+ intervals
+    input {
+        String? preCommand
+        Array[File]+ gvcfFiles
+        Array[File]+ gvcfFileIndexes
+        Array[File]+ intervals
 
-    String outputPath
+        String outputPath
 
-    String? gatkJar
+        String? gatkJar
 
-    File refFasta
-    File refFastaIndex
-    File refDict
+        File refFasta
+        File refFastaIndex
+        File refDict
 
-    Int? compressionLevel
-    Float? memory
-    Float? memoryMultiplier
+        Int? compressionLevel
+        Float? memory
+        Float? memoryMultiplier
+    }
 
     Int mem = ceil(select_first([memory, 4.0]))
 
@@ -129,18 +135,18 @@ task CombineGVCFs {
 
     command {
         set -e -o pipefail
-        ${preCommand}
+        ~{preCommand}
 
-        if [ ${length(gvcfFiles)} -gt 1 ]; then
-            ${toolCommand} \
+        if [ ~{length(gvcfFiles)} -gt 1 ]; then
+            ~{toolCommand} \
              CombineGVCFs \
-             -R ${refFasta} \
-             -O ${outputPath} \
-             -V ${sep=' -V ' gvcfFiles} \
-             -L ${sep=' -L ' intervals}
+             -R ~{refFasta} \
+             -O ~{outputPath} \
+             -V ~{sep=' -V ' gvcfFiles} \
+             -L ~{sep=' -L ' intervals}
         else # TODO this should be handeled in wdl
-            ln -sf ${select_first(gvcfFiles)} ${outputPath}
-            ln -sf ${select_first(gvcfFileIndexes)} ${outputPath}.tbi
+            ln -sf ~{select_first(gvcfFiles)} ~{outputPath}
+            ln -sf ~{select_first(gvcfFileIndexes)} ~{outputPath}.tbi
         fi
     }
 
@@ -156,13 +162,15 @@ task CombineGVCFs {
 
 # Combine multiple recalibration tables from scattered BaseRecalibrator runs
 task GatherBqsrReports {
-    String? preCommand
-    String? gatkJar
-    Array[File] inputBQSRreports
-    String outputReportPath
-
-    Float? memory
-    Float? memoryMultiplier
+    input {
+        String? preCommand
+        String? gatkJar
+        Array[File] inputBQSRreports
+        String outputReportPath
+
+        Float? memory
+        Float? memoryMultiplier
+    }
 
     Int mem = ceil(select_first([memory, 4.0]))
 
@@ -172,11 +180,11 @@ task GatherBqsrReports {
 
     command {
         set -e -o pipefail
-        ${preCommand}
-        ${toolCommand} \
+        ~{preCommand}
+        ~{toolCommand} \
         GatherBQSRReports \
-        -I ${sep=' -I ' inputBQSRreports} \
-        -O ${outputReportPath}
+        -I ~{sep=' -I ' inputBQSRreports} \
+        -O ~{outputReportPath}
     }
 
     output {
@@ -189,25 +197,27 @@ task GatherBqsrReports {
 }
 
 task GenotypeGVCFs {
-    String? preCommand
-    File gvcfFiles
-    File gvcfFileIndexes
-    Array[File]+ intervals
+    input {
+        String? preCommand
+        File gvcfFiles
+        File gvcfFileIndexes
+        Array[File]+ intervals
 
-    String outputPath
+        String outputPath
 
-    String? gatkJar
+        String? gatkJar
 
-    File refFasta
-    File refFastaIndex
-    File refDict
+        File refFasta
+        File refFastaIndex
+        File refDict
 
-    File? dbsnpVCF
-    File? dbsnpVCFindex
+        File? dbsnpVCF
+        File? dbsnpVCFindex
 
-    Int? compressionLevel
-    Float? memory
-    Float? memoryMultiplier
+        Int? compressionLevel
+        Float? memory
+        Float? memoryMultiplier
+    }
 
     Int mem = ceil(select_first([memory, 4.0]))
 
@@ -217,18 +227,18 @@ task GenotypeGVCFs {
 
     command {
         set -e -o pipefail
-        ${preCommand}
+        ~{preCommand}
 
-        ${toolCommand} \
+        ~{toolCommand} \
          GenotypeGVCFs \
-         -R ${refFasta} \
-         -O ${outputPath} \
-         ${"-D " + dbsnpVCF} \
+         -R ~{refFasta} \
+         -O ~{outputPath} \
+         ~{"-D " + dbsnpVCF} \
          -G StandardAnnotation \
          --only-output-calls-starting-in-intervals \
          -new-qual \
-         -V ${gvcfFiles} \
-         -L ${sep=' -L ' intervals}
+         -V ~{gvcfFiles} \
+         -L ~{sep=' -L ' intervals}
     }
 
     output {
@@ -243,23 +253,25 @@ task GenotypeGVCFs {
 
 # Call variants on a single sample with HaplotypeCaller to produce a GVCF
 task HaplotypeCallerGvcf {
-    String? preCommand
-    Array[File]+ inputBams
-    Array[File]+ inputBamsIndex
-    Array[File]+ intervalList
-    String gvcfPath
-    File refDict
-    File refFasta
-    File refFastaIndex
-    Float? contamination
-    Int? compressionLevel
-    String? gatkJar
-
-    File? dbsnpVCF
-    File? dbsnpVCFindex
-
-    Float? memory
-    Float? memoryMultiplier
+    input {
+        String? preCommand  # run in the shell before the GATK command
+        Array[File]+ inputBams  # each passed as -I
+        Array[File]+ inputBamsIndex
+        Array[File]+ intervalList  # each passed as -L
+        String gvcfPath  # passed as -O
+        File refDict
+        File refFasta  # passed as -R
+        File refFastaIndex
+        Float? contamination  # -contamination value; 0 when unset
+        Int? compressionLevel
+        String? gatkJar
+
+        File? dbsnpVCF  # adds "-D <file>" only when set
+        File? dbsnpVCFindex
+
+        Float? memory  # mem uses 4.0 when unset
+        Float? memoryMultiplier
+    }
     Int mem = ceil(select_first([memory, 4.0]))
 
     String toolCommand = if defined(gatkJar)
@@ -268,15 +280,15 @@ task HaplotypeCallerGvcf {
 
     command {
         set -e -o pipefail
-        ${preCommand}
-        ${toolCommand} \
+        ~{preCommand}
+        ~{toolCommand} \
           HaplotypeCaller \
-          -R ${refFasta} \
-          -O ${gvcfPath} \
-          -I ${sep=" -I " inputBams} \
-          -L ${sep=' -L ' intervalList} \
-          ${"-D " + dbsnpVCF} \
-          -contamination ${default=0 contamination} \
+          -R ~{refFasta} \
+          -O ~{gvcfPath} \
+          -I ~{sep=" -I " inputBams} \
+          -L ~{sep=' -L ' intervalList} \
+          ~{"-D " + dbsnpVCF} \
+          -contamination ~{default=0 contamination} \
           -ERC GVCF
     }
 
@@ -291,19 +303,21 @@ task HaplotypeCallerGvcf {
 }
 
 task SplitNCigarReads {
-    String? preCommand
-
-    File inputBam
-    File inputBamIndex
-    File refFasta
-    File refFastaIndex
-    File refDict
-    String outputBam
-    String? gatkJar
-    Array[File]+ intervals
-
-    Float? memory
-    Float? memoryMultiplier
+    input {
+        String? preCommand
+
+        File inputBam
+        File inputBamIndex
+        File refFasta
+        File refFastaIndex
+        File refDict
+        String outputBam
+        String? gatkJar
+        Array[File]+ intervals
+
+        Float? memory
+        Float? memoryMultiplier
+    }
     Int mem = ceil(select_first([memory, 4.0]))
 
     String toolCommand = if defined(gatkJar)
@@ -312,13 +326,13 @@ task SplitNCigarReads {
 
     command {
         set -e -o pipefail
-        ${preCommand}
-        ${toolCommand} \
+        ~{preCommand}
+        ~{toolCommand} \
         SplitNCigarReads \
-        -I ${inputBam} \
-        -R ${refFasta} \
-        -O ${outputBam} \
-        -L ${sep=' -L ' intervals}
+        -I ~{inputBam} \
+        -R ~{refFasta} \
+        -O ~{outputBam} \
+        -L ~{sep=' -L ' intervals}
     }
 
     output {
-- 
GitLab