From 325811625e2466362d2a292c4c143f93c6180682 Mon Sep 17 00:00:00 2001
From: ffinfo <pjrvanthof@gmail.com>
Date: Wed, 22 Aug 2018 13:33:14 +0200
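Subject: [PATCH] Switch to structs

Replace the paired file/index task inputs and the separate
refFasta/refFastaIndex/refDict inputs in gatk.wdl with the struct types
IndexedBamFile, IndexedVcfFile and Reference, and add an import of
common.wdl.

Task outputs that were previously declared as a File plus a separate
index File are now declared as a single IndexedBamFile or IndexedVcfFile
value. ApplyBQSR.recalibrated_bam changes from File to IndexedBamFile,
and the CombineGVCFs output is renamed from outputGVCF to outputVCF.
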
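---
Review notes (placed after the "---" separator, so they are not part of
the commit message):

 * CombineGVCFs: the command still expands the input as
   "-V ~{sep=' -V ' gvcfFiles}", but gvcfFiles is now
   Array[IndexedVcfFile]+. The other tasks in this patch use ".file"
   when building the command line; confirm whether this line was
   meant to change as well.
 * BaseRecalibrator, GenotypeGVCFs, HaplotypeCallerGvcf and MuTect2 use
   member access on an array or optional of structs (for example
   "knownIndelsSitesVCFs.file", "gvcfFiles.file", "inputBams.file" and
   "dbsnpVCF.file"). Please verify that the target WDL engine accepts
   this form; it may need to be replaced by plain File arrays.
 * CombineGVCFs: the output is renamed from outputGVCF to outputVCF,
   and the separate outputGVCFindex output is removed. Callers that
   reference the old names need to be updated.
 * ApplyBQSR: recalibrated_bam now includes an "index" path derived from
   outputBamPath by replacing the ".bam" suffix with ".bai". No index
   output was declared before; confirm that the command produces a file
   at that path.
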
 gatk.wdl | 141 +++++++++++++++++++++++++------------------------------
 1 file changed, 64 insertions(+), 77 deletions(-)

diff --git a/gatk.wdl b/gatk.wdl
index 1f76099..265d284 100644
--- a/gatk.wdl
+++ b/gatk.wdl
@@ -1,18 +1,17 @@
 version 1.0
 
+import "common.wdl"
+
 # Apply Base Quality Score Recalibration (BQSR) model
 task ApplyBQSR {
     input {
         String? preCommand
         File? gatkJar
-        File inputBam
-        File inputBamIndex
+        IndexedBamFile inputBam
         String outputBamPath
         File recalibrationReport
         Array[File]+ sequenceGroupInterval
-        File refDict
-        File refFasta
-        File refFastaIndex
+        Reference reference
         Int? compressionLevel
 
         Int memory = 4
@@ -30,8 +29,8 @@ task ApplyBQSR {
          ApplyBQSR \
          --create-output-bam-md5 \
          --add-output-sam-program-record \
-         -R ~{refFasta} \
-         -I ~{inputBam} \
+         -R ~{reference.fasta} \
+         -I ~{inputBam.file} \
          --use-original-qualities \
          -O ~{outputBamPath} \
          -bqsr ~{recalibrationReport} \
@@ -42,7 +41,11 @@ task ApplyBQSR {
     }
 
     output {
-        File recalibrated_bam = outputBamPath
+        IndexedBamFile recalibrated_bam = {
+            "file": outputBamPath,
+            "index": sub(outputBamPath, "\.bam$", ".bai")
+        }
+
         File recalibrated_bam_checksum = outputBamPath + ".md5"
     }
 
@@ -56,28 +59,19 @@ task BaseRecalibrator {
     input {
         String? preCommand
         File? gatkJar
-        File inputBam
-        File inputBamIndex
+        IndexedBamFile inputBam
         String recalibrationReportPath
         Array[File]+ sequenceGroupInterval
-        Array[File]? knownIndelsSitesVCFs
-        Array[File]? knownIndelsSitesIndices
-        File? dbsnpVCF
-        File? dbsnpVCFindex
-        File refDict
-        File refFasta
-        File refFastaIndex
+        Array[IndexedVcfFile]? knownIndelsSitesVCFs
+        IndexedVcfFile? dbsnpVCF
+        Reference reference
         Int memory = 4
         Float memoryMultiplier = 3.0
     }
 
     Array[File]+ knownIndelsSitesVCFsArg = flatten([
-        select_first([knownIndelsSitesVCFs, []]),
-        select_all([dbsnpVCF])
-    ])
-    Array[File]+ knownIndelsSitesIndicesArg = flatten([
-        select_first([knownIndelsSitesIndices, []]),
-        select_all([dbsnpVCFindex])
+        select_first([knownIndelsSitesVCFs.file, []]),
+        select_all([dbsnpVCF.file])
     ])
 
     String toolCommand = if defined(gatkJar)
@@ -89,8 +83,8 @@ task BaseRecalibrator {
         ~{preCommand}
         ~{toolCommand} \
         BaseRecalibrator \
-        -R ~{refFasta} \
-        -I ~{inputBam} \
+        -R ~{reference.fasta} \
+        -I ~{inputBam.file} \
         --use-original-qualities \
         -O ~{recalibrationReportPath} \
         --known-sites ~{sep=" --known-sites " knownIndelsSitesVCFsArg} \
@@ -109,17 +103,14 @@ task BaseRecalibrator {
 task CombineGVCFs {
     input {
         String? preCommand
-        Array[File]+ gvcfFiles
-        Array[File]+ gvcfFileIndexes
+        Array[IndexedVcfFile]+ gvcfFiles
         Array[File]+ intervals
 
         String outputPath
 
         String? gatkJar
 
-        File refFasta
-        File refFastaIndex
-        File refDict
+        Reference reference
 
         Int? compressionLevel #TODO This isn't being used?
         Int memory = 4
@@ -137,19 +128,21 @@ task CombineGVCFs {
         if [ ~{length(gvcfFiles)} -gt 1 ]; then
             ~{toolCommand} \
              CombineGVCFs \
-             -R ~{refFasta} \
+             -R ~{reference.fasta} \
              -O ~{outputPath} \
              -V ~{sep=' -V ' gvcfFiles} \
              -L ~{sep=' -L ' intervals}
         else # TODO this should be handeled in wdl
-            ln -sf ~{gvcfFiles[0]} ~{outputPath}
-            ln -sf ~{gvcfFileIndexes[0]} ~{outputPath}.tbi
+            ln -sf ~{gvcfFiles[0].file} ~{outputPath}
+            ln -sf ~{gvcfFiles[0].index} ~{outputPath}.tbi
         fi
     }
 
     output {
-        File outputGVCF = outputPath
-        File outputGVCFindex = outputPath + ".tbi"
+        IndexedVcfFile outputVCF = {
+            "file": outputPath,
+            "index": outputPath + ".tbi"
+        }
     }
 
     runtime {
@@ -194,20 +187,16 @@ task GatherBqsrReports {
 task GenotypeGVCFs {
     input {
         String? preCommand
-        File gvcfFiles
-        File gvcfFileIndexes
+        Array[IndexedVcfFile] gvcfFiles
         Array[File]+ intervals
 
         String outputPath
 
         String? gatkJar
 
-        File refFasta
-        File refFastaIndex
-        File refDict
+        Reference reference
 
-        File? dbsnpVCF
-        File? dbsnpVCFindex
+        IndexedVcfFile? dbsnpVCF
 
         Int? compressionLevel
         Int memory = 4
@@ -223,19 +212,21 @@ task GenotypeGVCFs {
         ~{preCommand}
         ~{toolCommand} \
         GenotypeGVCFs \
-        -R ~{refFasta} \
+        -R ~{reference.fasta} \
         -O ~{outputPath} \
-        ~{"-D " + dbsnpVCF} \
+        ~{"-D " + dbsnpVCF.file} \
         -G StandardAnnotation \
         --only-output-calls-starting-in-intervals \
         -new-qual \
-        -V ~{gvcfFiles} \
+        -V ~{sep=' -V ' gvcfFiles.file} \
         -L ~{sep=' -L ' intervals}
     }
 
     output {
-        File outputVCF = outputPath
-        File outputVCFindex = outputPath + ".tbi"
+        IndexedVcfFile outputVCF = {
+            "file": outputPath,
+            "index": outputPath + ".tbi"
+        }
     }
 
     runtime{
@@ -247,19 +238,15 @@ task GenotypeGVCFs {
 task HaplotypeCallerGvcf {
     input {
         String? preCommand
-        Array[File]+ inputBams
-        Array[File]+ inputBamsIndex
+        Array[IndexedBamFile]+ inputBams
         Array[File]+ intervalList
         String gvcfPath
-        File refDict
-        File refFasta
-        File refFastaIndex
+        Reference reference
         Float contamination = 0.0
         Int? compressionLevel
         String? gatkJar
 
-        File? dbsnpVCF
-        File? dbsnpVCFindex
+        IndexedVcfFile? dbsnpVCF
 
         Int memory = 4
         Float memoryMultiplier = 3
@@ -274,18 +261,20 @@ task HaplotypeCallerGvcf {
         ~{preCommand}
         ~{toolCommand} \
         HaplotypeCaller \
-        -R ~{refFasta} \
+        -R ~{reference.fasta} \
         -O ~{gvcfPath} \
-        -I ~{sep=" -I " inputBams} \
+        -I ~{sep=" -I " inputBams.file} \
         -L ~{sep=' -L ' intervalList} \
-        ~{"-D " + dbsnpVCF} \
+        ~{"-D " + dbsnpVCF.file} \
         -contamination ~{contamination} \
         -ERC GVCF
     }
 
     output {
-        File outputGVCF = gvcfPath
-        File outputGVCFindex = gvcfPath + ".tbi"
+        IndexedVcfFile outputGVCF = {
+            "file": gvcfPath,
+            "index": gvcfPath + ".tbi"
+        }
     }
 
     runtime {
@@ -297,11 +286,8 @@ task MuTect2 {
     input {
         String? preCommand
 
-        Array[File]+ inputBams
-        Array[File]+ inputBamIndex
-        File refFasta
-        File refFastaIndex
-        File refDict
+        Array[IndexedBamFile]+ inputBams
+        Reference reference
         String outputVcf
         String tumorSample
         String? normalSample
@@ -321,8 +307,8 @@ task MuTect2 {
         ~{preCommand}
         ~{toolCommand} \
         Mutect2 \
-        -R ~{refFasta} \
-        -I ~{sep=" -I " inputBams} \
+        -R ~{reference.fasta} \
+        -I ~{sep=" -I " inputBams.file} \
         -tumor ~{tumorSample} \
         ~{"-normal " + normalSample} \
         -O ~{outputVcf} \
@@ -330,8 +316,10 @@ task MuTect2 {
     }
 
     output {
-        File vcfFile = outputVcf
-        File vcfIndex = outputVcf + ".tbi"
+        IndexedVcfFile vcfFile = {
+            "file": outputVcf,
+            "index": outputVcf + ".tbi"
+        }
     }
 
     runtime {
@@ -343,11 +331,8 @@ task SplitNCigarReads {
     input {
         String? preCommand
 
-        File inputBam
-        File inputBamIndex
-        File refFasta
-        File refFastaIndex
-        File refDict
+        IndexedBamFile inputBam
+        Reference reference
         String outputBam
         String? gatkJar
         Array[File]+ intervals
@@ -365,15 +350,17 @@ task SplitNCigarReads {
         ~{preCommand}
         ~{toolCommand} \
         SplitNCigarReads \
-        -I ~{inputBam} \
-        -R ~{refFasta} \
+        -I ~{inputBam.file} \
+        -R ~{reference.fasta} \
         -O ~{outputBam} \
         -L ~{sep=' -L ' intervals}
     }
 
     output {
-        File bam = outputBam
-        File bamIndex = sub(outputBam, "\.bam$", ".bai")
+        IndexedBamFile bam = {
+            "file": outputBam,
+            "index": sub(outputBam, "\.bam$", ".bai")
+        }
     }
 
     runtime {
-- 
GitLab