From c1e610afedcc9ff4749bf508df6ad5ccc9b0e396 Mon Sep 17 00:00:00 2001
From: Wai Yi Leung <w.y.leung@lumc.nl>
Date: Tue, 10 May 2016 15:59:25 +0200
Subject: [PATCH] Adding compression to all VCF outputs from the SV-callers

---
 .../shiva/svcallers/Breakdancer.scala         | 14 +++++---
 .../pipelines/shiva/svcallers/Clever.scala    |  4 +--
 .../pipelines/shiva/svcallers/Delly.scala     | 35 ++++++++++++++++---
 .../pipelines/shiva/svcallers/Pindel.scala    | 10 ++++--
 4 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Breakdancer.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Breakdancer.scala
index 2bfc49478..8a14292a1 100644
--- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Breakdancer.scala
+++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Breakdancer.scala
@@ -15,7 +15,8 @@
  */
 package nl.lumc.sasc.biopet.pipelines.shiva.svcallers
 
-import nl.lumc.sasc.biopet.extensions.breakdancer.{ BreakdancerVCF, BreakdancerCaller, BreakdancerConfig }
+import nl.lumc.sasc.biopet.extensions.breakdancer.{BreakdancerCaller, BreakdancerConfig, BreakdancerVCF}
+import nl.lumc.sasc.biopet.extensions.picard.SortVcf
 import nl.lumc.sasc.biopet.utils.config.Configurable
 
 /** Script for sv caler Breakdancer */
@@ -32,10 +33,15 @@ class Breakdancer(val root: Configurable) extends SvCaller {
       val bdcfg = BreakdancerConfig(this, bamFile, new File(breakdancerSampleDir, sample + ".breakdancer.cfg"))
       val breakdancer = BreakdancerCaller(this, bdcfg.output, new File(breakdancerSampleDir, sample + ".breakdancer.tsv"))
       val bdvcf = BreakdancerVCF(this, breakdancer.output, new File(breakdancerSampleDir, sample + ".breakdancer.vcf"),
-        sample = sample)
-      add(bdcfg, breakdancer, bdvcf)
+        sample = sample + "_bd")
 
-      addVCF(sample, bdvcf.output)
+      val compresssedVCF = new SortVcf(this)
+      compresssedVCF.input = bdvcf.output
+      compresssedVCF.output = new File(breakdancerSampleDir, s"${sample}.breakdancer.vcf.gz")
+
+      add(bdcfg, breakdancer, bdvcf, compresssedVCF)
+
+      addVCF(sample, compresssedVCF.output)
     }
   }
 }
diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Clever.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Clever.scala
index 0baba5e63..606287478 100644
--- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Clever.scala
+++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Clever.scala
@@ -33,13 +33,13 @@ class Clever(val root: Configurable) extends SvCaller {
       val cleverVCF = new CleverFixVCF(this)
       cleverVCF.input = clever.outputvcf
       cleverVCF.output = new File(cleverDir, s".${sample}.clever.vcf")
-      cleverVCF.sampleName = sample
+      cleverVCF.sampleName = sample + "_cl"
       cleverVCF.isIntermediate = true
       add(cleverVCF)
 
       val sortvcf = new SortVcf(this)
       sortvcf.input = cleverVCF.output
-      sortvcf.output = new File(cleverDir, s"${sample}.clever.vcf")
+      sortvcf.output = new File(cleverDir, s"${sample}.clever.vcf.gz")
       add(sortvcf)
 
       addVCF(sample, sortvcf.output)
diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala
index 91f8468b1..7589d9f86 100644
--- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala
+++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala
@@ -17,6 +17,7 @@ package nl.lumc.sasc.biopet.pipelines.shiva.svcallers
 
 import nl.lumc.sasc.biopet.extensions.delly.DellyCaller
 import nl.lumc.sasc.biopet.extensions.gatk.CatVariants
+import nl.lumc.sasc.biopet.extensions.picard.SortVcf
 import nl.lumc.sasc.biopet.utils.config.Configurable
 
 /** Script for sv caller delly */
@@ -41,7 +42,13 @@ class Delly(val root: Configurable) extends SvCaller {
         delly.analysistype = "DEL"
         delly.outputvcf = new File(dellyDir, sample + ".delly.del.vcf")
         add(delly)
-        catVariants.variant :+= delly.outputvcf
+
+        val compresssedVCF = new SortVcf(this)
+        compresssedVCF.input = delly.outputvcf
+        compresssedVCF.output = new File(dellyDir, s"${sample}.delly.del.vcf.gz")
+        add(compresssedVCF)
+
+        catVariants.variant :+= compresssedVCF.output
       }
       if (dup) {
         val delly = new DellyCaller(this)
@@ -49,7 +56,13 @@ class Delly(val root: Configurable) extends SvCaller {
         delly.analysistype = "DUP"
         delly.outputvcf = new File(dellyDir, sample + ".delly.dup.vcf")
         add(delly)
-        catVariants.variant :+= delly.outputvcf
+
+        val compresssedVCF = new SortVcf(this)
+        compresssedVCF.input = delly.outputvcf
+        compresssedVCF.output = new File(dellyDir, s"${sample}.delly.dup.vcf.gz")
+        add(compresssedVCF)
+
+        catVariants.variant :+= compresssedVCF.output
       }
       if (inv) {
         val delly = new DellyCaller(this)
@@ -57,18 +70,30 @@ class Delly(val root: Configurable) extends SvCaller {
         delly.analysistype = "INV"
         delly.outputvcf = new File(dellyDir, sample + ".delly.inv.vcf")
         add(delly)
-        catVariants.variant :+= delly.outputvcf
+
+        val compresssedVCF = new SortVcf(this)
+        compresssedVCF.input = delly.outputvcf
+        compresssedVCF.output = new File(dellyDir, s"${sample}.delly.inv.vcf.gz")
+        add(compresssedVCF)
+
+        catVariants.variant :+= compresssedVCF.output
       }
       if (tra) {
         val delly = new DellyCaller(this)
         delly.input = bamFile
         delly.analysistype = "TRA"
         delly.outputvcf = new File(dellyDir, sample + ".delly.tra.vcf")
-        catVariants.variant :+= delly.outputvcf
         add(delly)
+
+        val compresssedVCF = new SortVcf(this)
+        compresssedVCF.input = delly.outputvcf
+        compresssedVCF.output = new File(dellyDir, s"${sample}.delly.tra.vcf.gz")
+        add(compresssedVCF)
+
+        catVariants.variant :+= compresssedVCF.output
       }
 
-      require(catVariants.variant.nonEmpty, "Must atleast 1 SV-type be selected for Delly")
+      require(catVariants.variant.nonEmpty, "At least 1 SV-type should be selected for Delly")
 
       add(catVariants)
       addVCF(sample, catVariants.outputFile)
diff --git a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala
index 25281ec11..ff6fb17d0 100644
--- a/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala
+++ b/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala
@@ -18,6 +18,7 @@ package nl.lumc.sasc.biopet.pipelines.shiva.svcallers
 import java.text.SimpleDateFormat
 import java.util.Calendar
 
+import nl.lumc.sasc.biopet.extensions.picard.SortVcf
 import nl.lumc.sasc.biopet.extensions.pindel._
 import nl.lumc.sasc.biopet.utils.config.Configurable
 
@@ -39,7 +40,7 @@ class Pindel(val root: Configurable) extends SvCaller {
       val configFile: File = new File(pindelDir, sample + ".pindel.cfg")
       val cfg = new PindelConfig(this)
       cfg.input = bamFile
-      cfg.sampleName = sample
+      cfg.sampleName = sample + "_pd" // pindel suffix
       cfg.output = configFile
       add(cfg)
 
@@ -58,7 +59,12 @@ class Pindel(val root: Configurable) extends SvCaller {
       pindelVcf.outputVCF = new File(pindelDir, s"${sample}.pindel.vcf")
       add(pindelVcf)
 
-      addVCF(sample, pindelVcf.outputVCF)
+      val compresssedVCF = new SortVcf(this)
+      compresssedVCF.input = pindelVcf.outputVCF
+      compresssedVCF.output = new File(pindelDir, s"${sample}.pindel.vcf.gz")
+      add(compresssedVCF)
+
+      addVCF(sample, compresssedVCF.output)
     }
 
   }
-- 
GitLab