From 1d463d8278d6dac62be613141227ae24c428800b Mon Sep 17 00:00:00 2001
From: Wai Yi Leung <w.y.leung@lumc.nl>
Date: Wed, 9 Mar 2016 15:23:06 +0100
Subject: [PATCH] Change executable from pysvtools for SV merging. Add all
 project VCF file merge into 1 global merge file

---
 .../sasc/biopet/extensions/Pysvtools.scala    |  2 +-
 .../nl/lumc/sasc/biopet/tools/SeqStat.scala   | 10 +++++-----
 .../lumc/sasc/biopet/utils/BamUtilsTest.scala |  4 ++--
 .../pipelines/shiva/ShivaSvCalling.scala      | 19 ++++++++++++++++---
 4 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala
index 59f8f1ef4..ebc09a328 100644
--- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Pysvtools.scala
@@ -28,7 +28,7 @@ class Pysvtools(val root: Configurable) extends BiopetCommandLineFunction {
   var bedoutput: File = _
   var regionsoutput: File = _
 
-  executable = config("exe", default = "mergevcf")
+  executable = config("exe", default = "vcf_merge_sv_events")
 
   def versionRegex = """PySVtools (.*)""".r
   def versionCommand = executable + " --version"
diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala
index 69b8e7a8a..fcfcf9e25 100644
--- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala
+++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/SeqStat.scala
@@ -89,10 +89,10 @@ object SeqStat extends ToolCommand {
 
     (qual_low_boundery < 59, qual_high_boundery > 74) match {
       case (false, true) => phredEncoding = Solexa
-        // TODO: check this later on
-        // complex case, we cannot tell wheter this is a sanger or solexa
-        // but since the qual_high_boundery exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
-        // New @ 2016/01/26: Illumina X ten samples can contain Phred=Q42 (qual_high_boundery==75/K)
+      // TODO: check this later on
+      // complex case, we cannot tell wheter this is a sanger or solexa
+      // but since the qual_high_boundery exceeds any Sanger/Illumina1.8 quals, we can `assume` this is solexa
+      // New @ 2016/01/26: Illumina X ten samples can contain Phred=Q42 (qual_high_boundery==75/K)
       case (true, true)  => phredEncoding = Solexa
       // this is definite a sanger sequence, the lower end is sanger only
       case (true, false) => phredEncoding = Sanger
@@ -181,7 +181,7 @@ object SeqStat extends ToolCommand {
         quals ++= mutable.ArrayBuffer.fill(baseStats(pos).qual.length - quals.length)(0)
       }
       if (nucs.length <= baseStats(pos).nucs.length) {
-        nucs ++= mutable.ArrayBuffer.fill( baseStats(pos).nucs.length - nucs.length )(0)
+        nucs ++= mutable.ArrayBuffer.fill(baseStats(pos).nucs.length - nucs.length)(0)
       }
       // count into the quals
       baseStats(pos).qual.zipWithIndex foreach { case (value, index) => quals(index) += value }
diff --git a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/BamUtilsTest.scala b/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/BamUtilsTest.scala
index 355d0d8df..75021402f 100644
--- a/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/BamUtilsTest.scala
+++ b/public/biopet-utils/src/test/scala/nl/lumc/sasc/biopet/utils/BamUtilsTest.scala
@@ -3,11 +3,11 @@ package nl.lumc.sasc.biopet.utils
 import java.io.File
 
 import htsjdk.samtools._
-import org.mockito.Mockito.{inOrder => inOrd}
+import org.mockito.Mockito.{ inOrder => inOrd }
 import org.scalatest.Matchers
 import org.scalatest.mock.MockitoSugar
 import org.scalatest.testng.TestNGSuite
-import org.testng.annotations.{BeforeClass, Test}
+import org.testng.annotations.{ BeforeClass, Test }
 
 /**
  * Created by wyleung on 22-2-16.
diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala
index 6c7652170..4407a9091 100644
--- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala
+++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala
@@ -33,6 +33,9 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
 
   def this() = this(null)
 
+  var outputMergedVCFbySample: Map[String, File] = Map()
+  var outputMergedVCF: File = _
+
   @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
   protected[shiva] var inputBamsArg: List[File] = Nil
 
@@ -41,6 +44,7 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
   /** Executed before script */
   def init(): Unit = {
     if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
+    outputMergedVCF = new File(outputDir, "allsamples.merged.vcf")
   }
 
   /** Variantcallers requested by the config */
@@ -56,7 +60,7 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
     val callers = callersList.filter(x => configCallers.contains(x.name))
 
     require(inputBams.nonEmpty, "No input bams found")
-    require(callers.nonEmpty, "must select at least 1 SV caller, choices are: " + callersList.map(_.name).mkString(", "))
+    require(callers.nonEmpty, "Please select at least 1 SV caller, choices are: " + callersList.map(_.name).mkString(", "))
 
     callers.foreach { caller =>
       caller.inputBams = inputBams
@@ -74,9 +78,15 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
       mergeSVcalls.input = sampleVCFS.flatten
       mergeSVcalls.output = new File(outputDir, sample + ".merged.vcf")
       add(mergeSVcalls)
-      //      outputFiles += (sample -> mergeSVcalls.output)
+      outputMergedVCFbySample += (sample -> mergeSVcalls.output)
     }
 
+    // merge all files from all samples in project
+    val mergeSVcallsProject = new Pysvtools(this)
+    mergeSVcallsProject.input = outputMergedVCFbySample.values.toList
+    mergeSVcallsProject.output = outputMergedVCF
+    add(mergeSVcallsProject)
+
     // merging the VCF calls by project
     // basicly this will do all samples from this pipeline run
     // group by "tags"
@@ -98,7 +108,10 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
   def summaryFiles: Map[String, File] = {
     val callers: Set[String] = configCallers
     //callersList.filter(x => callers.contains(x.name)).map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile)
-    Map()
+    Map(
+      "final_mergedvcf" -> outputMergedVCF
+
+    )
   }
 }
 
-- 
GitLab