From d411daacc15b77e32c1c40a2c7873bb3431dc2d8 Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Wed, 20 Jan 2016 14:32:08 +0100
Subject: [PATCH] Added general merging

---
 .../biopet/extensions/tools/MergeTables.scala | 19 +++++++++++++++++++
 .../gentrap/measures/BasesPerExon.scala       |  2 ++
 .../gentrap/measures/BasesPerGene.scala       |  2 ++
 .../gentrap/measures/CufflinksBlind.scala     |  2 ++
 .../gentrap/measures/CufflinksGuided.scala    |  2 ++
 .../gentrap/measures/CufflinksStrict.scala    |  2 ++
 .../gentrap/measures/FragmentsPerExon.scala   |  2 ++
 .../gentrap/measures/FragmentsPerGene.scala   |  2 ++
 .../gentrap/measures/Measurement.scala        | 15 +++++++++++++--
 9 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala
index f4e094658..fc9d6a053 100644
--- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala
+++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala
@@ -77,3 +77,35 @@ class MergeTables(val root: Configurable) extends ToolCommandFunction {
       required("-o", output) +
       required("", repeat(inputTables), escape = false)
 }
+
+object MergeTables {
+  /**
+   * Creates a [[MergeTables]] job with its table settings already filled in.
+   *
+   * @param root configurable the new job is created under
+   * @param tables input tables, stored in `inputTables`
+   * @param outputFile merged table, stored in `output` and passed to the tool as `-o`
+   * @param idCols identifier column indices, stored as strings in `idColumnIndices`
+   *               (index base is defined by the tool; presumably 1-based, to be confirmed)
+   * @param valCol value column index, stored in `valueColumnIndex`
+   * @param numHeaderLines stored in `numHeaderLines`; defaults to 0
+   * @param fallback stored in `fallbackString`; defaults to "-", a `null` becomes `None`
+   * @return the configured job, not yet added to any script
+   */
+  def apply(root: Configurable,
+            tables: List[File],
+            outputFile: File,
+            idCols: List[Int],
+            valCol: Int,
+            numHeaderLines: Int = 0,
+            fallback: String = "-"): MergeTables = {
+    val job = new MergeTables(root)
+    job.inputTables = tables
+    job.output = outputFile
+    job.idColumnIndices = idCols.map(_.toString)
+    job.valueColumnIndex = valCol
+    job.fallbackString = Option(fallback)
+    job.numHeaderLines = Option(numHeaderLines)
+    job
+  }
+}
diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerExon.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerExon.scala
index 43f2a4806..d0feb6c8b 100644
--- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerExon.scala
+++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerExon.scala
@@ -8,4 +8,7 @@ import org.broadinstitute.gatk.queue.QScript
  */
 class BasesPerExon(val root: Configurable) extends QScript with Measurement {
   def bamToCountFile(id: String, bamFile: File): (String, File) = ???
+
+  /** Merge settings: id column 1, value column 2, one header line, fallback "0". */
+  def mergeArgs: MergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0")
 }
diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerGene.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerGene.scala
index 599fb670c..d2650362f 100644
--- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerGene.scala
+++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerGene.scala
@@ -9,4 +9,7 @@ import org.broadinstitute.gatk.queue.QScript
 class BasesPerGene(val root: Configurable) extends QScript with Measurement {
   //TODO: splitting on strand if strandspecific
   def bamToCountFile(id: String, bamFile: File): (String, File) = ???
+
+  /** Merge settings: id column 1, value column 2, one header line, fallback "0". */
+  def mergeArgs: MergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0")
 }
diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksBlind.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksBlind.scala
index f24e0ed80..705f1fd8c 100644
--- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksBlind.scala
+++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksBlind.scala
@@ -8,4 +8,10 @@ import org.broadinstitute.gatk.queue.QScript
  */
 class CufflinksBlind(val root: Configurable) extends QScript with Measurement {
   def bamToCountFile(id: String, bamFile: File): (String, File) = ???
+
+  /**
+   * Merge settings: id columns 1 and 7, value column 10, one header line, fallback "0.0".
+   * Presumably tracking_id, locus and FPKM of a Cufflinks FPKM tracking file - TODO confirm.
+   */
+  def mergeArgs: MergeArgs = MergeArgs(List(1, 7), 10, numHeaderLines = 1, fallback = "0.0")
 }
diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksGuided.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksGuided.scala
index a8b3e845a..ea1731401 100644
--- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksGuided.scala
+++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksGuided.scala
@@ -8,4 +8,10 @@ import org.broadinstitute.gatk.queue.QScript
  */
 class CufflinksGuided(val root: Configurable) extends QScript with Measurement {
   def bamToCountFile(id: String, bamFile: File): (String, File) = ???
+
+  /**
+   * Merge settings: id columns 1 and 7, value column 10, one header line, fallback "0.0".
+   * Presumably tracking_id, locus and FPKM of a Cufflinks FPKM tracking file - TODO confirm.
+   */
+  def mergeArgs: MergeArgs = MergeArgs(List(1, 7), 10, numHeaderLines = 1, fallback = "0.0")
 }
diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksStrict.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksStrict.scala
index 5fa3ce96f..ceae31abe 100644
--- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksStrict.scala
+++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksStrict.scala
@@ -8,4 +8,10 @@ import org.broadinstitute.gatk.queue.QScript
  */
 class CufflinksStrict(val root: Configurable) extends QScript with Measurement {
   def bamToCountFile(id: String, bamFile: File): (String, File) = ???
+
+  /**
+   * Merge settings: id columns 1 and 7, value column 10, one header line, fallback "0.0".
+   * Presumably tracking_id, locus and FPKM of a Cufflinks FPKM tracking file - TODO confirm.
+   */
+  def mergeArgs: MergeArgs = MergeArgs(List(1, 7), 10, numHeaderLines = 1, fallback = "0.0")
 }
diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala
index 1d1fbb56e..6f1268fcc 100644
--- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala
+++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala
@@ -8,4 +8,7 @@ import org.broadinstitute.gatk.queue.QScript
  */
 class FragmentsPerExon(val root: Configurable) extends QScript with Measurement {
   def bamToCountFile(id: String, bamFile: File): (String, File) = ???
+
+  /** Merge settings: id column 1, value column 2, one header line, fallback "0". */
+  def mergeArgs: MergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0")
 }
diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala
index 904fcde16..26bf4dc14 100644
--- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala
+++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala
@@ -8,4 +8,7 @@ import org.broadinstitute.gatk.queue.QScript
  */
 class FragmentsPerGene(val root: Configurable) extends QScript with Measurement {
   def bamToCountFile(id: String, bamFile: File): (String, File) = ???
+
+  /** Merge settings: id column 1, value column 2, one header line, fallback "0". */
+  def mergeArgs: MergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0")
 }
diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala
index 37249c593..351905676 100644
--- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala
+++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala
@@ -2,6 +2,7 @@ package nl.lumc.sasc.biopet.pipelines.gentrap.measures
 
 import nl.lumc.sasc.biopet.core.Reference
 import nl.lumc.sasc.biopet.core.summary.SummaryQScript
+import nl.lumc.sasc.biopet.extensions.tools.MergeTables
 import org.broadinstitute.gatk.queue.QScript
 
 /**
@@ -19,6 +20,15 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript =>
 
   lazy val countFiles: Map[String, File] = bamFiles.map { case (id, bamFile) => bamToCountFile(id, bamFile) }
 
+  /** File inside outputDir that biopetScript hands to MergeTables as its output. */
+  def mergedCountFile: File = new File(outputDir, s"$name.merged.tsv")
+
+  /** Column layout and defaults that biopetScript forwards to MergeTables. */
+  case class MergeArgs(idCols: List[Int], valCol: Int,
+                       numHeaderLines: Int = 0, fallback: String = "-")
+
+  def mergeArgs: MergeArgs // abstract: each concrete measurement supplies its own layout
+
   /** Init for pipeline */
   def init(): Unit = {
     require(bamFiles.nonEmpty)
@@ -26,7 +36,8 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript =>
 
   /** Pipeline itself */
   def biopetScript(): Unit = {
-    //TODO: Merging
+    add(MergeTables(this, countFiles.values.toList, mergedCountFile, mergeArgs.idCols,
+      mergeArgs.valCol, numHeaderLines = mergeArgs.numHeaderLines, fallback = mergeArgs.fallback))
 
     //TODO: Heatmap
   }
@@ -40,5 +51,5 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript =>
   def summaryFiles: Map[String, File] = Map()
 
   /** Name of summary output file */
-  def summaryFile: File = new File(s"$name.summary.json")
+  def summaryFile: File = new File(outputDir, s"$name.summary.json") // not cwd-relative
 }
-- 
GitLab