From d411daacc15b77e32c1c40a2c7873bb3431dc2d8 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Wed, 20 Jan 2016 14:32:08 +0100 Subject: [PATCH] Added general merging --- .../biopet/extensions/tools/MergeTables.scala | 19 +++++++++++++++++++ .../gentrap/measures/BasesPerExon.scala | 2 ++ .../gentrap/measures/BasesPerGene.scala | 2 ++ .../gentrap/measures/CufflinksBlind.scala | 2 ++ .../gentrap/measures/CufflinksGuided.scala | 2 ++ .../gentrap/measures/CufflinksStrict.scala | 2 ++ .../gentrap/measures/FragmentsPerExon.scala | 2 ++ .../gentrap/measures/FragmentsPerGene.scala | 2 ++ .../gentrap/measures/Measurement.scala | 15 +++++++++++++-- 9 files changed, 46 insertions(+), 2 deletions(-) diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala index f4e094658..fc9d6a053 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MergeTables.scala @@ -77,3 +77,22 @@ class MergeTables(val root: Configurable) extends ToolCommandFunction { required("-o", output) + required("", repeat(inputTables), escape = false) } + +object MergeTables { + def apply(root: Configurable, + tables: List[File], + outputFile: File, + idCols: List[Int], + valCol: Int, + numHeaderLines: Int = 0, + fallback: String = "-"): MergeTables = { + val job = new MergeTables(root) + job.inputTables = tables + job.output = outputFile + job.idColumnIndices = idCols.map(_.toString) + job.valueColumnIndex = valCol + job.fallbackString = Option(fallback) + job.numHeaderLines = Option(numHeaderLines) + job + } +} \ No newline at end of file diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerExon.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerExon.scala index 43f2a4806..d0feb6c8b 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerExon.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerExon.scala @@ -8,4 +8,6 @@ import org.broadinstitute.gatk.queue.QScript */ class BasesPerExon(val root: Configurable) extends QScript with Measurement { def bamToCountFile(id: String, bamFile: File): (String, File) = ??? + + def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0") } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerGene.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerGene.scala index 599fb670c..d2650362f 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerGene.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/BasesPerGene.scala @@ -9,4 +9,6 @@ import org.broadinstitute.gatk.queue.QScript class BasesPerGene(val root: Configurable) extends QScript with Measurement { //TODO: splitting on strand if strandspecific def bamToCountFile(id: String, bamFile: File): (String, File) = ??? + + def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0") } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksBlind.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksBlind.scala index f24e0ed80..705f1fd8c 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksBlind.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksBlind.scala @@ -8,4 +8,6 @@ import org.broadinstitute.gatk.queue.QScript */ class CufflinksBlind(val root: Configurable) extends QScript with Measurement { def bamToCountFile(id: String, bamFile: File): (String, File) = ??? + + def mergeArgs = MergeArgs(List(1, 7), 10, numHeaderLines = 1, fallback = "0.0") } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksGuided.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksGuided.scala index a8b3e845a..ea1731401 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksGuided.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksGuided.scala @@ -8,4 +8,6 @@ import org.broadinstitute.gatk.queue.QScript */ class CufflinksGuided(val root: Configurable) extends QScript with Measurement { def bamToCountFile(id: String, bamFile: File): (String, File) = ??? + + def mergeArgs = MergeArgs(List(1, 7), 10, numHeaderLines = 1, fallback = "0.0") } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksStrict.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksStrict.scala index 5fa3ce96f..ceae31abe 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksStrict.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/CufflinksStrict.scala @@ -8,4 +8,6 @@ import org.broadinstitute.gatk.queue.QScript */ class CufflinksStrict(val root: Configurable) extends QScript with Measurement { def bamToCountFile(id: String, bamFile: File): (String, File) = ??? + + def mergeArgs = MergeArgs(List(1, 7), 10, numHeaderLines = 1, fallback = "0.0") } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala index 1d1fbb56e..6f1268fcc 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerExon.scala @@ -8,4 +8,6 @@ import org.broadinstitute.gatk.queue.QScript */ class FragmentsPerExon(val root: Configurable) extends QScript with Measurement { def bamToCountFile(id: String, bamFile: File): (String, File) = ??? + + def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0") } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala index 904fcde16..26bf4dc14 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/FragmentsPerGene.scala @@ -8,4 +8,6 @@ import org.broadinstitute.gatk.queue.QScript */ class FragmentsPerGene(val root: Configurable) extends QScript with Measurement { def bamToCountFile(id: String, bamFile: File): (String, File) = ??? + + def mergeArgs = MergeArgs(List(1), 2, numHeaderLines = 1, fallback = "0") } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala index 37249c593..351905676 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/measures/Measurement.scala @@ -2,6 +2,7 @@ package nl.lumc.sasc.biopet.pipelines.gentrap.measures import nl.lumc.sasc.biopet.core.Reference import nl.lumc.sasc.biopet.core.summary.SummaryQScript +import nl.lumc.sasc.biopet.extensions.tools.MergeTables import org.broadinstitute.gatk.queue.QScript /** @@ -19,6 +20,15 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript => lazy val countFiles: Map[String, File] = bamFiles.map { case (id, bamFile) => bamToCountFile(id, bamFile) } + def mergedCountFile = new File(outputDir, s"$name.merged.tsv") + + case class MergeArgs(idCols: List[Int], + valCol: Int, + numHeaderLines: Int = 0, + fallback: String = "-") + + def mergeArgs: MergeArgs + /** Init for pipeline */ def init(): Unit = { require(bamFiles.nonEmpty) @@ -26,7 +36,8 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript => /** Pipeline itself */ def biopetScript(): Unit = { - //TODO: Merging + add(MergeTables(this, countFiles.values.toList, mergedCountFile, + mergeArgs.idCols, mergeArgs.valCol, mergeArgs.numHeaderLines, mergeArgs.fallback)) //TODO: Heatmap } @@ -40,5 +51,5 @@ trait Measurement extends SummaryQScript with Reference { qscript: QScript => def summaryFiles: Map[String, File] = Map() /** Name of summary output file */ - def summaryFile: File = new File(s"$name.summary.json") + def summaryFile: File = new File(outputDir, s"$name.summary.json") } -- GitLab