From 90fe82b42ef86f59c5e9db83626c7e9e442cb12f Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Wed, 19 Aug 2015 17:10:17 +0200
Subject: [PATCH] Combining duplicate records

---
 .../lumc/sasc/biopet/tools/RegionAfCount.scala | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala
index 51cfd8e38..96446e4d1 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala
@@ -27,6 +27,7 @@ import htsjdk.variant.variantcontext.writer.{AsyncVariantContextWriter, VariantC
 import htsjdk.variant.variantcontext.{VariantContext, VariantContextBuilder}
 import htsjdk.variant.vcf.{VCFFileReader, VCFHeaderLineCount, VCFHeaderLineType, VCFInfoHeaderLine}
 import nl.lumc.sasc.biopet.core.ToolCommand
+import nl.lumc.sasc.biopet.extensions.rscript.ScatterPlot
 
 import scala.collection.JavaConversions._
 import scala.collection.JavaConverters._
@@ -67,7 +68,7 @@ object RegionAfCount extends ToolCommand {
 
     var c = 0
 
-    val afCounts = (for (region <- regions.par) yield region.getName -> {
+    val afCountsRaw = for (region <- regions.par) yield region.getName -> {
       val sum = (for (vcfFile <- cmdArgs.vcfFiles.par) yield vcfFile -> {
         val reader = new VCFFileReader(vcfFile, true)
         val it = reader.query(region.getContig, region.getStart, region.getEnd)
@@ -86,9 +87,22 @@ object RegionAfCount extends ToolCommand {
       if (c % 100 == 0) logger.info(s"$c regions done")
 
       sum
-    }).toMap
+    }
 
     logger.info(s"Done reading, $c regions")
+    logger.info("Combining duplicates bed records")
+
+    val afCounts: Map[String, Map[File, Double]] = { // merges records that share a region name by summing their per-file values
+      val combinedAfCounts: mutable.Map[String, mutable.Map[File, Double]] = mutable.Map()
+      for (x <- afCountsRaw.toList) { // afCountsRaw comes from regions.par; iterate a sequential copy because mutable.Map is not thread-safe
+        if (combinedAfCounts.contains(x._1)) {
+          x._2.toList.foreach(y => combinedAfCounts(x._1)(y._1) += y._2) // .toList keeps this update sequential in case x._2 is a parallel map
+        } else combinedAfCounts += x._1 -> mutable.Map(x._2.toList:_*)
+      }
+      combinedAfCounts.map(x => x._1 -> x._2.toMap).toMap // freeze into immutable maps for the code that follows
+    }
+
+    logger.info("Writing output file")
 
     val writer = new PrintWriter(cmdArgs.outputFile)
     writer.println("\t" + cmdArgs.vcfFiles.map(_.getName).mkString("\t"))
-- 
GitLab