From 198fc86c328ea18e00d3b28632cf2870f66260ce Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Wed, 19 Aug 2015 15:37:11 +0200
Subject: [PATCH] Added progress logging and parallel region/VCF processing

---
 .../sasc/biopet/tools/RegionAfCount.scala     | 23 +++++++++++++++----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala
index 2052b7a56..f7c4324d2 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala
@@ -55,6 +55,8 @@ object RegionAfCount extends ToolCommand {
     val argsParser = new OptParser
     val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
 
+    logger.info("Start")
+
     val regions = (for (line <- Source.fromFile(cmdArgs.bedFile).getLines()) yield {
       val values = line.split("\t")
       require(values.length >= 3, "to less columns in bed file")
@@ -63,8 +65,10 @@ object RegionAfCount extends ToolCommand {
       new Interval(values(0), values(1).toInt, values(2).toInt, true, name)
     }).toList
 
-    val counts = (for (region <- regions) yield region.getName -> {
-      (for (vcfFile <- cmdArgs.vcfFiles) yield vcfFile -> {
+    var c = 0
+
+    val afCounts = (for (region <- regions.par) yield region.getName -> {
+      val sum = (for (vcfFile <- cmdArgs.vcfFiles.par) yield vcfFile -> {
         val reader = new VCFFileReader(vcfFile, true)
         val it = reader.query(region.getContig, region.getStart, region.getEnd)
         val sum = (for (v <- it) yield {
@@ -77,14 +81,23 @@ object RegionAfCount extends ToolCommand {
         reader.close()
         sum
       }).toMap
+
+      c += 1
+      if (c % 100 == 0) logger.info(s"$c regions done")
+
+      sum
     }).toMap
 
+    logger.info(s"Done reading, $c regions")
+
     val writer = new PrintWriter(cmdArgs.outputFile)
     writer.println("\t" + cmdArgs.vcfFiles.map(_.getName).mkString("\t"))
-    for (c <- counts) {
-      writer.print(c._1 + "\t")
-      writer.println(cmdArgs.vcfFiles.map(c._2(_)).mkString("\t"))
+    for (r <- regions) {
+      writer.print(r.getName + "\t")
+      writer.println(cmdArgs.vcfFiles.map(afCounts(r.getName)(_)).mkString("\t"))
     }
     writer.close()
+
+    logger.info("Done")
   }
 }
\ No newline at end of file
-- 
GitLab