From 5f0a38583d45c652f44e90dc0d88e3cc1fe0bc0c Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Wed, 19 Aug 2015 19:00:18 +0200
Subject: [PATCH] Added option to only use the exons from a bed file

---
 .../sasc/biopet/tools/RegionAfCount.scala     | 40 +++++++++++++------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala
index b2e13bcf5..1cbaee619 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/RegionAfCount.scala
@@ -39,7 +39,8 @@ object RegionAfCount extends ToolCommand {
   case class Args(bedFile: File = null,
                   outputFile: File = null,
                   scatterpPlot: Option[File] = None,
-                  vcfFiles: List[File] = Nil) extends AbstractArgs
+                  vcfFiles: List[File] = Nil,
+                  exonsOnly: Boolean = false) extends AbstractArgs
 
   class OptParser extends AbstractOptParser {
     opt[File]('b', "bedFile") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
@@ -54,6 +55,9 @@ object RegionAfCount extends ToolCommand {
     opt[File]('V', "vcfFile") unbounded () minOccurs 1 action { (x, c) =>
       c.copy(vcfFiles = c.vcfFiles ::: x :: Nil )
     }
+    opt[Unit]("exonsOnly") unbounded () action { (x, c) =>
+      c.copy(exonsOnly = true )
+    }
   }
 
   def main(args: Array[String]): Unit = {
@@ -68,25 +72,37 @@ object RegionAfCount extends ToolCommand {
       require(values.length >= 3, "to less columns in bed file")
       val name = if (values.length >= 4) values(3)
       else values(0) + ":" + values(1) + "-" + values(2)
-      new Interval(values(0), values(1).toInt, values(2).toInt, true, name)
+      val chr = values(0)
+      val start = values(1).toInt
+      val end = values(2).toInt
+
+      if (cmdArgs.exonsOnly) {
+        // Only reads the exons
+        require(values.length >= 12, "No exon information in bed file")
+        val blockSizes = values(10).split(",").map(_.toInt)
+        val blockStarts = values(11).split(",").map(_.toInt)
+        blockSizes.zip(blockStarts).map(x => new Interval(chr, start + x._2, start + x._2 + x._1, true, name)).toList
+      } else List(new Interval(chr, start, end, true, name))
     }).toList
 
     logger.info("Reading vcf files")
 
     var c = 0
-    val afCountsRaw = for (region <- regions.par) yield region.getName -> {
+    val afCountsRaw = for (region <- regions.par) yield region.head.getName -> {
       val sum = (for (vcfFile <- cmdArgs.vcfFiles.par) yield vcfFile -> {
         val reader = new VCFFileReader(vcfFile, true)
-        val it = reader.query(region.getContig, region.getStart, region.getEnd)
-        val sum = (for (v <- it) yield {
-          val bla = v.getAttribute("AF", 0) match {
-            case a:util.ArrayList[_] => a.map(_.toString.toDouble).toArray
-            case s => Array(s.toString.toDouble)
-          }
-          bla.sum
-        }).sum
+        val sums = for (r <- region) yield {
+          val it = reader.query(r.getContig, r.getStart, r.getEnd)
+          (for (v <- it) yield {
+            val bla = v.getAttribute("AF", 0) match {
+              case a: util.ArrayList[_] => a.map(_.toString.toDouble).toArray
+              case s => Array(s.toString.toDouble)
+            }
+            bla.sum
+          }).sum
+        }
         reader.close()
-        sum
+        sums.sum
       }).toMap
 
       c += 1
-- 
GitLab