From ffe6ea8cff029df52682280a703f20393e9a0e14 Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Thu, 20 Aug 2015 17:53:43 +0200
Subject: [PATCH] Added a generic bed reader

---
 .../biopet/utils/intervals/BedRecord.scala    |  4 +-
 .../utils/intervals/BedRecordList.scala       | 57 +++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala

diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala
index 441c0652d..eb3e2ac82 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala
@@ -15,6 +15,8 @@ case class BedRecord(chr: String,
                      blockCount: Option[Int] = None,
                      blockSizes: Array[Int] = Array(),
                      blockStarts: Array[Int] = Array()) {
+
+  //TODO: Complete bed line output
   override def toString = {
     s"$chr\t$start\t$end"
   }
@@ -37,7 +39,7 @@ object BedRecord {
       },
       values.lift(6).map(_.toInt),
       values.lift(7)map(_.toInt),
-      values.lift(8).map(_.split(",", 3).map(_.toInt)).map(x => (x(0),x(1),x(2))),
+      values.lift(8).map(_.split(",", 3).map(_.toInt)).map(x => (x.lift(0).getOrElse(0),x.lift(1).getOrElse(0),x.lift(2).getOrElse(0))),
       values.lift(9).map(_.toInt),
       values.lift(10).map(_.split(",").map(_.toInt)).getOrElse(Array()),
       values.lift(11).map(_.split(",").map(_.toInt)).getOrElse(Array())
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala
new file mode 100644
index 000000000..f734464a3
--- /dev/null
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala
@@ -0,0 +1,57 @@
+package nl.lumc.sasc.biopet.utils.intervals
+
+import java.io.File
+
+import htsjdk.samtools.util.Interval
+
+import scala.annotation.tailrec
+import scala.collection.mutable
+import scala.collection.mutable.ListBuffer
+import scala.io.Source
+
+/** Bed records grouped per chromosome name. Created by pjvan_thof on 8/20/15. */
+class BedRecordList(val chrRecords: Map[String, List[BedRecord]]) {
+  /** All records of every chromosome as one flat collection. */
+  def allRecords = for (chr <- chrRecords; record <- chr._2) yield record
+
+  /** A new list in which the records of each chromosome are ordered by start position. */
+  def sort = new BedRecordList(chrRecords.map(x => x._1 -> x._2.sortBy(_.start)))
+
+  /** Records on the chromosome of `record` that overlap it (bounds compared inclusively).
+   *  Every record is tested, so nested or unsorted records cannot distort the result. */
+  def overlapWith(record: BedRecord) = chrRecords.getOrElse(record.chr, Nil)
+    .filter(other => other.end >= record.start && other.start <= record.end)
+}
+
+object BedRecordList {
+  /** Builds a list from an in-memory collection by delegating to the TraversableOnce overload. */
+  def fromList(records: Traversable[BedRecord]): BedRecordList = fromList(records.toIterator)
+
+  /** Groups the records by chromosome, keeping input order (prepending used to reverse it). */
+  def fromList(records: TraversableOnce[BedRecord]): BedRecordList =
+    new BedRecordList(records.toList.groupBy(_.chr))
+
+  /** Parses every line of `bedFile` with `BedRecord.fromLine` and closes the file afterwards. */
+  def fromFile(bedFile: File): BedRecordList = {
+    val reader = Source.fromFile(bedFile)
+    // fromList drains the line iterator before returning, so closing in `finally` is safe
+    try fromList(reader.getLines().map(BedRecord.fromLine(_)))
+    finally reader.close()
+  }
+
+  /**
+   * Merges, per chromosome, records that overlap or share a boundary position; only chr, start
+   * and end are kept. Records are sorted by start first and the end of the region being built
+   * grows with every absorbed record, so chains such as 1-10, 5-20, 15-30 collapse into 1-30.
+   */
+  def combineOverlap(list: BedRecordList): BedRecordList = {
+    new BedRecordList(for ((chr, records) <- list.sort.chrRecords) yield chr -> {
+      records.foldLeft(List[BedRecord]()) {
+        // the region being built sits at the head; extend it while the next record starts inside
+        case (last :: merged, record) if record.start <= last.end =>
+          BedRecord(last.chr, last.start, math.max(last.end, record.end)) :: merged
+        case (merged, record) => BedRecord(record.chr, record.start, record.end) :: merged
+      }.reverse
+    })
+  }
+}
\ No newline at end of file
-- 
GitLab