From 68b01fdc371823840539bc51bd2ebf686409d8ba Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Sun, 23 Aug 2015 13:49:46 +0200
Subject: [PATCH] Added UCSC header skipping

---
 .../biopet/utils/intervals/BedRecord.scala    |  2 +-
 .../utils/intervals/BedRecordList.scala       | 37 ++++++++++++-------
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala
index c395d0bc5..9a798fbaf 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala
@@ -81,7 +81,7 @@ case class BedRecord(chr: String,
 object BedRecord {
   def fromLine(line: String): BedRecord = {
     val values = line.split("\t")
-    require(values.length >= 3)
+    require(values.length >= 3, "Not enough columns count for a bed file")
     BedRecord(
       values(0),
       values(1).toInt,
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala
index 783f32481..029356e5f 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala
@@ -10,7 +10,7 @@ import nl.lumc.sasc.biopet.core.Logging
 /**
  * Created by pjvan_thof on 8/20/15.
  */
-class BedRecordList(val chrRecords: Map[String, List[BedRecord]]) {
+class BedRecordList(val chrRecords: Map[String, List[BedRecord]], header: List[String] = Nil) {
   def allRecords = for (chr <- chrRecords; record <- chr._2) yield record
 
   lazy val sort = {
@@ -58,29 +58,38 @@ class BedRecordList(val chrRecords: Map[String, List[BedRecord]]) {
 }
 
 object BedRecordList {
-  def fromList(records: Traversable[BedRecord]): BedRecordList = fromList(records.toIterator)
+  def fromListWithHeader(records: Traversable[BedRecord], header: List[String]): BedRecordList =
+    fromListWithHeader(records.toIterator, header) // hand over to the TraversableOnce overload
 
-  def fromList(records: TraversableOnce[BedRecord]): BedRecordList = {
+  def fromListWithHeader(records: TraversableOnce[BedRecord], header: List[String]): BedRecordList = {
     val map = mutable.Map[String, ListBuffer[BedRecord]]()
     for (record <- records) {
       if (!map.contains(record.chr)) map += record.chr -> ListBuffer()
       map(record.chr) += record
     }
-    new BedRecordList(map.toMap.map(m => m._1 -> m._2.toList))
+    new BedRecordList(map.toMap.map(m => m._1 -> m._2.toList), header)
   }
 
+  def fromList(records: Traversable[BedRecord]): BedRecordList = fromListWithHeader(records, Nil) // no header lines
+
+  def fromList(records: TraversableOnce[BedRecord]): BedRecordList = fromListWithHeader(records, Nil) // no header lines
+
   def fromFile(bedFile: File) = {
-    var lineCount = 0L
-    fromList(Source.fromFile(bedFile).getLines().map(line => {
-      lineCount += 1
-      try {
+    val reader = Source.fromFile(bedFile)
+    val all = try reader.getLines().toList finally reader.close() // lines are fully read here, so release the file handle
+    val header = all.takeWhile(x => x.startsWith("browser") || x.startsWith("track")) // leading UCSC header lines
+    var lineCount = header.length // file line number of the line most recently handed to the parser
+    val content = all.drop(lineCount)
+    try {
+      fromListWithHeader(content.map(line => {
+        lineCount += 1
         BedRecord.fromLine(line).validate
-      } catch {
-        case e: Exception =>
-          Logging.logger.error(s"Parsing line number $lineCount failed on file: ${bedFile.getAbsolutePath}")
-          throw e
-      }
-    }))
+      }), header)
+    } catch {
+      case e: Exception =>
+        Logging.logger.error(s"Parsing line number $lineCount failed on file: ${bedFile.getAbsolutePath}")
+        throw e
+    }
   }
 
   def combineOverlap(list: BedRecordList): BedRecordList = {
-- 
GitLab