From 3483a1be6cab2482d328ee7ebd45a932db514dc7 Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Sun, 23 Aug 2015 10:48:38 +0200
Subject: [PATCH] Cache the sorted BedRecordList instead of re-sorting on every call

---
 .../nl/lumc/sasc/biopet/tools/SquishBed.scala |  2 +-
 .../utils/intervals/BedRecordList.scala       | 25 ++++++++++---------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala
index 76bca165a..971be2f46 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala
@@ -37,7 +37,7 @@ object SquishBed extends ToolCommand {
 
     logger.info("Start")
 
-    val records = BedRecordList.fromFile(cmdArgs.input).sort
+    val records = BedRecordList.fromFile(cmdArgs.input)
     val squishBed = records.squishBed(cmdArgs.strandSensitive).sort
     squishBed.writeToFile(cmdArgs.output)
 
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala
index a0deb868b..20ee5cd81 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala
@@ -13,21 +13,20 @@ import nl.lumc.sasc.biopet.core.Logging
 class BedRecordList(val chrRecords: Map[String, List[BedRecord]]) {
   def allRecords = for (chr <- chrRecords; record <- chr._2) yield record
 
-  def sort = new BedRecordList(chrRecords.map(x => x._1 -> x._2.sortWith((a, b) => a.start < b.start)))
-
-  lazy val isSorted = {
-    val sorted = this.sort
-    sorted.chrRecords.forall(x => x._2 == chrRecords(x._1))
+  lazy val sort = {
+    val sorted = new BedRecordList(chrRecords.map(x => x._1 -> x._2.sortWith((a, b) => a.start < b.start)))
+    if (sorted.chrRecords.forall(x => x._2 == chrRecords(x._1))) this else sorted
   }
 
-  def overlapWith(record: BedRecord) = (if (isSorted) this else sort).chrRecords
+  lazy val isSorted = sort.hashCode() == this.hashCode() || sort.chrRecords.forall(x => x._2 == chrRecords(x._1))
+
+  def overlapWith(record: BedRecord) = sort.chrRecords
     .getOrElse(record.chr, Nil)
     .dropWhile(_.end < record.start)
     .takeWhile(_.start <= record.end)
 
   def squishBed(strandSensitive: Boolean = true) = BedRecordList.fromList {
-    if (!isSorted) Logging.logger.warn("Running squish bed method on a unsorted bed file may not work correctly")
-    (for ((chr, records) <- chrRecords; record <- records) yield {
+    (for ((chr, records) <- sort.chrRecords; record <- records) yield {
       val overlaps = overlapWith(record)
         .filterNot(strandSensitive && _.strand != record.strand)
         .filterNot(_.name == record.name)
@@ -60,10 +59,12 @@ object BedRecordList {
   def fromList(records: Traversable[BedRecord]): BedRecordList = fromList(records.toIterator)
 
   def fromList(records: TraversableOnce[BedRecord]): BedRecordList = {
-    val map = mutable.Map[String, List[BedRecord]]()
-    for (record <- records)
-      map += record.chr -> (record :: map.getOrElse(record.chr, List()))
-    new BedRecordList(map.toMap)
+    val map = mutable.Map[String, ListBuffer[BedRecord]]()
+    for (record <- records) {
+      if (!map.contains(record.chr)) map += record.chr -> ListBuffer()
+      map(record.chr) += record
+    }
+    new BedRecordList(map.toMap.map(m => m._1 -> m._2.toList))
   }
 
   def fromFile(bedFile: File) = {
-- 
GitLab