From 3483a1be6cab2482d328ee7ebd45a932db514dc7 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Sun, 23 Aug 2015 10:48:38 +0200 Subject: [PATCH] cached sorting --- .../nl/lumc/sasc/biopet/tools/SquishBed.scala | 2 +- .../utils/intervals/BedRecordList.scala | 25 ++++++++++--------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala index 76bca165a..971be2f46 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala @@ -37,7 +37,7 @@ object SquishBed extends ToolCommand { logger.info("Start") - val records = BedRecordList.fromFile(cmdArgs.input).sort + val records = BedRecordList.fromFile(cmdArgs.input) val squishBed = records.squishBed(cmdArgs.strandSensitive).sort squishBed.writeToFile(cmdArgs.output) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala index a0deb868b..20ee5cd81 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala @@ -13,21 +13,20 @@ import nl.lumc.sasc.biopet.core.Logging class BedRecordList(val chrRecords: Map[String, List[BedRecord]]) { def allRecords = for (chr <- chrRecords; record <- chr._2) yield record - def sort = new BedRecordList(chrRecords.map(x => x._1 -> x._2.sortWith((a, b) => a.start < b.start))) - - lazy val isSorted = { - val sorted = this.sort - sorted.chrRecords.forall(x => x._2 == chrRecords(x._1)) + lazy val sort = { + val sorted = new BedRecordList(chrRecords.map(x => x._1 -> x._2.sortWith((a, b) => a.start < b.start))) + if (sorted.chrRecords.forall(x => x._2 == chrRecords(x._1))) this else sorted } - def overlapWith(record: BedRecord) = (if (isSorted) this else sort).chrRecords + lazy val isSorted = sort.hashCode() == this.hashCode() || sort.chrRecords.forall(x => x._2 == chrRecords(x._1)) + + def overlapWith(record: BedRecord) = sort.chrRecords .getOrElse(record.chr, Nil) .dropWhile(_.end < record.start) .takeWhile(_.start <= record.end) def squishBed(strandSensitive: Boolean = true) = BedRecordList.fromList { - if (!isSorted) Logging.logger.warn("Running squish bed method on a unsorted bed file may not work correctly") - (for ((chr, records) <- chrRecords; record <- records) yield { + (for ((chr, records) <- sort.chrRecords; record <- records) yield { val overlaps = overlapWith(record) .filterNot(strandSensitive && _.strand != record.strand) .filterNot(_.name == record.name) @@ -60,10 +59,12 @@ object BedRecordList { def fromList(records: Traversable[BedRecord]): BedRecordList = fromList(records.toIterator) def fromList(records: TraversableOnce[BedRecord]): BedRecordList = { - val map = mutable.Map[String, List[BedRecord]]() - for (record <- records) - map += record.chr -> (record :: map.getOrElse(record.chr, List())) - new BedRecordList(map.toMap) + val map = mutable.Map[String, ListBuffer[BedRecord]]() + for (record <- records) { + if (!map.contains(record.chr)) map += record.chr -> ListBuffer() + map(record.chr) += record + } + new BedRecordList(map.toMap.map(m => m._1 -> m._2.toList)) } def fromFile(bedFile: File) = { -- GitLab