From 38d3970ed6fdb323e31fb19e68c7cea8ca35aa07 Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Sun, 23 Aug 2015 11:03:35 +0200
Subject: [PATCH] Added lengths

---
 .../scala/nl/lumc/sasc/biopet/tools/SquishBed.scala   | 11 +++++++++++
 .../lumc/sasc/biopet/utils/intervals/BedRecord.scala  |  2 ++
 .../sasc/biopet/utils/intervals/BedRecordList.scala   |  2 ++
 3 files changed, 15 insertions(+)

diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala
index 971be2f46..7a424f510 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala
@@ -38,7 +38,18 @@ object SquishBed extends ToolCommand {
     logger.info("Start")
 
     val records = BedRecordList.fromFile(cmdArgs.input)
+    val length = records.length
+    val refLength = BedRecordList.combineOverlap(records).length
+    logger.info(s"Total bases: $length")
+    logger.info(s"Total bases on reference: $refLength")
+    logger.info("Start squishing")
     val squishBed = records.squishBed(cmdArgs.strandSensitive).sort
+    logger.info("Done squishing")
+    val squishLength = squishBed.length
+    val squishRefLength = BedRecordList.combineOverlap(squishBed).length
+    logger.info(s"Total bases left: $squishLength")
+    logger.info(s"Total bases left on reference: $squishRefLength")
+    logger.info(s"Total bases removed from ref: ${refLength - squishRefLength}")
     squishBed.writeToFile(cmdArgs.output)
 
     logger.info("Done")
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala
index a7e346fb6..543e41509 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala
@@ -24,6 +24,8 @@ case class BedRecord(chr: String,
     else _originals
   }
 
+  def length = end - start + 1 // counts both endpoints (inclusive span). NOTE(review): BED is 0-based half-open, where this would be end - start — confirm start/end are stored inclusive
+
   lazy val exons = if (blockCount.isDefined && blockSizes.length > 0 && blockStarts.length > 0) {
     Some(BedRecordList.fromList(for (i <- 0 to blockCount.get) yield {
       val exonNumber = strand match {
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala
index 20ee5cd81..ed2c873d3 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala
@@ -25,6 +25,8 @@ class BedRecordList(val chrRecords: Map[String, List[BedRecord]]) {
     .dropWhile(_.end < record.start)
     .takeWhile(_.start <= record.end)
 
+  def length: Long = allRecords.foldLeft(0L)(_ + _.length) // sum of the record lengths in allRecords, accumulated as Long
+
   def squishBed(strandSensitive: Boolean = true) = BedRecordList.fromList {
     (for ((chr, records) <- sort.chrRecords; record <- records) yield {
       val overlaps = overlapWith(record)
-- 
GitLab