From 38d3970ed6fdb323e31fb19e68c7cea8ca35aa07 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Sun, 23 Aug 2015 11:03:35 +0200 Subject: [PATCH] Added lengths --- .../scala/nl/lumc/sasc/biopet/tools/SquishBed.scala | 11 +++++++++++ .../lumc/sasc/biopet/utils/intervals/BedRecord.scala | 2 ++ .../sasc/biopet/utils/intervals/BedRecordList.scala | 2 ++ 3 files changed, 15 insertions(+) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala index 971be2f46..7a424f510 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala @@ -38,7 +38,18 @@ object SquishBed extends ToolCommand { logger.info("Start") val records = BedRecordList.fromFile(cmdArgs.input) + val length = records.length + val refLength = BedRecordList.combineOverlap(records).length + logger.info(s"Total bases: $length") + logger.info(s"Total bases on reference: $refLength") + logger.info("Start squishing") val squishBed = records.squishBed(cmdArgs.strandSensitive).sort + logger.info("Done squishing") + val squishLength = squishBed.length + val squishRefLength = BedRecordList.combineOverlap(squishBed).length + logger.info(s"Total bases left: $squishLength") + logger.info(s"Total bases left on reference: $squishRefLength") + logger.info(s"Total bases removed from ref: ${refLength - squishRefLength}") squishBed.writeToFile(cmdArgs.output) logger.info("Done") diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala index a7e346fb6..543e41509 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala +++ 
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecord.scala @@ -24,6 +24,8 @@ case class BedRecord(chr: String, else _originals } + /** Number of bases covered, assuming standard BED coordinates (0-based start, exclusive end). */ def length = end - start + lazy val exons = if (blockCount.isDefined && blockSizes.length > 0 && blockStarts.length > 0) { Some(BedRecordList.fromList(for (i <- 0 to blockCount.get) yield { val exonNumber = strand match { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala index 20ee5cd81..ed2c873d3 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala @@ -25,6 +25,8 @@ class BedRecordList(val chrRecords: Map[String, List[BedRecord]]) { .dropWhile(_.end < record.start) .takeWhile(_.start <= record.end) + /** Sum of length over every record in allRecords; overlapping records are each counted in full. */ def length = allRecords.foldLeft(0L)((a,b) => a + b.length) + def squishBed(strandSensitive: Boolean = true) = BedRecordList.fromList { (for ((chr, records) <- sort.chrRecords; record <- records) yield { val overlaps = overlapWith(record) -- GitLab