From 3b17be5a034af21f2501aad0bce9df37fd96dccb Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Sat, 22 Aug 2015 15:22:00 +0200 Subject: [PATCH] Fix SquishBed --- .../nl/lumc/sasc/biopet/tools/SquishBed.scala | 13 ++++++-- .../utils/intervals/BedRecordList.scala | 33 +++++++++++-------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala index 41ac62d31..73daabbc6 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SquishBed.scala @@ -4,13 +4,16 @@ import java.io.File import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.utils.intervals.BedRecordList +import nl.lumc.sasc.biopet.utils.intervals.BedRecord /** * Created by pjvanthof on 22/08/15. */ object SquishBed extends ToolCommand { - case class Args(input: File = null, output: File = null) extends AbstractArgs + case class Args(input: File = null, + output: File = null, + strandSensitive: Boolean = false) extends AbstractArgs class OptParser extends AbstractOptParser { opt[File]('I', "input") required () valueName "<file>" action { (x, c) => @@ -19,6 +22,9 @@ object SquishBed extends ToolCommand { opt[File]('o', "output") required () unbounded () valueName "<file>" action { (x, c) => c.copy(output = x) } + opt[Unit]('s', "strandSensitive") unbounded () valueName "<file>" action { (x, c) => + c.copy(strandSensitive = true) + } } /** @@ -30,7 +36,8 @@ object SquishBed extends ToolCommand { if (!cmdArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + cmdArgs.input) - val records = BedRecordList.fromFile(cmdArgs.input).squishBed() - + val records = BedRecordList.fromFile(cmdArgs.input).sort + val squishBed = records.squishBed(cmdArgs.strandSensitive).sort + squishBed.writeToFile(cmdArgs.output) } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala index 2b9f6350f..2f43f7ab5 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/intervals/BedRecordList.scala @@ -12,9 +12,11 @@ import scala.io.Source class BedRecordList(val chrRecords: Map[String, List[BedRecord]]) { def allRecords = for (chr <- chrRecords; record <- chr._2) yield record - def sort = new BedRecordList(chrRecords.map(x => x._1 -> x._2.sortBy(_.start))) + def sort = new BedRecordList(chrRecords.map(x => x._1 -> x._2.sortWith((a, b) => a.start < b.start))) - def overlapWith(record: BedRecord) = chrRecords + lazy val isSorted = this == this.sort + + def overlapWith(record: BedRecord) = (if (isSorted) this else sort).chrRecords .getOrElse(record.chr, Nil) .dropWhile(_.end < record.start) .takeWhile(_.start <= record.end) @@ -22,18 +24,23 @@ class BedRecordList(val chrRecords: Map[String, List[BedRecord]]) { def squishBed(strandSensitive: Boolean = true) = BedRecordList.fromList { (for ((chr, records) <- chrRecords; record <- records) yield { val overlaps = overlapWith(record) - .filterNot(strandSensitive && _.strand == record.strand) + .filterNot(strandSensitive && _.strand != record.strand) .filterNot(_.name == record.name) - overlaps.foldLeft(List(record))((result, overlap) => { - (for (r <- result) yield { - (overlap.start < r.start, overlap.end > r.end) match { - case (true, true) => Nil - case (true, false) => List(r.copy(start = overlap.end + 1)) - case (false, true) => List(r.copy(end = overlap.start - 1)) - case (false, false) => List(r.copy(end = overlap.start - 1), r.copy(start = overlap.end + 1)) - } - }).flatten - }) + if (overlaps.isEmpty) { + List(record) + } else { + overlaps + .foldLeft(List(record))((result, overlap) => { + (for (r <- result) yield { + (overlap.start < r.start, overlap.end > r.end) match { + case (true, true) => Nil + case (true, false) => List(r.copy(start = overlap.end + 1)) + case (false, true) => List(r.copy(end = overlap.start - 1)) + case (false, false) => List(r.copy(end = overlap.start - 1), r.copy(start = overlap.end + 1)) + } + }).flatten + }) + } }).flatten } -- GitLab