Commit dc8933b8 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Switch wipereads to biopet bed reader

parent 02b7c60e
......@@ -95,7 +95,7 @@ object AnnotateVcfWithBed extends ToolCommand {
}
logger.info("Reading bed file")
val bedRecords = BedRecordList.fromFile(cmdArgs.bedFile).sort
val bedRecords = BedRecordList.fromFile(cmdArgs.bedFile).sorted
logger.info("Starting output file")
......
......@@ -54,7 +54,7 @@ object RegionAfCount extends ToolCommand {
logger.info("Start")
logger.info("Reading bed file")
val bedRecords = BedRecordList.fromFile(cmdArgs.bedFile).sort
val bedRecords = BedRecordList.fromFile(cmdArgs.bedFile).sorted
logger.info(s"Combine ${bedRecords.allRecords.size} bed records")
......
......@@ -43,7 +43,7 @@ object SquishBed extends ToolCommand {
logger.info(s"Total bases: $length")
logger.info(s"Total bases on reference: $refLength")
logger.info("Start squishing")
val squishBed = records.squishBed(cmdArgs.strandSensitive).sort
val squishBed = records.squishBed(cmdArgs.strandSensitive).sorted
logger.info("Done squishing")
val squishLength = squishBed.length
val squishRefLength = squishBed.combineOverlap.length
......
......@@ -24,6 +24,7 @@ import htsjdk.tribble.AbstractFeatureReader.getFeatureReader
import htsjdk.tribble.bed.BEDCodec
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.{ ToolCommand, ToolCommandFuntion }
import nl.lumc.sasc.biopet.utils.intervals.BedRecordList
import org.apache.commons.io.FilenameUtils.getExtension
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
......@@ -84,10 +85,7 @@ object WipeReads extends ToolCommand {
logger.info("Parsing interval file ...")
/** Function to create iterator from BED file */
def makeIntervalFromBed(inFile: File): Iterator[Interval] =
asScalaIteratorConverter(getFeatureReader(inFile.toPath.toString, new BEDCodec(), false).iterator)
.asScala
.map(x => new Interval(x.getContig, x.getStart, x.getEnd))
def makeIntervalFromBed(inFile: File) = BedRecordList.fromFile(inFile).sorted.intervals.toIterator
/**
* Parses a refFlat file to yield Interval objects
......
......@@ -2,6 +2,8 @@ package nl.lumc.sasc.biopet.utils.intervals
import java.io.{ PrintWriter, File }
import htsjdk.samtools.util.Interval
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.io.Source
......@@ -13,14 +15,21 @@ import nl.lumc.sasc.biopet.core.Logging
class BedRecordList(val chrRecords: Map[String, List[BedRecord]], header: List[String] = Nil) {
def allRecords = for (chr <- chrRecords; record <- chr._2) yield record
lazy val sort = {
def intervals = allRecords.map({ x =>
(x.name, x.strand) match {
case (Some(name), Some(strand)) => new Interval(x.chr, x.start, x.end, !strand, name)
case _ => new Interval(x.chr, x.start, x.end)
}
})
lazy val sorted = {
val sorted = new BedRecordList(chrRecords.map(x => x._1 -> x._2.sortWith((a, b) => a.start < b.start)))
if (sorted.chrRecords.forall(x => x._2 == chrRecords(x._1))) this else sorted
}
lazy val isSorted = sort.hashCode() == this.hashCode() || sort.chrRecords.forall(x => x._2 == chrRecords(x._1))
lazy val isSorted = sorted.hashCode() == this.hashCode() || sorted.chrRecords.forall(x => x._2 == chrRecords(x._1))
def overlapWith(record: BedRecord) = sort.chrRecords
def overlapWith(record: BedRecord) = sorted.chrRecords
.getOrElse(record.chr, Nil)
.dropWhile(_.end < record.start)
.takeWhile(_.start <= record.end)
......@@ -28,7 +37,7 @@ class BedRecordList(val chrRecords: Map[String, List[BedRecord]], header: List[S
def length = allRecords.foldLeft(0L)((a, b) => a + b.length)
def squishBed(strandSensitive: Boolean = true) = BedRecordList.fromList {
(for ((chr, records) <- sort.chrRecords; record <- records) yield {
(for ((chr, records) <- sorted.chrRecords; record <- records) yield {
val overlaps = overlapWith(record)
.filterNot(strandSensitive && _.strand != record.strand)
.filterNot(_.name == record.name)
......@@ -51,7 +60,7 @@ class BedRecordList(val chrRecords: Map[String, List[BedRecord]], header: List[S
}
def combineOverlap: BedRecordList = {
new BedRecordList(for ((chr, records) <- sort.chrRecords) yield chr -> {
new BedRecordList(for ((chr, records) <- sorted.chrRecords) yield chr -> {
def combineOverlap(records: List[BedRecord],
newRecords: ListBuffer[BedRecord] = ListBuffer()): List[BedRecord] = {
if (records.nonEmpty) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment