Commit 68b01fdc authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added UCSC header skipping

parent 2ecea204
......@@ -81,7 +81,7 @@ case class BedRecord(chr: String,
object BedRecord {
def fromLine(line: String): BedRecord = {
val values = line.split("\t")
require(values.length >= 3)
require(values.length >= 3, "Not enough columns count for a bed file")
BedRecord(
values(0),
values(1).toInt,
......
......@@ -10,7 +10,7 @@ import nl.lumc.sasc.biopet.core.Logging
/**
* Created by pjvan_thof on 8/20/15.
*/
class BedRecordList(val chrRecords: Map[String, List[BedRecord]]) {
class BedRecordList(val chrRecords: Map[String, List[BedRecord]], header: List[String] = Nil) {
def allRecords = for (chr <- chrRecords; record <- chr._2) yield record
lazy val sort = {
......@@ -58,29 +58,38 @@ class BedRecordList(val chrRecords: Map[String, List[BedRecord]]) {
}
object BedRecordList {
def fromList(records: Traversable[BedRecord]): BedRecordList = fromList(records.toIterator)
def fromListWithHeader(records: Traversable[BedRecord],
header: List[String]): BedRecordList = fromListWithHeader(records.toIterator, header)
def fromList(records: TraversableOnce[BedRecord]): BedRecordList = {
def fromListWithHeader(records: TraversableOnce[BedRecord], header: List[String]): BedRecordList = {
val map = mutable.Map[String, ListBuffer[BedRecord]]()
for (record <- records) {
if (!map.contains(record.chr)) map += record.chr -> ListBuffer()
map(record.chr) += record
}
new BedRecordList(map.toMap.map(m => m._1 -> m._2.toList))
new BedRecordList(map.toMap.map(m => m._1 -> m._2.toList), header)
}
def fromList(records: Traversable[BedRecord]): BedRecordList = fromListWithHeader(records.toIterator, Nil)
def fromList(records: TraversableOnce[BedRecord]): BedRecordList = fromListWithHeader(records, Nil)
def fromFile(bedFile: File) = {
var lineCount = 0L
fromList(Source.fromFile(bedFile).getLines().map(line => {
lineCount += 1
try {
val reader = Source.fromFile(bedFile)
val all = reader.getLines().toList
val header = all.takeWhile(x => x.startsWith("browser") || x.startsWith("track"))
var lineCount = header.length
val content = all.drop(lineCount)
try {
fromListWithHeader(content.map(line => {
lineCount += 1
BedRecord.fromLine(line).validate
} catch {
case e: Exception =>
Logging.logger.error(s"Parsing line number $lineCount failed on file: ${bedFile.getAbsolutePath}")
throw e
}
}))
}), header)
} catch {
case e: Exception =>
Logging.logger.error(s"Parsing line number $lineCount failed on file: ${bedFile.getAbsolutePath}")
throw e
}
}
def combineOverlap(list: BedRecordList): BedRecordList = {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment