Skip to content
Snippets Groups Projects
Commit 68b01fdc authored by Peter van 't Hof
Browse files

Added UCSC header skipping

parent 2ecea204
No related branches found
No related tags found
No related merge requests found
...@@ -81,7 +81,7 @@ case class BedRecord(chr: String, ...@@ -81,7 +81,7 @@ case class BedRecord(chr: String,
object BedRecord { object BedRecord {
def fromLine(line: String): BedRecord = { def fromLine(line: String): BedRecord = {
val values = line.split("\t") val values = line.split("\t")
require(values.length >= 3) require(values.length >= 3, "Not enough columns count for a bed file")
BedRecord( BedRecord(
values(0), values(0),
values(1).toInt, values(1).toInt,
......
...@@ -10,7 +10,7 @@ import nl.lumc.sasc.biopet.core.Logging ...@@ -10,7 +10,7 @@ import nl.lumc.sasc.biopet.core.Logging
/** /**
* Created by pjvan_thof on 8/20/15. * Created by pjvan_thof on 8/20/15.
*/ */
class BedRecordList(val chrRecords: Map[String, List[BedRecord]]) { class BedRecordList(val chrRecords: Map[String, List[BedRecord]], header: List[String] = Nil) {
def allRecords = for (chr <- chrRecords; record <- chr._2) yield record def allRecords = for (chr <- chrRecords; record <- chr._2) yield record
lazy val sort = { lazy val sort = {
...@@ -58,29 +58,38 @@ class BedRecordList(val chrRecords: Map[String, List[BedRecord]]) { ...@@ -58,29 +58,38 @@ class BedRecordList(val chrRecords: Map[String, List[BedRecord]]) {
} }
object BedRecordList { object BedRecordList {
def fromList(records: Traversable[BedRecord]): BedRecordList = fromList(records.toIterator) def fromListWithHeader(records: Traversable[BedRecord],
header: List[String]): BedRecordList = fromListWithHeader(records.toIterator, header)
def fromList(records: TraversableOnce[BedRecord]): BedRecordList = { def fromListWithHeader(records: TraversableOnce[BedRecord], header: List[String]): BedRecordList = {
val map = mutable.Map[String, ListBuffer[BedRecord]]() val map = mutable.Map[String, ListBuffer[BedRecord]]()
for (record <- records) { for (record <- records) {
if (!map.contains(record.chr)) map += record.chr -> ListBuffer() if (!map.contains(record.chr)) map += record.chr -> ListBuffer()
map(record.chr) += record map(record.chr) += record
} }
new BedRecordList(map.toMap.map(m => m._1 -> m._2.toList)) new BedRecordList(map.toMap.map(m => m._1 -> m._2.toList), header)
} }
def fromList(records: Traversable[BedRecord]): BedRecordList = fromListWithHeader(records.toIterator, Nil)
def fromList(records: TraversableOnce[BedRecord]): BedRecordList = fromListWithHeader(records, Nil)
def fromFile(bedFile: File) = { def fromFile(bedFile: File) = {
var lineCount = 0L val reader = Source.fromFile(bedFile)
fromList(Source.fromFile(bedFile).getLines().map(line => { val all = reader.getLines().toList
lineCount += 1 val header = all.takeWhile(x => x.startsWith("browser") || x.startsWith("track"))
try { var lineCount = header.length
val content = all.drop(lineCount)
try {
fromListWithHeader(content.map(line => {
lineCount += 1
BedRecord.fromLine(line).validate BedRecord.fromLine(line).validate
} catch { }), header)
case e: Exception => } catch {
Logging.logger.error(s"Parsing line number $lineCount failed on file: ${bedFile.getAbsolutePath}") case e: Exception =>
throw e Logging.logger.error(s"Parsing line number $lineCount failed on file: ${bedFile.getAbsolutePath}")
} throw e
})) }
} }
def combineOverlap(list: BedRecordList): BedRecordList = { def combineOverlap(list: BedRecordList): BedRecordList = {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment