Skip to content
Snippets Groups Projects
Commit 26d14d83 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Refactored code to avoid collection.breakOut. (With help from Peter)

parent d2d52a5a
No related branches found
No related tags found
No related merge requests found
......@@ -21,6 +21,7 @@ import htsjdk.samtools.{ SamReader, SamReaderFactory }
import nl.lumc.sasc.biopet.utils.intervals.{ BedRecord, BedRecordList }
import scala.collection.JavaConversions._
import scala.collection.mutable
import scala.collection.parallel.immutable
/**
......@@ -65,33 +66,53 @@ object BamUtils {
// create a bedList to divide the contig into multiple pieces
val insertSizesOnAllFragments = BedRecordList.fromList(Seq(BedRecord(contig, start, end)))
.scatter(binSize)
.allRecords.par.map({
.allRecords.par.flatMap({
bedRecord =>
// for each scatter, open the bamfile for this specific region-query
val inputSam: SamReader = SamReaderFactory.makeDefault.open(inputBam)
val samIterator = inputSam.query(bedRecord.chr, bedRecord.start, bedRecord.end, true)
val insertsizes: List[Int] = (for {
read <- samIterator.toStream.takeWhile(rec => {
// TODO: The minimum mapping quality (10) is hard-coded. I'm not sure whether this is the best practice for selecting reads with a minimum required quality.
val minQ10 = rec.getMappingQuality >= 10
// with properPairFlag we exclude readpairs that span multiple contigs.
val paired = rec.getReadPairedFlag && rec.getProperPairFlag
val bothMapped = if (paired) ((rec.getReadUnmappedFlag == false) && (rec.getMateUnmappedFlag == false)) else false
paired && bothMapped && minQ10
}).take(samplingSize)
} yield {
read.getInferredInsertSize.asInstanceOf[Int].abs
})(collection.breakOut)
val lociInsertSize = insertsizes.foldLeft((0.0, 0))((t, r) => (t._1 + r, t._2 + 1))
samIterator.close()
inputSam.close()
if (lociInsertSize._2 == 0) None else Some((lociInsertSize._1 / lociInsertSize._2).toInt)
}).toList.flatten
val contigInsertSize = insertSizesOnAllFragments.foldLeft((0.0, 0))((t, r) => (t._1 + r, t._2 + 1))
if (contigInsertSize._2 == 0) None else Some((contigInsertSize._1 / contigInsertSize._2).toInt)
val counts: mutable.Map[Int, Int] = mutable.Map()
for (i <- 0 until samplingSize if samIterator.hasNext) {
val rec = samIterator.next()
val isPaired = rec.getReadPairedFlag
val minQ10 = rec.getMappingQuality >= 10
val pairOnSameContig = rec.getContig == rec.getMateReferenceName
if (isPaired && minQ10 && pairOnSameContig) {
val insertSize = rec.getInferredInsertSize.abs
counts(insertSize) = counts.getOrElse(insertSize, 0) + 1
}
}
counts.keys.size match {
case 1 => Some(counts.keys.head)
case 0 => None
case _ => {
Some(counts.foldLeft(0)((old, observation) => {
observation match {
case (insertSize: Int, observations: Int) => {
(old + (insertSize * observations)) / (observations + 1)
}
case _ => 0
}
}))
}
}
})
insertSizesOnAllFragments.size match {
case 1 => Some(insertSizesOnAllFragments.head)
case 0 => None
case _ => {
Some(
insertSizesOnAllFragments.foldLeft(0)((old, observation) => {
(old + observation) / 2
}))
}
}
}
/**
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment