Skip to content
Snippets Groups Projects
Commit 26d14d83 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Refactored code to avoid collection.Breakout. (With help from Peter)

parent d2d52a5a
No related branches found
No related tags found
No related merge requests found
...@@ -21,6 +21,7 @@ import htsjdk.samtools.{ SamReader, SamReaderFactory } ...@@ -21,6 +21,7 @@ import htsjdk.samtools.{ SamReader, SamReaderFactory }
import nl.lumc.sasc.biopet.utils.intervals.{ BedRecord, BedRecordList } import nl.lumc.sasc.biopet.utils.intervals.{ BedRecord, BedRecordList }
import scala.collection.JavaConversions._ import scala.collection.JavaConversions._
import scala.collection.mutable
import scala.collection.parallel.immutable import scala.collection.parallel.immutable
/** /**
...@@ -65,33 +66,53 @@ object BamUtils { ...@@ -65,33 +66,53 @@ object BamUtils {
// create a bedList to devide the contig into multiple pieces // create a bedList to devide the contig into multiple pieces
val insertSizesOnAllFragments = BedRecordList.fromList(Seq(BedRecord(contig, start, end))) val insertSizesOnAllFragments = BedRecordList.fromList(Seq(BedRecord(contig, start, end)))
.scatter(binSize) .scatter(binSize)
.allRecords.par.map({ .allRecords.par.flatMap({
bedRecord => bedRecord =>
// for each scatter, open the bamfile for this specific region-query // for each scatter, open the bamfile for this specific region-query
val inputSam: SamReader = SamReaderFactory.makeDefault.open(inputBam) val inputSam: SamReader = SamReaderFactory.makeDefault.open(inputBam)
val samIterator = inputSam.query(bedRecord.chr, bedRecord.start, bedRecord.end, true) val samIterator = inputSam.query(bedRecord.chr, bedRecord.start, bedRecord.end, true)
val insertsizes: List[Int] = (for { val counts: mutable.Map[Int, Int] = mutable.Map()
read <- samIterator.toStream.takeWhile(rec => {
// TODO: This value is now hard-coded. I'm not sure whether this is the best practice on selecting reads with a minimum required quality. for (i <- 0 until samplingSize if samIterator.hasNext) {
val minQ10 = rec.getMappingQuality >= 10 val rec = samIterator.next()
// with properPairFlag we exclude readpairs that span multiple contigs. val isPaired = rec.getReadPairedFlag
val paired = rec.getReadPairedFlag && rec.getProperPairFlag val minQ10 = rec.getMappingQuality >= 10
val bothMapped = if (paired) ((rec.getReadUnmappedFlag == false) && (rec.getMateUnmappedFlag == false)) else false val pairOnSameContig = rec.getContig == rec.getMateReferenceName
paired && bothMapped && minQ10
}).take(samplingSize) if (isPaired && minQ10 && pairOnSameContig) {
} yield { val insertSize = rec.getInferredInsertSize.abs
read.getInferredInsertSize.asInstanceOf[Int].abs counts(insertSize) = counts.getOrElse(insertSize, 0) + 1
})(collection.breakOut) }
val lociInsertSize = insertsizes.foldLeft((0.0, 0))((t, r) => (t._1 + r, t._2 + 1)) }
samIterator.close() counts.keys.size match {
inputSam.close() case 1 => Some(counts.keys.head)
if (lociInsertSize._2 == 0) None else Some((lociInsertSize._1 / lociInsertSize._2).toInt) case 0 => None
}).toList.flatten case _ => {
Some(counts.foldLeft(0)((old, observation) => {
val contigInsertSize = insertSizesOnAllFragments.foldLeft((0.0, 0))((t, r) => (t._1 + r, t._2 + 1)) observation match {
if (contigInsertSize._2 == 0) None else Some((contigInsertSize._1 / contigInsertSize._2).toInt) case (insertSize: Int, observations: Int) => {
(old + (insertSize * observations)) / (observations + 1)
}
case _ => 0
}
}))
}
}
})
insertSizesOnAllFragments.size match {
case 1 => Some(insertSizesOnAllFragments.head)
case 0 => None
case _ => {
Some(
insertSizesOnAllFragments.foldLeft(0)((old, observation) => {
(old + observation) / 2
}))
}
}
} }
/** /**
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment