Commit 7ab46aa3 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added base version of quality and insertsize histogram

parent df4ba8ec
......@@ -10,9 +10,11 @@ import nl.lumc.sasc.biopet.utils.{BamUtils, ToolCommand}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.{Await, Future}
import scala.concurrent.blocking
import scala.util.{Failure, Success}
import scala.concurrent.duration.Duration
import scala.collection.JavaConversions._
import scala.collection.immutable.Queue
/**
* Created by pjvanthof on 25/05/16.
......@@ -46,9 +48,13 @@ object BamStats extends ToolCommand {
val argsParser = new OptParser
val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
logger.info("Start")
val sequenceDict = validateReferenceinBam(cmdArgs.bamFile, cmdArgs.referenceFasta)
init(cmdArgs.outputDir, cmdArgs.bamFile, sequenceDict, cmdArgs.binSize, cmdArgs.threadBinSize)
logger.info("Done")
}
def validateReferenceinBam(bamFile: File, referenceFasta: Option[File]) = {
......@@ -68,51 +74,55 @@ object BamStats extends ToolCommand {
var stats = Stats()
val contigsFutures = BedRecordList.fromDict(referenceDict).allRecords.map { contig =>
val f = Future { processContig(contig, bamFile, binSize, threadBinSize) }
val f = Future {
val s = processContig(contig, bamFile, binSize, threadBinSize)
blocking { stats += s }
}
f.onFailure { case t => throw new RuntimeException(t) }
contig -> f
}
val unmappedFuture = Future { processUnmappedReads(bamFile) }
unmappedFuture.onFailure { case t => throw new RuntimeException(t) }
// Waiting on all contigs to complete
contigsFutures.foreach { x =>
Await.ready(x._2, Duration.Inf)
x._2.value match {
case Some(x) =>
require(x.isSuccess)
x.foreach( stats += _)
}
}
contigsFutures.foreach { x => Await.ready(x._2, Duration.Inf) }
println(stats.totalReads)
Await.ready(unmappedFuture, Duration.Inf)
stats += unmappedFuture.value.get.get
logger.info(s"total: ${stats.totalReads}, unmapped: ${stats.unmapped}, secondary: ${stats.secondary}")
stats.insertSizeHistogram.keys.toList.sorted.foreach(x => println(s"$x\t${stats.insertSizeHistogram(x)}"))
}
def processContig(region: BedRecord, bamFile: File, binSize: Int, threadBinSize: Int): Stats = {
logger.info(s"Contig '${region.chr}' starting")
var stats = Stats()
val scattersPerThread = threadBinSize / binSize
val scattersFutures = region
val scatters = region
.scatter(binSize)
.grouped(scattersPerThread).map { scatters =>
val f = Future { processThread(scatters, bamFile) }
val scattersPerThread = (region.length.toDouble / threadBinSize).ceil.toInt
val scattersFutures = scatters.grouped(scattersPerThread).map { scatters =>
val f = Future {
val s = processThread(scatters, bamFile)
blocking { stats += s }
}
f.onFailure { case t => throw new RuntimeException(t) }
f
}
// Waiting on all contigs to complete
scattersFutures.foreach { x =>
Await.ready(x, Duration.Inf)
x.value match {
case Some(x) =>
require(x.isSuccess)
x.foreach(stats += _)
}
}
scattersFutures.foreach { x => Await.ready(x, Duration.Inf) }
logger.info(s"Contig '${region.chr}' done")
stats
}
def processThread(scatters: List[BedRecord], bamFile: File): Stats = {
val totalStats = Stats()
var totalStats = Stats()
val sortedScatters = scatters.sortBy(_.start)
val samReader = SamReaderFactory.makeDefault().open(bamFile)
val threadChr = sortedScatters.head.chr
......@@ -120,8 +130,18 @@ object BamStats extends ToolCommand {
val threadEnd = sortedScatters.last.end
val it = samReader.query(threadChr, threadStart, threadEnd, false).buffered
for (samRecord <- it) {
// Read based stats
if (samRecord.getAlignmentStart > threadStart && samRecord.getAlignmentStart <= threadEnd) {
totalStats.totalReads += 1
if (samRecord.isSecondaryOrSupplementary) totalStats.secondary += 1
if (samRecord.getReadUnmappedFlag) totalStats.unmapped += 1
else { // Mapped read
totalStats.mappingQualityHistogram += samRecord.getMappingQuality -> (totalStats.mappingQualityHistogram.getOrElse(samRecord.getMappingQuality, 0L) + 1)
}
if (samRecord.getProperPairFlag && samRecord.getFirstOfPairFlag && !samRecord.getSecondOfPairFlag)
totalStats.insertSizeHistogram += samRecord.getInferredInsertSize.abs -> (totalStats.insertSizeHistogram.getOrElse(samRecord.getInferredInsertSize.abs, 0L) + 1)
//TODO: Read counting
}
......@@ -131,4 +151,14 @@ object BamStats extends ToolCommand {
totalStats
}
def processUnmappedReads(bamFile: File): Stats = {
var stats = Stats()
val samReader = SamReaderFactory.makeDefault().open(bamFile)
var size = samReader.queryUnmapped().size
stats.totalReads += size
stats.unmapped += size
samReader.close()
stats
}
}
package nl.lumc.sasc.biopet.tools.bamstats
import scala.collection.mutable
/**
* Created by pjvanthof on 05/07/16.
*/
case class Stats() {
var totalReads = 0L
var unmapped = 0L
var secondary = 0L
var mappingQualityHistogram: mutable.Map[Int, Long] = mutable.Map()
var insertSizeHistogram: mutable.Map[Int, Long] = mutable.Map()
def +(other: Stats): Stats = {
this.totalReads += other.totalReads
this.unmapped += other.unmapped
other.mappingQualityHistogram.foreach(x => this.mappingQualityHistogram += x._1 -> (this.mappingQualityHistogram.getOrElse(x._1, 0L) + x._2))
other.insertSizeHistogram.foreach(x => this.insertSizeHistogram += x._1 -> (this.insertSizeHistogram.getOrElse(x._1, 0L) + x._2))
this
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment