Skip to content
Snippets Groups Projects
Commit 7ab46aa3 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added base version of quality and insertsize histogram

parent df4ba8ec
No related branches found
No related tags found
No related merge requests found
......@@ -10,9 +10,11 @@ import nl.lumc.sasc.biopet.utils.{BamUtils, ToolCommand}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.{Await, Future}
import scala.concurrent.blocking
import scala.util.{Failure, Success}
import scala.concurrent.duration.Duration
import scala.collection.JavaConversions._
import scala.collection.immutable.Queue
/**
* Created by pjvanthof on 25/05/16.
......@@ -46,9 +48,13 @@ object BamStats extends ToolCommand {
val argsParser = new OptParser
val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
logger.info("Start")
val sequenceDict = validateReferenceinBam(cmdArgs.bamFile, cmdArgs.referenceFasta)
init(cmdArgs.outputDir, cmdArgs.bamFile, sequenceDict, cmdArgs.binSize, cmdArgs.threadBinSize)
logger.info("Done")
}
def validateReferenceinBam(bamFile: File, referenceFasta: Option[File]) = {
......@@ -68,51 +74,55 @@ object BamStats extends ToolCommand {
var stats = Stats()
val contigsFutures = BedRecordList.fromDict(referenceDict).allRecords.map { contig =>
val f = Future { processContig(contig, bamFile, binSize, threadBinSize) }
val f = Future {
val s = processContig(contig, bamFile, binSize, threadBinSize)
blocking { stats += s }
}
f.onFailure { case t => throw new RuntimeException(t) }
contig -> f
}
val unmappedFuture = Future { processUnmappedReads(bamFile) }
unmappedFuture.onFailure { case t => throw new RuntimeException(t) }
// Waiting on all contigs to complete
contigsFutures.foreach { x =>
Await.ready(x._2, Duration.Inf)
x._2.value match {
case Some(x) =>
require(x.isSuccess)
x.foreach( stats += _)
}
}
contigsFutures.foreach { x => Await.ready(x._2, Duration.Inf) }
println(stats.totalReads)
Await.ready(unmappedFuture, Duration.Inf)
stats += unmappedFuture.value.get.get
logger.info(s"total: ${stats.totalReads}, unmapped: ${stats.unmapped}, secondary: ${stats.secondary}")
stats.insertSizeHistogram.keys.toList.sorted.foreach(x => println(s"$x\t${stats.insertSizeHistogram(x)}"))
}
def processContig(region: BedRecord, bamFile: File, binSize: Int, threadBinSize: Int): Stats = {
logger.info(s"Contig '${region.chr}' starting")
var stats = Stats()
val scattersPerThread = threadBinSize / binSize
val scattersFutures = region
val scatters = region
.scatter(binSize)
.grouped(scattersPerThread).map { scatters =>
val f = Future { processThread(scatters, bamFile) }
val scattersPerThread = (region.length.toDouble / threadBinSize).ceil.toInt
val scattersFutures = scatters.grouped(scattersPerThread).map { scatters =>
val f = Future {
val s = processThread(scatters, bamFile)
blocking { stats += s }
}
f.onFailure { case t => throw new RuntimeException(t) }
f
}
// Waiting on all contigs to complete
scattersFutures.foreach { x =>
Await.ready(x, Duration.Inf)
x.value match {
case Some(x) =>
require(x.isSuccess)
x.foreach(stats += _)
}
}
scattersFutures.foreach { x => Await.ready(x, Duration.Inf) }
logger.info(s"Contig '${region.chr}' done")
stats
}
def processThread(scatters: List[BedRecord], bamFile: File): Stats = {
val totalStats = Stats()
var totalStats = Stats()
val sortedScatters = scatters.sortBy(_.start)
val samReader = SamReaderFactory.makeDefault().open(bamFile)
val threadChr = sortedScatters.head.chr
......@@ -120,8 +130,18 @@ object BamStats extends ToolCommand {
val threadEnd = sortedScatters.last.end
val it = samReader.query(threadChr, threadStart, threadEnd, false).buffered
for (samRecord <- it) {
// Read based stats
if (samRecord.getAlignmentStart > threadStart && samRecord.getAlignmentStart <= threadEnd) {
totalStats.totalReads += 1
if (samRecord.isSecondaryOrSupplementary) totalStats.secondary += 1
if (samRecord.getReadUnmappedFlag) totalStats.unmapped += 1
else { // Mapped read
totalStats.mappingQualityHistogram += samRecord.getMappingQuality -> (totalStats.mappingQualityHistogram.getOrElse(samRecord.getMappingQuality, 0L) + 1)
}
if (samRecord.getProperPairFlag && samRecord.getFirstOfPairFlag && !samRecord.getSecondOfPairFlag)
totalStats.insertSizeHistogram += samRecord.getInferredInsertSize.abs -> (totalStats.insertSizeHistogram.getOrElse(samRecord.getInferredInsertSize.abs, 0L) + 1)
//TODO: Read counting
}
......@@ -131,4 +151,14 @@ object BamStats extends ToolCommand {
totalStats
}
def processUnmappedReads(bamFile: File): Stats = {
var stats = Stats()
val samReader = SamReaderFactory.makeDefault().open(bamFile)
var size = samReader.queryUnmapped().size
stats.totalReads += size
stats.unmapped += size
samReader.close()
stats
}
}
package nl.lumc.sasc.biopet.tools.bamstats
import scala.collection.mutable
/**
* Created by pjvanthof on 05/07/16.
*/
case class Stats() {
var totalReads = 0L
var unmapped = 0L
var secondary = 0L
var mappingQualityHistogram: mutable.Map[Int, Long] = mutable.Map()
var insertSizeHistogram: mutable.Map[Int, Long] = mutable.Map()
def +(other: Stats): Stats = {
this.totalReads += other.totalReads
this.unmapped += other.unmapped
other.mappingQualityHistogram.foreach(x => this.mappingQualityHistogram += x._1 -> (this.mappingQualityHistogram.getOrElse(x._1, 0L) + x._2))
other.insertSizeHistogram.foreach(x => this.insertSizeHistogram += x._1 -> (this.insertSizeHistogram.getOrElse(x._1, 0L) + x._2))
this
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment