diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala index 602c0cb8aaadfdbbb07a87bdae2fe4f897748102..69568229903461f027a193ffb0386f02c180f25d 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala @@ -10,9 +10,11 @@ import nl.lumc.sasc.biopet.utils.{BamUtils, ToolCommand} import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.{Await, Future} +import scala.concurrent.blocking import scala.util.{Failure, Success} import scala.concurrent.duration.Duration import scala.collection.JavaConversions._ +import scala.collection.immutable.Queue /** * Created by pjvanthof on 25/05/16. @@ -46,9 +48,13 @@ object BamStats extends ToolCommand { val argsParser = new OptParser val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException) + logger.info("Start") + val sequenceDict = validateReferenceinBam(cmdArgs.bamFile, cmdArgs.referenceFasta) init(cmdArgs.outputDir, cmdArgs.bamFile, sequenceDict, cmdArgs.binSize, cmdArgs.threadBinSize) + + logger.info("Done") } def validateReferenceinBam(bamFile: File, referenceFasta: Option[File]) = { @@ -68,51 +74,55 @@ object BamStats extends ToolCommand { var stats = Stats() val contigsFutures = BedRecordList.fromDict(referenceDict).allRecords.map { contig => - val f = Future { processContig(contig, bamFile, binSize, threadBinSize) } + val f = Future { + val s = processContig(contig, bamFile, binSize, threadBinSize) + blocking { stats += s } + } f.onFailure { case t => throw new RuntimeException(t) } contig -> f } + val unmappedFuture = Future { processUnmappedReads(bamFile) } + unmappedFuture.onFailure { case t => throw new RuntimeException(t) } + // Waiting on all contigs to complete - contigsFutures.foreach { x => - Await.ready(x._2, Duration.Inf) - x._2.value match { - case Some(x) => - require(x.isSuccess) - x.foreach( stats += _) - } - } + contigsFutures.foreach { x => Await.ready(x._2, Duration.Inf) } - println(stats.totalReads) + Await.ready(unmappedFuture, Duration.Inf) + stats += unmappedFuture.value.get.get + + logger.info(s"total: ${stats.totalReads}, unmapped: ${stats.unmapped}, secondary: ${stats.secondary}") + stats.insertSizeHistogram.keys.toList.sorted.foreach(x => println(s"$x\t${stats.insertSizeHistogram(x)}")) } def processContig(region: BedRecord, bamFile: File, binSize: Int, threadBinSize: Int): Stats = { + logger.info(s"Contig '${region.chr}' starting") var stats = Stats() - val scattersPerThread = threadBinSize / binSize - val scattersFutures = region + val scatters = region .scatter(binSize) - .grouped(scattersPerThread).map { scatters => - val f = Future { processThread(scatters, bamFile) } + + val scattersPerThread = (region.length.toDouble / threadBinSize).ceil.toInt + + val scattersFutures = scatters.grouped(scattersPerThread).map { scatters => + val f = Future { + val s = processThread(scatters, bamFile) + blocking { stats += s } + } f.onFailure { case t => throw new RuntimeException(t) } f } // Waiting on all contigs to complete - scattersFutures.foreach { x => - Await.ready(x, Duration.Inf) - x.value match { - case Some(x) => - require(x.isSuccess) - x.foreach(stats += _) - } - } + scattersFutures.foreach { x => Await.ready(x, Duration.Inf) } + + logger.info(s"Contig '${region.chr}' done") stats } def processThread(scatters: List[BedRecord], bamFile: File): Stats = { - val totalStats = Stats() + var totalStats = Stats() val sortedScatters = scatters.sortBy(_.start) val samReader = SamReaderFactory.makeDefault().open(bamFile) val threadChr = sortedScatters.head.chr @@ -120,8 +130,18 @@ object BamStats extends ToolCommand { val threadEnd = sortedScatters.last.end val it = samReader.query(threadChr, threadStart, threadEnd, false).buffered for (samRecord <- it) { + + // Read based stats if (samRecord.getAlignmentStart > threadStart && samRecord.getAlignmentStart <= threadEnd) { totalStats.totalReads += 1 + if (samRecord.isSecondaryOrSupplementary) totalStats.secondary += 1 + if (samRecord.getReadUnmappedFlag) totalStats.unmapped += 1 + else { // Mapped read + totalStats.mappingQualityHistogram += samRecord.getMappingQuality -> (totalStats.mappingQualityHistogram.getOrElse(samRecord.getMappingQuality, 0L) + 1) + } + if (samRecord.getProperPairFlag && samRecord.getFirstOfPairFlag && !samRecord.getSecondOfPairFlag) + totalStats.insertSizeHistogram += samRecord.getInferredInsertSize.abs -> (totalStats.insertSizeHistogram.getOrElse(samRecord.getInferredInsertSize.abs, 0L) + 1) + //TODO: Read counting } @@ -131,4 +151,14 @@ object BamStats extends ToolCommand { totalStats } + + def processUnmappedReads(bamFile: File): Stats = { + var stats = Stats() + val samReader = SamReaderFactory.makeDefault().open(bamFile) + var size = samReader.queryUnmapped().size + stats.totalReads += size + stats.unmapped += size + samReader.close() + stats + } } diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala index ca3cd3fd43543290f7351b355ed9ededfe726964..4292ed659a6974fa0965d2d739f1349757c5d220 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala @@ -1,14 +1,23 @@ package nl.lumc.sasc.biopet.tools.bamstats +import scala.collection.mutable + /** * Created by pjvanthof on 05/07/16. */ case class Stats() { var totalReads = 0L + var unmapped = 0L + var secondary = 0L + var mappingQualityHistogram: mutable.Map[Int, Long] = mutable.Map() + var insertSizeHistogram: mutable.Map[Int, Long] = mutable.Map() def +(other: Stats): Stats = { this.totalReads += other.totalReads + this.unmapped += other.unmapped + other.mappingQualityHistogram.foreach(x => this.mappingQualityHistogram += x._1 -> (this.mappingQualityHistogram.getOrElse(x._1, 0L) + x._2)) + other.insertSizeHistogram.foreach(x => this.insertSizeHistogram += x._1 -> (this.insertSizeHistogram.getOrElse(x._1, 0L) + x._2)) this } }