Commit fbe97ce0 authored by Peter van 't Hof's avatar Peter van 't Hof

Added test to Stats class

parent 7a3e8399
......@@ -4,7 +4,7 @@ import java.io.File
import java.util.concurrent.TimeoutException
import htsjdk.samtools.reference.FastaSequenceFile
import htsjdk.samtools.{SAMSequenceDictionary, SamReaderFactory}
import htsjdk.samtools.{ SAMSequenceDictionary, SamReaderFactory }
import nl.lumc.sasc.biopet.utils.BamUtils.SamDictCheck
import nl.lumc.sasc.biopet.utils.ToolCommand
import nl.lumc.sasc.biopet.utils.intervals.{ BedRecord, BedRecordList }
......@@ -13,6 +13,7 @@ import scala.collection.JavaConversions._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._
import scala.concurrent.{ Await, Future }
import scala.language.postfixOps
/**
* This tool will collect stats from a bamfile
......@@ -76,14 +77,14 @@ object BamStats extends ToolCommand {
}
/**
* This is the main running function of [[BamStats]]. This will start the threads and collect and write the results.
*
* @param outputDir All output files will be placed here
* @param bamFile Input bam file
* @param referenceDict Dict for scattering
* @param binSize stats binsize
* @param threadBinSize Thread binsize
*/
* This is the main running function of [[BamStats]]. This will start the threads and collect and write the results.
*
* @param outputDir All output files will be placed here
* @param bamFile Input bam file
* @param referenceDict Dict for scattering
* @param binSize stats binsize
* @param threadBinSize Thread binsize
*/
def init(outputDir: File, bamFile: File, referenceDict: SAMSequenceDictionary, binSize: Int, threadBinSize: Int): Unit = {
val contigsFutures = BedRecordList.fromDict(referenceDict).allRecords.map { contig =>
Future { processContig(contig, bamFile, binSize, threadBinSize) }
......@@ -105,28 +106,28 @@ object BamStats extends ToolCommand {
}
/**
* This will start the subjobs for each contig and collect [[Stats]] on contig level
*
* @param region Region to check, usually this is the complete contig
* @param bamFile Input bam file
* @param binSize stats binsize
* @param threadBinSize Thread binsize
* @return Output stats
*/
* This will start the subjobs for each contig and collect [[Stats]] on contig level
*
* @param region Region to check, usually this is the complete contig
* @param bamFile Input bam file
* @param binSize stats binsize
* @param threadBinSize Thread binsize
* @return Output stats
*/
def processContig(region: BedRecord, bamFile: File, binSize: Int, threadBinSize: Int): Stats = {
val scattersFutures = region
.scatter(binSize)
.grouped((region.length.toDouble / threadBinSize).ceil.toInt)
.map( scatters => Future { processThread(scatters, bamFile) })
.map(scatters => Future { processThread(scatters, bamFile) })
waitOnFutures(scattersFutures.toList, Some(region.chr))
}
/**
* This method will wait until all futures are complete and then collect a single [[Stats]] instance
* @param futures List of futures to monitor
* @param msg Optional message for logging
* @return Output stats
*/
* This method will wait until all futures are complete and then collect a single [[Stats]] instance
* @param futures List of futures to monitor
* @param msg Optional message for logging
* @return Output stats
*/
def waitOnFutures(futures: List[Future[Stats]], msg: Option[String] = None): Stats = {
msg.foreach(m => logger.info(s"Start monitoring jobs for '$m', ${futures.size} jobs"))
futures.foreach(_.onFailure { case t => throw new RuntimeException(t) })
......@@ -148,12 +149,12 @@ object BamStats extends ToolCommand {
}
/**
* This method will process 1 thread bin
*
* @param scatters bins to check
* @param bamFile Input bamfile
* @return Output stats
*/
* This method will process 1 thread bin
*
* @param scatters bins to check
* @param bamFile Input bamfile
* @return Output stats
*/
def processThread(scatters: List[BedRecord], bamFile: File): Stats = {
val totalStats = Stats()
val sortedScatters = scatters.sortBy(_.start)
......@@ -201,10 +202,10 @@ object BamStats extends ToolCommand {
}
/**
* This method will only count the unmapped fragments
* @param bamFile Input bamfile
* @return Output stats
*/
* This method will only count the unmapped fragments
* @param bamFile Input bamfile
* @return Output stats
*/
def processUnmappedReads(bamFile: File): Stats = {
val stats = Stats()
val samReader = SamReaderFactory.makeDefault().open(bamFile)
......
......@@ -34,6 +34,13 @@ class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T
counts.keys.toList.sorted.foreach(x => writer.println(s"$x\t${counts(x)}"))
writer.close()
}
/**
 * Two `Counts` instances are equal when their underlying count maps are equal.
 *
 * The element type is erased at runtime, so the match uses a wildcard type
 * instead of `Counts[T]`; the comparison of the maps decides the outcome.
 *
 * @param other Value to compare with
 * @return true when `other` is a `Counts` holding the same counts, false otherwise
 */
override def equals(other: Any): Boolean = {
  other match {
    case c: Counts[_] => this.counts == c.counts
    case _            => false
  }
}

/**
 * Hash code derived from the same map that `equals` compares, so that
 * instances which are equal also share a hash code.
 *
 * NOTE(review): the hash follows the current content of `counts`; if counts are
 * added after an instance was stored in a hash based collection, lookups can miss it.
 *
 * @return hash code of the underlying counts
 */
override def hashCode(): Int = counts.hashCode()
}
class Histogram[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Numeric[T]) extends Counts[T](_counts) {
......
......@@ -3,18 +3,16 @@ package nl.lumc.sasc.biopet.tools.bamstats
/**
* Created by pjvanthof on 05/07/16.
*/
case class Stats() {
var totalReads = 0L
var unmapped = 0L
var secondary = 0L
val mappingQualityHistogram = new Histogram[Int]()
val insertSizeHistogram = new Histogram[Int]()
val clippingHistogram = new Histogram[Int]()
val leftClippingHistogram = new Histogram[Int]()
val rightClippingHistogram = new Histogram[Int]()
val _5_ClippingHistogram = new Histogram[Int]()
val _3_ClippingHistogram = new Histogram[Int]()
case class Stats(var totalReads: Long = 0L,
var unmapped: Long = 0L,
var secondary: Long = 0L,
mappingQualityHistogram: Histogram[Int] = new Histogram[Int](),
insertSizeHistogram: Histogram[Int] = new Histogram[Int](),
clippingHistogram: Histogram[Int] = new Histogram[Int](),
leftClippingHistogram: Histogram[Int] = new Histogram[Int](),
rightClippingHistogram: Histogram[Int] = new Histogram[Int](),
_5_ClippingHistogram: Histogram[Int] = new Histogram[Int](),
_3_ClippingHistogram: Histogram[Int] = new Histogram[Int]()) {
/** This will add an other [[Stats]] inside `this` */
def +=(other: Stats): Stats = {
......
......@@ -9,12 +9,12 @@ import org.testng.annotations.Test
import scala.io.Source
/**
* Created by pjvan_thof on 19-7-16.
*/
* Created by pjvan_thof on 19-7-16.
*/
class CountsTest extends TestNGSuite with Matchers {
@Test
def testValues: Unit = {
val data: Map[String, Long] = Map("1" -> 1, "2" -> 2, "3" -> 3)
def testValues(): Unit = {
val data: Map[String, Long] = Map("1" -> 1, "2" -> 2, "3" -> 3)
val c1 = new Counts[String](data)
c1.countsMap shouldBe data
c1.get("1") shouldBe Some(1)
......@@ -37,14 +37,36 @@ class CountsTest extends TestNGSuite with Matchers {
}
@Test
def testEmpty: Unit = {
def testEmpty(): Unit = {
val c1 = new Counts[Int]()
c1.countsMap.isEmpty shouldBe true
}
@Test
def testTsv: Unit = {
val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3)
def testEqual(): Unit = {
  val left = new Counts[Int]()
  val right = new Counts[Int]()

  /** Every instance has to be equal to itself, whatever it holds at that moment */
  def bothEqualThemselves(): Unit = {
    left shouldBe left
    right shouldBe right
  }

  // A value of an unrelated type is never equal to a Counts
  left should not be "be a string"

  // Two instances without any counts
  bothEqualThemselves()
  left shouldBe right

  // Only one of the two received a count
  left.add(1)
  bothEqualThemselves()
  left should not be right

  // The same count is added to the other instance
  right.add(1)
  bothEqualThemselves()
  left shouldBe right
}
@Test
def testTsv(): Unit = {
val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3)
val c1 = new Counts[Int](data)
val tsvFile = File.createTempFile("counts.", ".tsv")
......
......@@ -9,12 +9,12 @@ import org.testng.annotations.Test
import scala.io.Source
/**
* Created by pjvan_thof on 19-7-16.
*/
* Created by pjvan_thof on 19-7-16.
*/
class HistogramTest extends TestNGSuite with Matchers {
@Test
def testValues: Unit = {
val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3)
val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3)
val c1 = new Histogram[Int](data)
c1.countsMap shouldBe data
c1.get(1) shouldBe Some(1)
......@@ -44,7 +44,7 @@ class HistogramTest extends TestNGSuite with Matchers {
@Test
def testTsv: Unit = {
val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3)
val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3)
val c1 = new Histogram[Int](data)
val tsvFile = File.createTempFile("counts.", ".tsv")
......
package nl.lumc.sasc.biopet.tools.bamstats
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
* Created by pjvan_thof on 19-7-16.
*/
class StatsTest extends TestNGSuite with Matchers {

  /** Equality of [[Stats]] has to follow both the plain counters and the histograms. */
  @Test
  def testEqual(): Unit = {
    val s1 = Stats()
    val s2 = Stats()

    // Two fresh instances
    s1 shouldBe s2

    // A counter that differs, then is brought back in sync
    s1.totalReads += 1
    s1 should not be s2
    s2.totalReads += 1
    s1 shouldBe s2

    // A histogram that differs, then is brought back in sync
    s1.mappingQualityHistogram.add(1)
    s1 should not be s2
    s2.mappingQualityHistogram.add(1)
    s1 shouldBe s2
  }

  /** A fresh [[Stats]] starts with all counters on 0 and all histograms empty. */
  @Test
  def testEmpty(): Unit = {
    val stats = Stats()

    stats.totalReads shouldBe 0
    stats.unmapped shouldBe 0
    stats.secondary shouldBe 0

    stats.clippingHistogram.countsMap shouldBe empty
    stats.insertSizeHistogram.countsMap shouldBe empty
    stats.mappingQualityHistogram.countsMap shouldBe empty
    stats.leftClippingHistogram.countsMap shouldBe empty
    stats.rightClippingHistogram.countsMap shouldBe empty
    stats._5_ClippingHistogram.countsMap shouldBe empty
    stats._3_ClippingHistogram.countsMap shouldBe empty
  }

  /** `+=` has to add the counters and histograms of the other [[Stats]] into the receiver. */
  @Test
  def testPlus(): Unit = {
    val s1 = Stats()
    val s2 = Stats()

    s2.totalReads += 1
    s2._3_ClippingHistogram.add(1)

    // Nothing has been merged into s1 yet
    s1.totalReads shouldBe 0
    s1._3_ClippingHistogram.get(1) shouldBe None

    s1 += s2

    // The values of s2 are now visible in s1
    s1.totalReads shouldBe 1
    s1._3_ClippingHistogram.get(1) shouldBe Some(1)
  }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment