Skip to content
Snippets Groups Projects
Commit fbe97ce0 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added test to Stats class

parent 7a3e8399
No related branches found
No related tags found
No related merge requests found
......@@ -4,7 +4,7 @@ import java.io.File
import java.util.concurrent.TimeoutException
import htsjdk.samtools.reference.FastaSequenceFile
import htsjdk.samtools.{SAMSequenceDictionary, SamReaderFactory}
import htsjdk.samtools.{ SAMSequenceDictionary, SamReaderFactory }
import nl.lumc.sasc.biopet.utils.BamUtils.SamDictCheck
import nl.lumc.sasc.biopet.utils.ToolCommand
import nl.lumc.sasc.biopet.utils.intervals.{ BedRecord, BedRecordList }
......@@ -13,6 +13,7 @@ import scala.collection.JavaConversions._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._
import scala.concurrent.{ Await, Future }
import scala.language.postfixOps
/**
* This tool will collect stats from a bamfile
......@@ -76,14 +77,14 @@ object BamStats extends ToolCommand {
}
/**
* This is the main running function of [[BamStats]]. This will start the threads, then collect and write the results.
*
* @param outputDir All output files will be placed here
* @param bamFile Input bam file
* @param referenceDict Dict for scattering
* @param binSize stats binsize
* @param threadBinSize Thread binsize
*/
* This is the main running function of [[BamStats]]. This will start the threads, then collect and write the results.
*
* @param outputDir All output files will be placed here
* @param bamFile Input bam file
* @param referenceDict Dict for scattering
* @param binSize stats binsize
* @param threadBinSize Thread binsize
*/
def init(outputDir: File, bamFile: File, referenceDict: SAMSequenceDictionary, binSize: Int, threadBinSize: Int): Unit = {
val contigsFutures = BedRecordList.fromDict(referenceDict).allRecords.map { contig =>
Future { processContig(contig, bamFile, binSize, threadBinSize) }
......@@ -105,28 +106,28 @@ object BamStats extends ToolCommand {
}
/**
* This will start the subjobs for each contig and collect [[Stats]] on contig level
*
* @param region Region to check, mostly this is the complete contig
* @param bamFile Input bam file
* @param binSize stats binsize
* @param threadBinSize Thread binsize
* @return Output stats
*/
* This will start the subjobs for each contig and collect [[Stats]] on contig level
*
* @param region Region to check, mostly this is the complete contig
* @param bamFile Input bam file
* @param binSize stats binsize
* @param threadBinSize Thread binsize
* @return Output stats
*/
def processContig(region: BedRecord, bamFile: File, binSize: Int, threadBinSize: Int): Stats = {
  // Scatter the region using `binSize`, bundle the pieces into groups and start one Future per group.
  val scattersFutures = region
    .scatter(binSize)
    // NOTE(review): the group size is ceil(region.length / threadBinSize), i.e. the number of
    // thread-sized bins in the region rather than the number of scatters per thread bin.
    // Confirm this is the intended grouping.
    .grouped((region.length.toDouble / threadBinSize).ceil.toInt)
    .map(scatters => Future { processThread(scatters, bamFile) })
  // `grouped` yields an iterator; materialise it so all futures are created before waiting on them.
  waitOnFutures(scattersFutures.toList, Some(region.chr))
}
/**
* This method will wait until all futures are complete and collect a single [[Stats]] instance
* @param futures List of futures to monitor
* @param msg Optional message for logging
* @return Output stats
*/
* This method will wait until all futures are complete and collect a single [[Stats]] instance
* @param futures List of futures to monitor
* @param msg Optional message for logging
* @return Output stats
*/
def waitOnFutures(futures: List[Future[Stats]], msg: Option[String] = None): Stats = {
msg.foreach(m => logger.info(s"Start monitoring jobs for '$m', ${futures.size} jobs"))
futures.foreach(_.onFailure { case t => throw new RuntimeException(t) })
......@@ -148,12 +149,12 @@ object BamStats extends ToolCommand {
}
/**
* This method will process 1 thread bin
*
* @param scatters bins to check
* @param bamFile Input bamfile
* @return Output stats
*/
* This method will process 1 thread bin
*
* @param scatters bins to check
* @param bamFile Input bamfile
* @return Output stats
*/
def processThread(scatters: List[BedRecord], bamFile: File): Stats = {
val totalStats = Stats()
val sortedScatters = scatters.sortBy(_.start)
......@@ -201,10 +202,10 @@ object BamStats extends ToolCommand {
}
/**
* This method will only count the unmapped fragments
* @param bamFile Input bamfile
* @return Output stats
*/
* This method will only count the unmapped fragments
* @param bamFile Input bamfile
* @return Output stats
*/
def processUnmappedReads(bamFile: File): Stats = {
val stats = Stats()
val samReader = SamReaderFactory.makeDefault().open(bamFile)
......
......@@ -34,6 +34,13 @@ class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T
counts.keys.toList.sorted.foreach(x => writer.println(s"$x\t${counts(x)}"))
writer.close()
}
/**
 * Two [[Counts]] instances are equal when their underlying count maps are equal.
 *
 * @param other Object to compare with
 * @return true when `other` is a [[Counts]] holding the same counts, false otherwise
 */
override def equals(other: Any): Boolean = other match {
  // The element type is erased at runtime, so `Counts[T]` would be an unchecked pattern;
  // a wildcard states what is actually tested and the map comparison does the rest.
  case c: Counts[_] => this.counts == c.counts
  case _            => false
}

/** Hash based on the same map that [[equals]] compares, so equal instances share a hash code. */
override def hashCode(): Int = counts.hashCode()
}
class Histogram[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Numeric[T]) extends Counts[T](_counts) {
......
......@@ -3,18 +3,16 @@ package nl.lumc.sasc.biopet.tools.bamstats
/**
* Created by pjvanthof on 05/07/16.
*/
case class Stats() {
var totalReads = 0L
var unmapped = 0L
var secondary = 0L
val mappingQualityHistogram = new Histogram[Int]()
val insertSizeHistogram = new Histogram[Int]()
val clippingHistogram = new Histogram[Int]()
val leftClippingHistogram = new Histogram[Int]()
val rightClippingHistogram = new Histogram[Int]()
val _5_ClippingHistogram = new Histogram[Int]()
val _3_ClippingHistogram = new Histogram[Int]()
case class Stats(var totalReads: Long = 0L,
var unmapped: Long = 0L,
var secondary: Long = 0L,
mappingQualityHistogram: Histogram[Int] = new Histogram[Int](),
insertSizeHistogram: Histogram[Int] = new Histogram[Int](),
clippingHistogram: Histogram[Int] = new Histogram[Int](),
leftClippingHistogram: Histogram[Int] = new Histogram[Int](),
rightClippingHistogram: Histogram[Int] = new Histogram[Int](),
_5_ClippingHistogram: Histogram[Int] = new Histogram[Int](),
_3_ClippingHistogram: Histogram[Int] = new Histogram[Int]()) {
/** This will add another [[Stats]] into `this` */
def +=(other: Stats): Stats = {
......
......@@ -9,12 +9,12 @@ import org.testng.annotations.Test
import scala.io.Source
/**
* Created by pjvan_thof on 19-7-16.
*/
* Created by pjvan_thof on 19-7-16.
*/
class CountsTest extends TestNGSuite with Matchers {
@Test
def testValues: Unit = {
val data: Map[String, Long] = Map("1" -> 1, "2" -> 2, "3" -> 3)
def testValues(): Unit = {
val data: Map[String, Long] = Map("1" -> 1, "2" -> 2, "3" -> 3)
val c1 = new Counts[String](data)
c1.countsMap shouldBe data
c1.get("1") shouldBe Some(1)
......@@ -37,14 +37,36 @@ class CountsTest extends TestNGSuite with Matchers {
}
@Test
def testEmpty(): Unit = {
  // A Counts created without initial data must expose an empty counts map
  val c1 = new Counts[Int]()
  c1.countsMap.isEmpty shouldBe true
}
@Test
def testTsv: Unit = {
val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3)
// Checks Counts equality: against a non-Counts value, against itself, and against a second
// instance before and after each of them receives the same `add(1)` call.
def testEqual(): Unit = {
val c1 = new Counts[Int]()
val c2 = new Counts[Int]()
// A value of an unrelated type must never compare equal
c1 should not be "be a string"
// Two freshly created instances are equal to themselves and to each other
c1 shouldBe c1
c2 shouldBe c2
c1 shouldBe c2
// After adding to c1 only, the instances must differ
c1.add(1)
c1 shouldBe c1
c2 shouldBe c2
c1 should not be c2
// After the same add on c2, they must be equal again
c2.add(1)
c1 shouldBe c1
c2 shouldBe c2
c1 shouldBe c2
}
@Test
def testTsv(): Unit = {
val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3)
val c1 = new Counts[Int](data)
val tsvFile = File.createTempFile("counts.", ".tsv")
......
......@@ -9,12 +9,12 @@ import org.testng.annotations.Test
import scala.io.Source
/**
* Created by pjvan_thof on 19-7-16.
*/
* Created by pjvan_thof on 19-7-16.
*/
class HistogramTest extends TestNGSuite with Matchers {
@Test
def testValues: Unit = {
val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3)
val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3)
val c1 = new Histogram[Int](data)
c1.countsMap shouldBe data
c1.get(1) shouldBe Some(1)
......@@ -44,7 +44,7 @@ class HistogramTest extends TestNGSuite with Matchers {
@Test
def testTsv: Unit = {
val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3)
val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3)
val c1 = new Histogram[Int](data)
val tsvFile = File.createTempFile("counts.", ".tsv")
......
package nl.lumc.sasc.biopet.tools.bamstats
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
* Created by pjvan_thof on 19-7-16.
*/
class StatsTest extends TestNGSuite with Matchers {

  /** Equality of [[Stats]] must follow both the plain counters and the histograms. */
  @Test
  def testEqual(): Unit = {
    val s1 = Stats()
    val s2 = Stats()

    s1 shouldBe s2

    // Diverge and re-align on a counter field
    s1.totalReads += 1
    s1 should not be s2
    s2.totalReads += 1
    s1 shouldBe s2

    // Diverge and re-align on a histogram field
    s1.mappingQualityHistogram.add(1)
    s1 should not be s2
    s2.mappingQualityHistogram.add(1)
    s1 shouldBe s2
  }

  /** A freshly created [[Stats]] has zeroed counters and empty histograms. */
  @Test
  def testEmpty(): Unit = {
    val stats = Stats()

    stats.totalReads shouldBe 0
    stats.unmapped shouldBe 0
    stats.secondary shouldBe 0

    stats.clippingHistogram.countsMap shouldBe empty
    stats.insertSizeHistogram.countsMap shouldBe empty
    stats.mappingQualityHistogram.countsMap shouldBe empty
    stats.leftClippingHistogram.countsMap shouldBe empty
    stats.rightClippingHistogram.countsMap shouldBe empty
    stats._5_ClippingHistogram.countsMap shouldBe empty
    stats._3_ClippingHistogram.countsMap shouldBe empty
  }

  /** `+=` must merge the counters and histograms of the right-hand side into the left-hand side. */
  @Test
  def testPlus(): Unit = {
    val s1 = Stats()
    val s2 = Stats()

    s2.totalReads += 1
    s2._3_ClippingHistogram.add(1)

    // Changes to s2 must not be visible in s1 before the merge
    s1.totalReads shouldBe 0
    s1._3_ClippingHistogram.get(1) shouldBe None

    s1 += s2

    s1.totalReads shouldBe 1
    s1._3_ClippingHistogram.get(1) shouldBe Some(1)
  }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment