From fbe97ce000402b919cf690f898b0ee2bfddd2278 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Thu, 21 Jul 2016 12:35:17 +0200 Subject: [PATCH] Added test to Stats class --- .../sasc/biopet/tools/bamstats/BamStats.scala | 67 ++++++++++--------- .../biopet/tools/bamstats/Histogram.scala | 7 ++ .../sasc/biopet/tools/bamstats/Stats.scala | 22 +++--- .../biopet/tools/bamstats/CountsTest.scala | 36 ++++++++-- .../biopet/tools/bamstats/HistogramTest.scala | 8 +-- .../biopet/tools/bamstats/StatsTest.scala | 62 +++++++++++++++++ 6 files changed, 146 insertions(+), 56 deletions(-) create mode 100644 biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/StatsTest.scala diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala index db9164bb5..26e14cc75 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala @@ -4,7 +4,7 @@ import java.io.File import java.util.concurrent.TimeoutException import htsjdk.samtools.reference.FastaSequenceFile -import htsjdk.samtools.{SAMSequenceDictionary, SamReaderFactory} +import htsjdk.samtools.{ SAMSequenceDictionary, SamReaderFactory } import nl.lumc.sasc.biopet.utils.BamUtils.SamDictCheck import nl.lumc.sasc.biopet.utils.ToolCommand import nl.lumc.sasc.biopet.utils.intervals.{ BedRecord, BedRecordList } @@ -13,6 +13,7 @@ import scala.collection.JavaConversions._ import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration._ import scala.concurrent.{ Await, Future } +import scala.language.postfixOps /** * This tool will collect stats from a bamfile @@ -76,14 +77,14 @@ object BamStats extends ToolCommand { } /** - * This is the main running function of [[BamStats]]. This will start the thereads and collect and write the results. 
- * - * @param outputDir All output files will be placed here - * @param bamFile Input bam file - * @param referenceDict Dict for scattering - * @param binSize stats binsize - * @param threadBinSize Thread binsize - */ + * This is the main running function of [[BamStats]]. This will start the threads and collect and write the results. + * + * @param outputDir All output files will be placed here + * @param bamFile Input bam file + * @param referenceDict Dict for scattering + * @param binSize stats binsize + * @param threadBinSize Thread binsize + */ def init(outputDir: File, bamFile: File, referenceDict: SAMSequenceDictionary, binSize: Int, threadBinSize: Int): Unit = { val contigsFutures = BedRecordList.fromDict(referenceDict).allRecords.map { contig => Future { processContig(contig, bamFile, binSize, threadBinSize) } @@ -105,28 +106,28 @@ object BamStats extends ToolCommand { } /** - * This will start the subjobs for each contig and collect [[Stats]] on contig level - * - * @param region Region to check, mostly yhis is the complete contig - * @param bamFile Input bam file - * @param binSize stats binsize - * @param threadBinSize Thread binsize - * @return Output stats - */ + * This will start the subjobs for each contig and collect [[Stats]] on contig level + * + * @param region Region to check, mostly this is the complete contig + * @param bamFile Input bam file + * @param binSize stats binsize + * @param threadBinSize Thread binsize + * @return Output stats + */ def processContig(region: BedRecord, bamFile: File, binSize: Int, threadBinSize: Int): Stats = { val scattersFutures = region .scatter(binSize) .grouped((region.length.toDouble / threadBinSize).ceil.toInt) - .map( scatters => Future { processThread(scatters, bamFile) }) + .map(scatters => Future { processThread(scatters, bamFile) }) waitOnFutures(scattersFutures.toList, Some(region.chr)) } /** - * This method will wait when all futures are complete and collect a single [[Stats]] instance - * @param 
futures List of futures to monitor - * @param msg Optional message for logging - * @return Output stats - */ + * This method will wait until all futures are complete and collect a single [[Stats]] instance + * @param futures List of futures to monitor + * @param msg Optional message for logging + * @return Output stats + */ def waitOnFutures(futures: List[Future[Stats]], msg: Option[String] = None): Stats = { msg.foreach(m => logger.info(s"Start monitoring jobs for '$m', ${futures.size} jobs")) futures.foreach(_.onFailure { case t => throw new RuntimeException(t) }) @@ -148,12 +149,12 @@ object BamStats extends ToolCommand { } /** - * This method will process 1 thread bin - * - * @param scatters bins to check - * @param bamFile Input bamfile - * @return Output stats - */ + * This method will process 1 thread bin + * + * @param scatters bins to check + * @param bamFile Input bamfile + * @return Output stats + */ def processThread(scatters: List[BedRecord], bamFile: File): Stats = { val totalStats = Stats() val sortedScatters = scatters.sortBy(_.start) @@ -201,10 +202,10 @@ object BamStats extends ToolCommand { } /** - * This method will only count the unmapped fragments - * @param bamFile Input bamfile - * @return Output stats - */ + * This method will only count the unmapped fragments + * @param bamFile Input bamfile + * @return Output stats + */ def processUnmappedReads(bamFile: File): Stats = { val stats = Stats() val samReader = SamReaderFactory.makeDefault().open(bamFile) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Histogram.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Histogram.scala index 67f9bd6cf..154257842 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Histogram.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Histogram.scala @@ -34,6 +34,13 @@ class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T 
counts.keys.toList.sorted.foreach(x => writer.println(s"$x\t${counts(x)}")) writer.close() } + + override def equals(other: Any): Boolean = { + other match { + case c: Counts[T] => this.counts == c.counts + case _ => false + } + } } class Histogram[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Numeric[T]) extends Counts[T](_counts) { diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala index bf446e2dd..cdc8a3f65 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Stats.scala @@ -3,18 +3,16 @@ package nl.lumc.sasc.biopet.tools.bamstats /** * Created by pjvanthof on 05/07/16. */ -case class Stats() { - - var totalReads = 0L - var unmapped = 0L - var secondary = 0L - val mappingQualityHistogram = new Histogram[Int]() - val insertSizeHistogram = new Histogram[Int]() - val clippingHistogram = new Histogram[Int]() - val leftClippingHistogram = new Histogram[Int]() - val rightClippingHistogram = new Histogram[Int]() - val _5_ClippingHistogram = new Histogram[Int]() - val _3_ClippingHistogram = new Histogram[Int]() +case class Stats(var totalReads: Long = 0L, + var unmapped: Long = 0L, + var secondary: Long = 0L, + mappingQualityHistogram: Histogram[Int] = new Histogram[Int](), + insertSizeHistogram: Histogram[Int] = new Histogram[Int](), + clippingHistogram: Histogram[Int] = new Histogram[Int](), + leftClippingHistogram: Histogram[Int] = new Histogram[Int](), + rightClippingHistogram: Histogram[Int] = new Histogram[Int](), + _5_ClippingHistogram: Histogram[Int] = new Histogram[Int](), + _3_ClippingHistogram: Histogram[Int] = new Histogram[Int]()) { /** This will add an other [[Stats]] inside `this` */ def +=(other: Stats): Stats = { diff --git a/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/CountsTest.scala 
b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/CountsTest.scala index 286abc245..af79d371c 100644 --- a/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/CountsTest.scala +++ b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/CountsTest.scala @@ -9,12 +9,12 @@ import org.testng.annotations.Test import scala.io.Source /** - * Created by pjvan_thof on 19-7-16. - */ + * Created by pjvan_thof on 19-7-16. + */ class CountsTest extends TestNGSuite with Matchers { @Test - def testValues: Unit = { - val data: Map[String, Long] = Map("1" -> 1, "2" -> 2, "3" -> 3) + def testValues(): Unit = { + val data: Map[String, Long] = Map("1" -> 1, "2" -> 2, "3" -> 3) val c1 = new Counts[String](data) c1.countsMap shouldBe data c1.get("1") shouldBe Some(1) @@ -37,14 +37,36 @@ class CountsTest extends TestNGSuite with Matchers { } @Test - def testEmpty: Unit = { + def testEmpty(): Unit = { val c1 = new Counts[Int]() c1.countsMap.isEmpty shouldBe true } @Test - def testTsv: Unit = { - val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3) + def testEqual(): Unit = { + val c1 = new Counts[Int]() + val c2 = new Counts[Int]() + + c1 should not be "be a string" + + c1 shouldBe c1 + c2 shouldBe c2 + c1 shouldBe c2 + + c1.add(1) + c1 shouldBe c1 + c2 shouldBe c2 + c1 should not be c2 + + c2.add(1) + c1 shouldBe c1 + c2 shouldBe c2 + c1 shouldBe c2 + } + + @Test + def testTsv(): Unit = { + val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3) val c1 = new Counts[Int](data) val tsvFile = File.createTempFile("counts.", ".tsv") diff --git a/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/HistogramTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/HistogramTest.scala index 9944821b5..798cd0c56 100644 --- a/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/HistogramTest.scala +++ b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/HistogramTest.scala @@ -9,12 +9,12 @@ import 
org.testng.annotations.Test import scala.io.Source /** - * Created by pjvan_thof on 19-7-16. - */ + * Created by pjvan_thof on 19-7-16. + */ class HistogramTest extends TestNGSuite with Matchers { @Test def testValues: Unit = { - val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3) + val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3) val c1 = new Histogram[Int](data) c1.countsMap shouldBe data c1.get(1) shouldBe Some(1) @@ -44,7 +44,7 @@ class HistogramTest extends TestNGSuite with Matchers { @Test def testTsv: Unit = { - val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3) + val data: Map[Int, Long] = Map(1 -> 1, 2 -> 2, 3 -> 3) val c1 = new Histogram[Int](data) val tsvFile = File.createTempFile("counts.", ".tsv") diff --git a/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/StatsTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/StatsTest.scala new file mode 100644 index 000000000..50194795a --- /dev/null +++ b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/StatsTest.scala @@ -0,0 +1,62 @@ +package nl.lumc.sasc.biopet.tools.bamstats + +import org.scalatest.Matchers +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.Test + +/** + * Created by pjvan_thof on 19-7-16. 
+ */ +class StatsTest extends TestNGSuite with Matchers { + @Test + def testEqual(): Unit = { + val s1 = new Stats() + val s2 = new Stats() + + s1 shouldBe s2 + + s1.totalReads += 1 + s1 should not be s2 + + s2.totalReads += 1 + s1 shouldBe s2 + + s1.mappingQualityHistogram.add(1) + s1 should not be s2 + + s2.mappingQualityHistogram.add(1) + s1 shouldBe s2 + } + + @Test + def testEmpty(): Unit = { + val stats = new Stats() + + stats.totalReads shouldBe 0 + stats.unmapped shouldBe 0 + stats.secondary shouldBe 0 + + stats.clippingHistogram.countsMap shouldBe empty + stats.insertSizeHistogram.countsMap shouldBe empty + stats.mappingQualityHistogram.countsMap shouldBe empty + stats.leftClippingHistogram.countsMap shouldBe empty + stats.rightClippingHistogram.countsMap shouldBe empty + stats._5_ClippingHistogram.countsMap shouldBe empty + stats._3_ClippingHistogram.countsMap shouldBe empty + } + + @Test + def testPlus: Unit = { + val s1 = new Stats() + val s2 = new Stats() + + s2.totalReads += 1 + s2._3_ClippingHistogram.add(1) + + s1.totalReads shouldBe 0 + s1._3_ClippingHistogram.get(1) shouldBe None + s1 += s2 + s1.totalReads shouldBe 1 + s1._3_ClippingHistogram.get(1) shouldBe Some(1) + } +} -- GitLab