From e46fb71829d4961b4962c77471b3acedc51275ff Mon Sep 17 00:00:00 2001 From: Peter van 't Hof Date: Mon, 20 Mar 2017 16:18:14 +0100 Subject: [PATCH] Adding plots to histograms --- .../sasc/biopet/tools/bamstats/BamStats.scala | 23 +++++++------------ .../biopet/tools/bamstats/Histogram.scala | 16 ++++++++++++- .../biopet/tools/bamstats/BamStatsTest.scala | 9 ++++++++ 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala index 10eb3587e..3d8defe13 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStats.scala @@ -104,21 +104,14 @@ object BamStats extends ToolCommand { val stats = waitOnFutures(processUnmappedReads(bamFile) :: contigsFutures.map(_._2)) stats.flagstat.writeAsTsv(new File(outputDir, "flagstats.tsv")) - stats.insertSizeHistogram.writeHistogramToTsv(new File(outputDir, "insertsize.histogram.tsv")) - stats.insertSizeHistogram.writeAggregateToTsv(new File(outputDir, "insertsize.stats.tsv")) - stats.mappingQualityHistogram.writeHistogramToTsv(new File(outputDir, "mappingQuality.histogram.tsv")) - stats.mappingQualityHistogram.writeAggregateToTsv(new File(outputDir, "mappingQualityHistogram.stats.tsv")) - stats.clippingHistogram.writeHistogramToTsv(new File(outputDir, "clipping.histogram.tsv")) - stats.clippingHistogram.writeAggregateToTsv(new File(outputDir, "clipping.stats.tsv")) - - stats.leftClippingHistogram.writeHistogramToTsv(new File(outputDir, "left_clipping.histogram.tsv")) - stats.leftClippingHistogram.writeAggregateToTsv(new File(outputDir, "left_clipping.stats.tsv")) - stats.rightClippingHistogram.writeHistogramToTsv(new File(outputDir, "right_clipping.histogram.tsv")) - stats.rightClippingHistogram.writeAggregateToTsv(new File(outputDir, "right_clipping.stats.tsv")) - stats._3_ClippingHistogram.writeHistogramToTsv(new File(outputDir, "3prime_clipping.histogram.tsv")) - stats._3_ClippingHistogram.writeAggregateToTsv(new File(outputDir, "3prime_clipping.stats.tsv")) - stats._5_ClippingHistogram.writeHistogramToTsv(new File(outputDir, "5prime_clipping.histogram.tsv")) - stats._5_ClippingHistogram.writeAggregateToTsv(new File(outputDir, "5prime_clipping.stats.tsv")) + stats.insertSizeHistogram.writeFilesAndPlot(outputDir, "insertsize", "Insertsize", "Reads", "Insertsize distribution") + stats.mappingQualityHistogram.writeFilesAndPlot(outputDir, "mappingQuality", "Mapping Quality", "Reads", "Mapping Quality distribution") + stats.clippingHistogram.writeFilesAndPlot(outputDir, "clipping", "CLipped bases", "Reads", "Clipping distribution") + + stats.leftClippingHistogram.writeFilesAndPlot(outputDir, "left_clipping", "CLipped bases", "Reads", "Left Clipping distribution") + stats.rightClippingHistogram.writeFilesAndPlot(outputDir, "right_clipping", "CLipped bases", "Reads", "Right Clipping distribution") + stats._3_ClippingHistogram.writeFilesAndPlot(outputDir, "3prime_clipping", "CLipped bases", "Reads", "3 Prime Clipping distribution") + stats._5_ClippingHistogram.writeFilesAndPlot(outputDir, "5prime_clipping", "CLipped bases", "Reads", "5 Prime Clipping distribution") val statsWriter = new PrintWriter(new File(outputDir, "bamstats.json")) val totalStats = stats.toSummaryMap diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Histogram.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Histogram.scala index 56644eb99..140f327b0 100644 --- a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Histogram.scala +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/bamstats/Histogram.scala @@ -14,7 +14,9 @@ */ package nl.lumc.sasc.biopet.tools.bamstats -import java.io.{ File, PrintWriter } +import java.io.{File, PrintWriter} + +import nl.lumc.sasc.biopet.utils.rscript.LinePlot import nl.lumc.sasc.biopet.utils.sortAnyAny import scala.collection.mutable @@ -89,4 +91,16 @@ class Histogram[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Numeric writer.close() } + def writeFilesAndPlot(outputDir: File, prefix: String, xlabel: String, ylabel: String, title: String): Unit = { + writeHistogramToTsv(new File(outputDir, prefix + ".histogram.tsv")) + writeAggregateToTsv(new File(outputDir, prefix + ".stats.tsv")) + val plot = new LinePlot(null) + plot.input = new File(outputDir, prefix + ".histogram.tsv") + plot.output = new File(outputDir, prefix + ".histogram.png") + plot.xlabel = Some(xlabel) + plot.ylabel = Some(ylabel) + plot.title = Some(title) + plot.runLocal() + } + } diff --git a/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStatsTest.scala b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStatsTest.scala index e93076696..6215605f2 100644 --- a/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStatsTest.scala +++ b/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/bamstats/BamStatsTest.scala @@ -34,6 +34,15 @@ class BamStatsTest extends TestNGSuite with Matchers { new File(outputDir, "bamstats.json") should exist new File(outputDir, "bamstats.summary.json") should exist + + new File(outputDir, "flagstats.tsv") should exist + new File(outputDir, "insertsize.stats.tsv") should exist + new File(outputDir, "insertsize.histogram.tsv") should exist + new File(outputDir, "mappingQuality.stats.tsv") should exist + new File(outputDir, "mappingQuality.histogram.tsv") should exist + new File(outputDir, "clipping.stats.tsv") should exist + new File(outputDir, "clipping.histogram.tsv") should exist + new File(outputDir, "flagstats") shouldNot exist new File(outputDir, "flagstats.summary.json") shouldNot exist new File(outputDir, "mapping_quality.tsv") shouldNot exist -- GitLab