Commit 2299e7a9 authored by Peter van 't Hof's avatar Peter van 't Hof Committed by GitHub
Browse files

Merge pull request #37 from biopet/fix-BIOPET-621

Adding more output files to bamstats
parents 3ab8f781 6e959253
...@@ -39,7 +39,8 @@ object BamStats extends ToolCommand { ...@@ -39,7 +39,8 @@ object BamStats extends ToolCommand {
bamFile: File = null, bamFile: File = null,
referenceFasta: Option[File] = None, referenceFasta: Option[File] = None,
binSize: Int = 10000, binSize: Int = 10000,
threadBinSize: Int = 10000000) extends AbstractArgs threadBinSize: Int = 10000000,
tsvOutputs: Boolean = false) extends AbstractArgs
class OptParser extends AbstractOptParser { class OptParser extends AbstractOptParser {
opt[File]('R', "reference") valueName "<file>" action { (x, c) => opt[File]('R', "reference") valueName "<file>" action { (x, c) =>
...@@ -57,6 +58,9 @@ object BamStats extends ToolCommand { ...@@ -57,6 +58,9 @@ object BamStats extends ToolCommand {
opt[Int]("threadBinSize") valueName "<int>" action { (x, c) => opt[Int]("threadBinSize") valueName "<int>" action { (x, c) =>
c.copy(threadBinSize = x) c.copy(threadBinSize = x)
} text "Size of region per thread" } text "Size of region per thread"
opt[Unit]("tsvOutputs") action { (x, c) =>
c.copy(tsvOutputs = true)
} text "Also output tsv files, default there is only a json"
} }
/** This is the main entry to [[BamStats]], this will do the argument parsing. */ /** This is the main entry to [[BamStats]], this will do the argument parsing. */
...@@ -68,7 +72,7 @@ object BamStats extends ToolCommand { ...@@ -68,7 +72,7 @@ object BamStats extends ToolCommand {
val sequenceDict = validateReferenceInBam(cmdArgs.bamFile, cmdArgs.referenceFasta) val sequenceDict = validateReferenceInBam(cmdArgs.bamFile, cmdArgs.referenceFasta)
init(cmdArgs.outputDir, cmdArgs.bamFile, sequenceDict, cmdArgs.binSize, cmdArgs.threadBinSize) init(cmdArgs.outputDir, cmdArgs.bamFile, sequenceDict, cmdArgs.binSize, cmdArgs.threadBinSize, cmdArgs.tsvOutputs)
logger.info("Done") logger.info("Done")
} }
...@@ -96,13 +100,26 @@ object BamStats extends ToolCommand { ...@@ -96,13 +100,26 @@ object BamStats extends ToolCommand {
* @param binSize stats binsize * @param binSize stats binsize
* @param threadBinSize Thread binsize * @param threadBinSize Thread binsize
*/ */
def init(outputDir: File, bamFile: File, referenceDict: SAMSequenceDictionary, binSize: Int, threadBinSize: Int): Unit = { def init(outputDir: File, bamFile: File, referenceDict: SAMSequenceDictionary, binSize: Int, threadBinSize: Int, tsvOutput: Boolean): Unit = {
val contigsFutures = BedRecordList.fromDict(referenceDict).allRecords.map { contig => val contigsFutures = BedRecordList.fromDict(referenceDict).allRecords.map { contig =>
contig.chr -> processContig(contig, bamFile, binSize, threadBinSize, outputDir) contig.chr -> processContig(contig, bamFile, binSize, threadBinSize, outputDir)
}.toList }.toList
val stats = waitOnFutures(processUnmappedReads(bamFile) :: contigsFutures.map(_._2)) val stats = waitOnFutures(processUnmappedReads(bamFile) :: contigsFutures.map(_._2))
if (tsvOutput) {
stats.flagstat.writeAsTsv(new File(outputDir, "flagstats.tsv"))
stats.insertSizeHistogram.writeFilesAndPlot(outputDir, "insertsize", "Insertsize", "Reads", "Insertsize distribution")
stats.mappingQualityHistogram.writeFilesAndPlot(outputDir, "mappingQuality", "Mapping Quality", "Reads", "Mapping Quality distribution")
stats.clippingHistogram.writeFilesAndPlot(outputDir, "clipping", "CLipped bases", "Reads", "Clipping distribution")
stats.leftClippingHistogram.writeFilesAndPlot(outputDir, "left_clipping", "CLipped bases", "Reads", "Left Clipping distribution")
stats.rightClippingHistogram.writeFilesAndPlot(outputDir, "right_clipping", "CLipped bases", "Reads", "Right Clipping distribution")
stats._3_ClippingHistogram.writeFilesAndPlot(outputDir, "3prime_clipping", "CLipped bases", "Reads", "3 Prime Clipping distribution")
stats._5_ClippingHistogram.writeFilesAndPlot(outputDir, "5prime_clipping", "CLipped bases", "Reads", "5 Prime Clipping distribution")
}
val statsWriter = new PrintWriter(new File(outputDir, "bamstats.json")) val statsWriter = new PrintWriter(new File(outputDir, "bamstats.json"))
val totalStats = stats.toSummaryMap val totalStats = stats.toSummaryMap
val statsMap = Map( val statsMap = Map(
......
...@@ -14,8 +14,10 @@ ...@@ -14,8 +14,10 @@
*/ */
package nl.lumc.sasc.biopet.tools.bamstats package nl.lumc.sasc.biopet.tools.bamstats
import java.io.{ File, PrintWriter } import java.io.{ File, IOException, PrintWriter }
import nl.lumc.sasc.biopet.utils.sortAnyAny
import nl.lumc.sasc.biopet.utils.rscript.LinePlot
import nl.lumc.sasc.biopet.utils.{ Logging, sortAnyAny }
import scala.collection.mutable import scala.collection.mutable
...@@ -43,7 +45,7 @@ class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T ...@@ -43,7 +45,7 @@ class Counts[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Ordering[T
} }
/** Write histogram to a tsv/count file */ /** Write histogram to a tsv/count file */
def writeToTsv(file: File): Unit = { def writeHistogramToTsv(file: File): Unit = {
val writer = new PrintWriter(file) val writer = new PrintWriter(file)
writer.println("value\tcount") writer.println("value\tcount")
counts.keys.toList.sorted.foreach(x => writer.println(s"$x\t${counts(x)}")) counts.keys.toList.sorted.foreach(x => writer.println(s"$x\t${counts(x)}"))
...@@ -82,4 +84,28 @@ class Histogram[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Numeric ...@@ -82,4 +84,28 @@ class Histogram[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Numeric
} else Map() } else Map()
} }
/** Write histogram to a tsv/count file */
def writeAggregateToTsv(file: File): Unit = {
val writer = new PrintWriter(file)
aggregateStats.foreach(x => writer.println(x._1 + "\t" + x._2))
writer.close()
}
def writeFilesAndPlot(outputDir: File, prefix: String, xlabel: String, ylabel: String, title: String): Unit = {
writeHistogramToTsv(new File(outputDir, prefix + ".histogram.tsv"))
writeAggregateToTsv(new File(outputDir, prefix + ".stats.tsv"))
val plot = new LinePlot(null)
plot.input = new File(outputDir, prefix + ".histogram.tsv")
plot.output = new File(outputDir, prefix + ".histogram.png")
plot.xlabel = Some(xlabel)
plot.ylabel = Some(ylabel)
plot.title = Some(title)
try {
plot.runLocal()
} catch {
// If plotting fails the tools should not fail, this depens on R to be installed
case e: IOException => Logging.logger.warn(s"Error found while plotting ${plot.output}: ${e.getMessage}")
}
}
} }
...@@ -50,13 +50,13 @@ case class Stats(flagstat: FlagstatCollector = new FlagstatCollector(), ...@@ -50,13 +50,13 @@ case class Stats(flagstat: FlagstatCollector = new FlagstatCollector(),
def writeStatsToFiles(outputDir: File): Unit = { def writeStatsToFiles(outputDir: File): Unit = {
this.flagstat.writeReportToFile(new File(outputDir, "flagstats")) this.flagstat.writeReportToFile(new File(outputDir, "flagstats"))
this.flagstat.writeSummaryTofile(new File(outputDir, "flagstats.summary.json")) this.flagstat.writeSummaryTofile(new File(outputDir, "flagstats.summary.json"))
this.mappingQualityHistogram.writeToTsv(new File(outputDir, "mapping_quality.tsv")) this.mappingQualityHistogram.writeHistogramToTsv(new File(outputDir, "mapping_quality.tsv"))
this.insertSizeHistogram.writeToTsv(new File(outputDir, "insert_size.tsv")) this.insertSizeHistogram.writeHistogramToTsv(new File(outputDir, "insert_size.tsv"))
this.clippingHistogram.writeToTsv(new File(outputDir, "clipping.tsv")) this.clippingHistogram.writeHistogramToTsv(new File(outputDir, "clipping.tsv"))
this.leftClippingHistogram.writeToTsv(new File(outputDir, "left_clipping.tsv")) this.leftClippingHistogram.writeHistogramToTsv(new File(outputDir, "left_clipping.tsv"))
this.rightClippingHistogram.writeToTsv(new File(outputDir, "right_clipping.tsv")) this.rightClippingHistogram.writeHistogramToTsv(new File(outputDir, "right_clipping.tsv"))
this._5_ClippingHistogram.writeToTsv(new File(outputDir, "5_prime_clipping.tsv")) this._5_ClippingHistogram.writeHistogramToTsv(new File(outputDir, "5_prime_clipping.tsv"))
this._3_ClippingHistogram.writeToTsv(new File(outputDir, "3_prime_clipping.tsv")) this._3_ClippingHistogram.writeHistogramToTsv(new File(outputDir, "3_prime_clipping.tsv"))
} }
def toSummaryMap = { def toSummaryMap = {
......
...@@ -32,6 +32,12 @@ class FlagstatCollector { ...@@ -32,6 +32,12 @@ class FlagstatCollector {
protected[FlagstatCollector] var totalCounts: Array[Long] = Array() protected[FlagstatCollector] var totalCounts: Array[Long] = Array()
protected[FlagstatCollector] var crossCounts = Array.ofDim[Long](1, 1) protected[FlagstatCollector] var crossCounts = Array.ofDim[Long](1, 1)
def writeAsTsv(file: File): Unit = {
val writer = new PrintWriter(file)
names.foreach(x => writer.println(x._2 + "\t" + totalCounts(x._1)))
writer.close()
}
def loadDefaultFunctions() { def loadDefaultFunctions() {
addFunction("All", record => true) addFunction("All", record => true)
addFunction("Mapped", record => !record.getReadUnmappedFlag) addFunction("Mapped", record => !record.getReadUnmappedFlag)
......
...@@ -34,6 +34,43 @@ class BamStatsTest extends TestNGSuite with Matchers { ...@@ -34,6 +34,43 @@ class BamStatsTest extends TestNGSuite with Matchers {
new File(outputDir, "bamstats.json") should exist new File(outputDir, "bamstats.json") should exist
new File(outputDir, "bamstats.summary.json") should exist new File(outputDir, "bamstats.summary.json") should exist
new File(outputDir, "flagstats.tsv") shouldNot exist
new File(outputDir, "insertsize.stats.tsv") shouldNot exist
new File(outputDir, "insertsize.histogram.tsv") shouldNot exist
new File(outputDir, "mappingQuality.stats.tsv") shouldNot exist
new File(outputDir, "mappingQuality.histogram.tsv") shouldNot exist
new File(outputDir, "clipping.stats.tsv") shouldNot exist
new File(outputDir, "clipping.histogram.tsv") shouldNot exist
new File(outputDir, "flagstats") shouldNot exist
new File(outputDir, "flagstats.summary.json") shouldNot exist
new File(outputDir, "mapping_quality.tsv") shouldNot exist
new File(outputDir, "insert_size.tsv") shouldNot exist
new File(outputDir, "clipping.tsv") shouldNot exist
new File(outputDir, "left_clipping.tsv") shouldNot exist
new File(outputDir, "right_clipping.tsv") shouldNot exist
new File(outputDir, "5_prime_clipping.tsv") shouldNot exist
new File(outputDir, "3_prime_clipping.tsv") shouldNot exist
}
@Test
def testTsvOutputs: Unit = {
val outputDir = Files.createTempDir()
outputDir.deleteOnExit()
BamStats.main(Array("-b", BamStatsTest.pairedBam01.getAbsolutePath, "-o", outputDir.getAbsolutePath, "--tsvOutputs"))
new File(outputDir, "bamstats.json") should exist
new File(outputDir, "bamstats.summary.json") should exist
new File(outputDir, "flagstats.tsv") should exist
new File(outputDir, "insertsize.stats.tsv") should exist
new File(outputDir, "insertsize.histogram.tsv") should exist
new File(outputDir, "mappingQuality.stats.tsv") should exist
new File(outputDir, "mappingQuality.histogram.tsv") should exist
new File(outputDir, "clipping.stats.tsv") should exist
new File(outputDir, "clipping.histogram.tsv") should exist
new File(outputDir, "flagstats") shouldNot exist new File(outputDir, "flagstats") shouldNot exist
new File(outputDir, "flagstats.summary.json") shouldNot exist new File(outputDir, "flagstats.summary.json") shouldNot exist
new File(outputDir, "mapping_quality.tsv") shouldNot exist new File(outputDir, "mapping_quality.tsv") shouldNot exist
......
...@@ -86,7 +86,7 @@ class CountsTest extends TestNGSuite with Matchers { ...@@ -86,7 +86,7 @@ class CountsTest extends TestNGSuite with Matchers {
val tsvFile = File.createTempFile("counts.", ".tsv") val tsvFile = File.createTempFile("counts.", ".tsv")
tsvFile.deleteOnExit() tsvFile.deleteOnExit()
c1.writeToTsv(tsvFile) c1.writeHistogramToTsv(tsvFile)
val reader = Source.fromFile(tsvFile) val reader = Source.fromFile(tsvFile)
reader.getLines().toList shouldBe List("value\tcount", "1\t1", "2\t2", "3\t3") reader.getLines().toList shouldBe List("value\tcount", "1\t1", "2\t2", "3\t3")
......
...@@ -64,7 +64,7 @@ class HistogramTest extends TestNGSuite with Matchers { ...@@ -64,7 +64,7 @@ class HistogramTest extends TestNGSuite with Matchers {
val tsvFile = File.createTempFile("counts.", ".tsv") val tsvFile = File.createTempFile("counts.", ".tsv")
tsvFile.deleteOnExit() tsvFile.deleteOnExit()
c1.writeToTsv(tsvFile) c1.writeHistogramToTsv(tsvFile)
val reader = Source.fromFile(tsvFile) val reader = Source.fromFile(tsvFile)
reader.getLines().toList shouldBe List("value\tcount", "1\t1", "2\t2", "3\t3") reader.getLines().toList shouldBe List("value\tcount", "1\t1", "2\t2", "3\t3")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment