Commit 6e959253 authored by Peter van 't Hof

Making tsvOutputs optional

parent f4060277
......@@ -39,7 +39,8 @@ object BamStats extends ToolCommand {
bamFile: File = null,
referenceFasta: Option[File] = None,
binSize: Int = 10000,
threadBinSize: Int = 10000000) extends AbstractArgs
threadBinSize: Int = 10000000,
tsvOutputs: Boolean = false) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('R', "reference") valueName "<file>" action { (x, c) =>
......@@ -57,6 +58,9 @@ object BamStats extends ToolCommand {
opt[Int]("threadBinSize") valueName "<int>" action { (x, c) =>
c.copy(threadBinSize = x)
} text "Size of region per thread"
opt[Unit]("tsvOutputs") action { (x, c) =>
c.copy(tsvOutputs = true)
} text "Also output tsv files, default there is only a json"
}
/** This is the main entry to [[BamStats]], this will do the argument parsing. */
......@@ -68,7 +72,7 @@ object BamStats extends ToolCommand {
val sequenceDict = validateReferenceInBam(cmdArgs.bamFile, cmdArgs.referenceFasta)
init(cmdArgs.outputDir, cmdArgs.bamFile, sequenceDict, cmdArgs.binSize, cmdArgs.threadBinSize)
init(cmdArgs.outputDir, cmdArgs.bamFile, sequenceDict, cmdArgs.binSize, cmdArgs.threadBinSize, cmdArgs.tsvOutputs)
logger.info("Done")
}
......@@ -96,22 +100,25 @@ object BamStats extends ToolCommand {
* @param binSize stats binsize
* @param threadBinSize Thread binsize
*/
def init(outputDir: File, bamFile: File, referenceDict: SAMSequenceDictionary, binSize: Int, threadBinSize: Int): Unit = {
def init(outputDir: File, bamFile: File, referenceDict: SAMSequenceDictionary, binSize: Int, threadBinSize: Int, tsvOutput: Boolean): Unit = {
val contigsFutures = BedRecordList.fromDict(referenceDict).allRecords.map { contig =>
contig.chr -> processContig(contig, bamFile, binSize, threadBinSize, outputDir)
}.toList
val stats = waitOnFutures(processUnmappedReads(bamFile) :: contigsFutures.map(_._2))
stats.flagstat.writeAsTsv(new File(outputDir, "flagstats.tsv"))
stats.insertSizeHistogram.writeFilesAndPlot(outputDir, "insertsize", "Insertsize", "Reads", "Insertsize distribution")
stats.mappingQualityHistogram.writeFilesAndPlot(outputDir, "mappingQuality", "Mapping Quality", "Reads", "Mapping Quality distribution")
stats.clippingHistogram.writeFilesAndPlot(outputDir, "clipping", "CLipped bases", "Reads", "Clipping distribution")
if (tsvOutput) {
stats.flagstat.writeAsTsv(new File(outputDir, "flagstats.tsv"))
stats.leftClippingHistogram.writeFilesAndPlot(outputDir, "left_clipping", "CLipped bases", "Reads", "Left Clipping distribution")
stats.rightClippingHistogram.writeFilesAndPlot(outputDir, "right_clipping", "CLipped bases", "Reads", "Right Clipping distribution")
stats._3_ClippingHistogram.writeFilesAndPlot(outputDir, "3prime_clipping", "CLipped bases", "Reads", "3 Prime Clipping distribution")
stats._5_ClippingHistogram.writeFilesAndPlot(outputDir, "5prime_clipping", "CLipped bases", "Reads", "5 Prime Clipping distribution")
stats.insertSizeHistogram.writeFilesAndPlot(outputDir, "insertsize", "Insertsize", "Reads", "Insertsize distribution")
stats.mappingQualityHistogram.writeFilesAndPlot(outputDir, "mappingQuality", "Mapping Quality", "Reads", "Mapping Quality distribution")
stats.clippingHistogram.writeFilesAndPlot(outputDir, "clipping", "CLipped bases", "Reads", "Clipping distribution")
stats.leftClippingHistogram.writeFilesAndPlot(outputDir, "left_clipping", "CLipped bases", "Reads", "Left Clipping distribution")
stats.rightClippingHistogram.writeFilesAndPlot(outputDir, "right_clipping", "CLipped bases", "Reads", "Right Clipping distribution")
stats._3_ClippingHistogram.writeFilesAndPlot(outputDir, "3prime_clipping", "CLipped bases", "Reads", "3 Prime Clipping distribution")
stats._5_ClippingHistogram.writeFilesAndPlot(outputDir, "5prime_clipping", "CLipped bases", "Reads", "5 Prime Clipping distribution")
}
val statsWriter = new PrintWriter(new File(outputDir, "bamstats.json"))
val totalStats = stats.toSummaryMap
......
......@@ -14,10 +14,10 @@
*/
package nl.lumc.sasc.biopet.tools.bamstats
import java.io.{ File, PrintWriter }
import java.io.{ File, IOException, PrintWriter }
import nl.lumc.sasc.biopet.utils.rscript.LinePlot
import nl.lumc.sasc.biopet.utils.sortAnyAny
import nl.lumc.sasc.biopet.utils.{ Logging, sortAnyAny }
import scala.collection.mutable
......@@ -100,7 +100,12 @@ class Histogram[T](_counts: Map[T, Long] = Map[T, Long]())(implicit ord: Numeric
plot.xlabel = Some(xlabel)
plot.ylabel = Some(ylabel)
plot.title = Some(title)
plot.runLocal()
try {
plot.runLocal()
} catch {
// If plotting fails the tool itself should not fail; plotting depends on R being installed
case e: IOException => Logging.logger.warn(s"Error found while plotting ${plot.output}: ${e.getMessage}")
}
}
}
......@@ -35,6 +35,34 @@ class BamStatsTest extends TestNGSuite with Matchers {
new File(outputDir, "bamstats.json") should exist
new File(outputDir, "bamstats.summary.json") should exist
new File(outputDir, "flagstats.tsv") shouldNot exist
new File(outputDir, "insertsize.stats.tsv") shouldNot exist
new File(outputDir, "insertsize.histogram.tsv") shouldNot exist
new File(outputDir, "mappingQuality.stats.tsv") shouldNot exist
new File(outputDir, "mappingQuality.histogram.tsv") shouldNot exist
new File(outputDir, "clipping.stats.tsv") shouldNot exist
new File(outputDir, "clipping.histogram.tsv") shouldNot exist
new File(outputDir, "flagstats") shouldNot exist
new File(outputDir, "flagstats.summary.json") shouldNot exist
new File(outputDir, "mapping_quality.tsv") shouldNot exist
new File(outputDir, "insert_size.tsv") shouldNot exist
new File(outputDir, "clipping.tsv") shouldNot exist
new File(outputDir, "left_clipping.tsv") shouldNot exist
new File(outputDir, "right_clipping.tsv") shouldNot exist
new File(outputDir, "5_prime_clipping.tsv") shouldNot exist
new File(outputDir, "3_prime_clipping.tsv") shouldNot exist
}
@Test
def testTsvOutputs: Unit = {
val outputDir = Files.createTempDir()
outputDir.deleteOnExit()
BamStats.main(Array("-b", BamStatsTest.pairedBam01.getAbsolutePath, "-o", outputDir.getAbsolutePath, "--tsvOutputs"))
new File(outputDir, "bamstats.json") should exist
new File(outputDir, "bamstats.summary.json") should exist
new File(outputDir, "flagstats.tsv") should exist
new File(outputDir, "insertsize.stats.tsv") should exist
new File(outputDir, "insertsize.histogram.tsv") should exist
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment