Commit 2e049a98 authored by Peter van 't Hof's avatar Peter van 't Hof

Adding mapping quality and clipping stats to summary

parent b88623c3
......@@ -9,6 +9,7 @@ import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Input
import scala.collection.JavaConversions._
import scala.io.Source
/**
* Created by pjvanthof on 18/11/2016.
......@@ -31,13 +32,17 @@ class BamStats(val root: Configurable) extends ToolCommandFunction with Referenc
override def defaultCoreMemory = 5.0
override def dictRequired = true
def flagstatSummaryFile(implicit contig: Option[String] = None): File = {
def getOutputFile(name: String, contig: Option[String] = None): File = {
contig match {
case Some(contig) => new File(outputDir, "contigs" + File.separator + contig + File.separator + "flagstats.summary.json")
case _ => new File(outputDir, "flagstats.summary.json")
case Some(contig) => new File(outputDir, "contigs" + File.separator + contig + File.separator + name)
case _ => new File(outputDir, name)
}
}
def flagstatSummaryFile(contig: Option[String] = None): File = getOutputFile("flagstats.summary.json", contig)
def mappingQualityFile(contig: Option[String] = None): File = getOutputFile("mapping_quality.tsv", contig)
def clipingFile(contig: Option[String] = None): File = getOutputFile("clipping.tsv", contig)
override def beforeGraph() {
super.beforeGraph()
jobOutputFile = new File(outputDir, ".bamstats.out")
......@@ -56,9 +61,29 @@ class BamStats(val root: Configurable) extends ToolCommandFunction with Referenc
def summaryFiles: Map[String, File] = Map()
def summaryStats: Map[String, Any] = Map(
"flagstats" -> ConfigUtils.fileToConfigMap(flagstatSummaryFile),
"flagstats" -> ConfigUtils.fileToConfigMap(flagstatSummaryFile()),
"flagstats_per_contig" -> referenceDict.getSequences.map {
c => c.getSequenceName -> ConfigUtils.fileToConfigMap(flagstatSummaryFile(Some(c.getSequenceName)))
}.toMap
}.toMap,
"mapping_quality" -> BamStats.tsvToMap(mappingQualityFile()),
"clipping" -> BamStats.tsvToMap(clipingFile())
)
}
object BamStats {
def tsvToMap(tsvFile: File): Map[String, Array[Int]] = {
val reader = Source.fromFile(tsvFile)
val it = reader.getLines()
val header = it.next().split("\t")
val arrays = header.zipWithIndex.map(x => x._2 -> (x._1 -> Array[Int]()))
for (line <- it) {
val values = line.split("\t")
require(values.size == header.size, s"Line does not have the number of field as header: $line")
for (array <- arrays) {
array._2._2 :+ values(array._1)
}
}
reader.close()
arrays.map(_._2).toMap
}
}
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment