Commit c12d0609 authored by Peter van 't Hof

Move summary to tool

parent 07304292
......@@ -208,8 +208,8 @@ object BammetricsReport extends ReportBuilder {
val pngFile = new File(outputDir, prefix + ".png")
def paths(name: String) = Map(
"mapping_quality" -> List("bammetrics", "stats", "bamstats", "mapping_quality", "value"),
name -> List("bammetrics", "stats", "bamstats", "mapping_quality", "count")
"mapping_quality" -> List("bammetrics", "stats", "bamstats", "mapping_quality", "histogram", "value"),
name -> List("bammetrics", "stats", "bamstats", "mapping_quality", "histogram", "count")
)
val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
......@@ -236,8 +236,8 @@ object BammetricsReport extends ReportBuilder {
val pngFile = new File(outputDir, prefix + ".png")
def paths(name: String) = Map(
"clipping" -> List("bammetrics", "stats", "bamstats", "clipping", "value"),
name -> List("bammetrics", "stats", "bamstats", "clipping", "count")
"clipping" -> List("bammetrics", "stats", "bamstats", "clipping", "histogram", "value"),
name -> List("bammetrics", "stats", "bamstats", "clipping", "histogram", "count")
)
val tables = getSampleLibraries(summary, sampleId, libId, libraryLevel)
......
......@@ -4,6 +4,7 @@ import java.io.File
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.core.{Reference, ToolCommandFunction}
import nl.lumc.sasc.biopet.tools.bamstats.BamStats
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Input
......@@ -40,6 +41,7 @@ class BamStats(val root: Configurable) extends ToolCommandFunction with Referenc
}
}
/** JSON summary file named `bamstats.summary.json`, located directly in `outputDir`. */
def bamstatsSummary: File = new File(outputDir, "bamstats.summary.json")
/**
 * Flagstat summary JSON (`flagstats.summary.json`), resolved through `getOutputFile`.
 *
 * @param contig optional contig name, `None` by default; presumably selects a per-contig
 *               file instead of the whole-BAM one (confirm against `getOutputFile`)
 */
def flagstatSummaryFile(contig: Option[String] = None): File = getOutputFile("flagstats.summary.json", contig)
/**
 * Mapping-quality table (`mapping_quality.tsv`), resolved through `getOutputFile`.
 *
 * @param contig optional contig name, `None` by default; presumably selects a per-contig
 *               file instead of the whole-BAM one (confirm against `getOutputFile`)
 */
def mappingQualityFile(contig: Option[String] = None): File = getOutputFile("mapping_quality.tsv", contig)
/**
 * Clipping table (`clipping.tsv`), resolved through `getOutputFile`.
 *
 * NOTE(review): the method name is spelled "cliping"; it is kept as-is because callers refer
 * to it by this name.
 *
 * @param contig optional contig name, `None` by default; presumably selects a per-contig
 *               file instead of the whole-BAM one (confirm against `getOutputFile`)
 */
def clipingFile(contig: Option[String] = None): File = getOutputFile("clipping.tsv", contig)
......@@ -60,30 +62,5 @@ class BamStats(val root: Configurable) extends ToolCommandFunction with Referenc
/** Files to register in the summary; this wrapper reports none, so the map is always empty. */
def summaryFiles: Map[String, File] = Map()
/**
 * Builds the summary statistics directly from the individual output files.
 *
 * Keys of the returned map:
 *  - `flagstats`: content of the flagstat summary JSON obtained with no contig
 *  - `flagstats_per_contig`: one entry per sequence in `referenceDict`, keyed by sequence name
 *  - `mapping_quality`: columns of the mapping-quality TSV as parsed by `BamStats.tsvToMap`
 *  - `clipping`: columns of the clipping TSV as parsed by `BamStats.tsvToMap`
 */
def summaryStats: Map[String, Any] = Map(
"flagstats" -> ConfigUtils.fileToConfigMap(flagstatSummaryFile()),
// One flagstat map per reference sequence, keyed by the sequence name.
"flagstats_per_contig" -> referenceDict.getSequences.map {
c => c.getSequenceName -> ConfigUtils.fileToConfigMap(flagstatSummaryFile(Some(c.getSequenceName)))
}.toMap,
"mapping_quality" -> BamStats.tsvToMap(mappingQualityFile()),
"clipping" -> BamStats.tsvToMap(clipingFile())
)
/**
 * Summary statistics loaded from the JSON file at `bamstatsSummary` via
 * `ConfigUtils.fileToConfigMap`.
 *
 * NOTE(review): presumably this file is produced by the tool run itself; confirm it exists
 * before this method is called.
 */
def summaryStats: Map[String, Any] = ConfigUtils.fileToConfigMap(bamstatsSummary)
}
object BamStats {

  /**
   * Reads a tab-separated file with a header line into a map from column name to column values.
   *
   * The first line supplies the column names; every following line must hold exactly as many
   * tab-separated integer fields as the header. The reader is closed even when parsing fails,
   * so a malformed file no longer leaks the open file handle.
   *
   * @param tsvFile tab-separated file to read
   * @return map of column name to the values of that column, in file order
   * @throws IllegalArgumentException when a line has a different number of fields than the header
   * @throws NumberFormatException when a field cannot be parsed as an integer
   */
  def tsvToMap(tsvFile: File): Map[String, Array[Int]] = {
    val reader = Source.fromFile(tsvFile)
    try {
      val it = reader.getLines()
      // An empty file fails here while reading the header, exactly as before.
      val header = it.next().split("\t")
      // One buffer per header column, paired with its column index.
      val columns = header.map(_ => ArrayBuffer[Int]()).zipWithIndex
      for (line <- it) {
        val values = line.split("\t")
        require(values.size == header.size, s"Line does not have the number of field as header: $line")
        for ((column, idx) <- columns) column.append(values(idx).toInt)
      }
      // With duplicate header names the last column wins, matching the previous behaviour.
      header.zip(columns.map(_._1.toArray)).toMap
    } finally reader.close()
  }
}
\ No newline at end of file
......@@ -14,19 +14,19 @@
*/
package nl.lumc.sasc.biopet.tools.bamstats
import java.io.File
import java.util.concurrent.TimeoutException
import java.io.{File, PrintWriter}
import htsjdk.samtools.reference.FastaSequenceFile
import htsjdk.samtools.{ SAMSequenceDictionary, SamReaderFactory }
import htsjdk.samtools.{SAMSequenceDictionary, SamReaderFactory}
import nl.lumc.sasc.biopet.utils.BamUtils.SamDictCheck
import nl.lumc.sasc.biopet.utils.{ FastaUtils, ToolCommand }
import nl.lumc.sasc.biopet.utils.intervals.{ BedRecord, BedRecordList }
import nl.lumc.sasc.biopet.utils.{ConfigUtils, FastaUtils, ToolCommand}
import nl.lumc.sasc.biopet.utils.intervals.{BedRecord, BedRecordList}
import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._
import scala.concurrent.{ Await, Future }
import scala.concurrent.{Await, Future}
import scala.io.Source
import scala.language.postfixOps
/**
......@@ -104,6 +104,20 @@ object BamStats extends ToolCommand {
val stats = waitOnFutures(processUnmappedReads(bamFile) :: contigsFutures.toList)
stats.writeStatsToFiles(outputDir)
val summary = Map(
"flagstats" -> ConfigUtils.fileToConfigMap(new File(outputDir, "flagstats.summary.json")),
"flagstats_per_contig" -> referenceDict.getSequences.map {
c => c.getSequenceName -> ConfigUtils.fileToConfigMap(
new File(outputDir, "contigs" + File.separator + c.getSequenceName + File.separator + "flagstats.summary.json"))
}.toMap,
"mapping_quality" -> Map("histogram" ->tsvToMap(new File(outputDir, "mapping_quality.tsv"))),
"clipping" -> Map("histogram" -> tsvToMap(new File(outputDir, "clipping.tsv")))
)
val summaryWriter = new PrintWriter(new File(outputDir, "bamstats.summary.json"))
summaryWriter.println(ConfigUtils.mapToJson(summary).spaces2)
summaryWriter.close()
}
/**
......@@ -209,4 +223,21 @@ object BamStats extends ToolCommand {
samReader.close()
stats
}
/**
 * Reads a tab-separated file with a header line into a map from column name to column values.
 *
 * The first line supplies the column names; every following line must hold exactly as many
 * tab-separated integer fields as the header. The reader is closed even when parsing fails,
 * so a malformed file no longer leaks the open file handle.
 *
 * @param tsvFile tab-separated file to read
 * @return map of column name to the values of that column, in file order
 * @throws IllegalArgumentException when a line has a different number of fields than the header
 * @throws NumberFormatException when a field cannot be parsed as an integer
 */
def tsvToMap(tsvFile: File): Map[String, Array[Int]] = {
  val reader = Source.fromFile(tsvFile)
  try {
    val it = reader.getLines()
    // An empty file fails here while reading the header, exactly as before.
    val header = it.next().split("\t")
    // One buffer per header column, paired with its column index.
    val columns = header.map(_ => ArrayBuffer[Int]()).zipWithIndex
    for (line <- it) {
      val values = line.split("\t")
      require(values.size == header.size, s"Line does not have the number of field as header: $line")
      for ((column, idx) <- columns) column.append(values(idx).toInt)
    }
    // With duplicate header names the last column wins, matching the previous behaviour.
    header.zip(columns.map(_._1.toArray)).toMap
  } finally reader.close()
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment