Commit 4b234972 authored by Wai Yi Leung
Browse files

Merge branch 'patch-picard_summary' into 'develop'

Patch picard summary

changing some summaries for picard metrics

See merge request !163
parents 74c523c5 4ab3687a
......@@ -30,7 +30,7 @@ trait Summarizable {
def summaryFiles: Map[String, File]
/** Must return stats to store into summary */
def summaryStats: Map[String, Any]
def summaryStats: Any
/** Can be used to add additional Summarizable, this is executed at the start of WriteSummary*/
def addToQscriptSummary(qscript: SummaryQScript, name: String) {}
......
......@@ -139,7 +139,7 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
val stats = summarizable.summaryStats
val files = parseFiles(summarizable.summaryFiles)
(if (stats.isEmpty) Map[String, Any]() else Map("stats" -> Map(name -> stats))) ++
(Map("stats" -> Map(name -> stats))) ++
(if (files.isEmpty) Map[String, Any]() else Map("files" -> Map(name -> files)))
}
......
......@@ -63,7 +63,7 @@ class CalculateHsMetrics(val root: Configurable) extends Picard with Summarizabl
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
def summaryStats: Any = Picard.getMetrics(output).getOrElse(Map())
}
object CalculateHsMetrics {
......
......@@ -66,7 +66,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
def summaryStats = Picard.getMetrics(output).getOrElse(Map())
}
object CollectAlignmentSummaryMetrics {
......
......@@ -71,7 +71,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard with Summariza
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
def summaryStats = Picard.getHistogram(output, tag = "METRICS CLASS")
}
object CollectGcBiasMetrics {
......
......@@ -75,7 +75,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa
/** Returns files for summary */
def summaryFiles: Map[String, File] = Map("output_histogram" -> outputHistogram)
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
def summaryStats = Picard.getMetrics(output).getOrElse(Map())
}
object CollectInsertSizeMetrics {
......
......@@ -68,25 +68,25 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari
override def addToQscriptSummary(qscript: SummaryQScript, name: String): Unit = {
program.foreach(p => {
val stats: Map[String, Any] = p match {
val stats: Any = p match {
case _ if p == Programs.CollectAlignmentSummaryMetrics.toString =>
Picard.getMetrics(new File(outputName + ".alignment_summary_metrics")).getOrElse(Map())
Picard.getMetrics(new File(outputName + ".alignment_summary_metrics"), groupBy = Some("CATEGORY"))
case _ if p == Programs.CollectInsertSizeMetrics.toString =>
Map(
"metrics" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics")).getOrElse(Map()),
"histogram" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics"), "HISTOGRAM").getOrElse(Map())
"metrics" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics")),
"histogram" -> Picard.getHistogram(new File(outputName + ".insert_size_metrics"))
)
case _ if p == Programs.QualityScoreDistribution.toString =>
Picard.getMetrics(new File(outputName + ".quality_distribution_metrics"), "HISTOGRAM").getOrElse(Map())
Picard.getHistogram(new File(outputName + ".quality_distribution_metrics"))
case _ if p == Programs.MeanQualityByCycle.toString =>
Picard.getMetrics(new File(outputName + ".quality_by_cycle_metrics"), "HISTOGRAM").getOrElse(Map())
Picard.getHistogram(new File(outputName + ".quality_by_cycle_metrics"))
case _ if p == Programs.CollectBaseDistributionByCycle.toString =>
Picard.getMetrics(new File(outputName + ".base_distribution_by_cycle_metrics")).getOrElse(Map())
case _ => Map()
Picard.getHistogram(new File(outputName + ".base_distribution_by_cycle_metrics"), tag = "METRICS CLASS")
case _ => None
}
val sum = new Summarizable {
override def summaryFiles: Map[String, File] = Map()
override def summaryStats: Map[String, Any] = stats
override def summaryStats = stats
}
qscript.addSummarizable(sum, p)
})
......
......@@ -84,7 +84,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza
"output_chart" -> chartOutput
).collect { case (key, Some(value)) => key -> value }
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
def summaryStats = Picard.getMetrics(output).getOrElse(Map())
override def commandLine = super.commandLine +
required("INPUT=", input, spaceSeparated = false) +
......
......@@ -51,7 +51,7 @@ class CollectTargetedPcrMetrics(val root: Configurable) extends Picard with Summ
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
def summaryStats = Picard.getMetrics(output).getOrElse(Map())
}
object CollectTargetedPcrMetrics {
......
......@@ -51,5 +51,5 @@ class CollectWgsMetrics(val root: Configurable) extends Picard with Summarizable
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
def summaryStats = Picard.getMetrics(output).getOrElse(Map())
}
......@@ -100,7 +100,7 @@ class MarkDuplicates(val root: Configurable) extends Picard with Summarizable {
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(outputMetrics).getOrElse(Map())
def summaryStats = Picard.getMetrics(outputMetrics).getOrElse(Map())
}
object MarkDuplicates {
/** Returns default MarkDuplicates */
......
......@@ -20,7 +20,7 @@ import scala.io.Source
import org.broadinstitute.gatk.utils.commandline.Argument
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Logging, BiopetJavaCommandLineFunction }
import nl.lumc.sasc.biopet.utils.tryToParseNumber
/**
......@@ -74,14 +74,51 @@ abstract class Picard extends BiopetJavaCommandLineFunction {
conditional(createMd5, "CREATE_MD5_FILE=TRUE")
}
object Picard {
object Picard extends Logging {
/**
 * Reads the content of a metrics file through getMetricsContent and shapes it for the summary.
 *
 * @param file metrics file to read
 * @param tag forwarded unchanged to getMetricsContent, defaults to "METRICS CLASS"
 *            (presumably the marker of the section to read -- confirm against getMetricsContent)
 * @param groupBy optional header column; when given, its value in each row becomes the key of that row
 * @return None when getMetricsContent yields nothing; otherwise Some of
 *         - with groupBy: a map from each row's group value (as a string) to a map of the
 *           other columns of that row (rows sharing a group value overwrite each other),
 *         - with exactly one row: a map from column name to value,
 *         - in all other cases: a list holding the header followed by every row.
 * @throws IllegalArgumentException when groupBy names a column that is not in the header
 */
def getMetrics(file: File, tag: String = "METRICS CLASS",
               groupBy: Option[String] = None): Option[Any] = {
  getMetricsContent(file, tag) match {
    case Some((header, content)) =>
      (content.size, groupBy) match {
        case (_, Some(group)) =>
          // indexOf picks the first column carrying the group name; that one supplies the keys.
          val groupId = header.indexOf(group)
          if (groupId == -1) throw new IllegalArgumentException(group + " not existing in header of: " + file)
          if (header.count(_ == group) > 1) logger.warn(group + " multiple times seen in header of: " + file)
          val grouped = for (row <- content) yield {
            val key = row(groupId).toString
            // Pair names with cells BEFORE removing the group column, and remove it by position.
            // Filtering the header by name would drop every duplicate of the group column while
            // the row loses a single cell, shifting all later values under the wrong name.
            val others = header.zip(row).zipWithIndex.collect {
              case (pair, idx) if idx != groupId => pair
            }
            key -> others.toMap
          }
          Some(grouped.toMap)
        case (1, _) => Some(header.zip(content.head).toMap)
        case _      => Some(header :: content)
      }
    case _ => None
  }
}
/**
 * Parses the metrics content and transposes it, so that the result is organised per column
 * instead of per row.
 * @param file metrics file
 * @param tag defaults to "HISTOGRAM"
 * @return None when getMetricsContent yields nothing, otherwise a map from each header name to
 *         the values of that column: one Option per row, None where a row has no cell at that position
 */
def getHistogram(file: File, tag: String = "HISTOGRAM") = {
  getMetricsContent(file, tag).map {
    case (header, content) =>
      header.zipWithIndex.map {
        case (name, index) => name -> content.map(row => row.lift(index))
      }.toMap
  }
}
/**
* This function parses a tab-separated metrics file
* @param file input metrics file
* @return (header, content)
*/
def getMetrics(file: File, tag: String = "METRICS CLASS"): Option[Map[String, Any]] =
def getMetricsContent(file: File, tag: String) = {
if (!file.exists) None
else {
val lines = Source.fromFile(file).getLines().toArray
......@@ -94,6 +131,7 @@ object Picard {
lines(i).split("\t").map(v => tryToParseNumber(v, true).getOrElse(v)).toList
}).toList
Some(Map("content" -> (header :: content)))
Some(header, content)
}
}
}
\ No newline at end of file
......@@ -160,6 +160,7 @@ object ConfigUtils extends Logging {
any match {
case j: Json => j
case None => Json.jNull
case Some(x) => anyToJson(x)
case m: Map[_, _] => mapToJson(m.map(m => m._1.toString -> anyToJson(m._2)))
case l: List[_] => Json.array(l.map(anyToJson(_)): _*)
case b: Boolean => Json.jBool(b)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment