Skip to content
Snippets Groups Projects
Commit 4b234972 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'patch-picard_summary' into 'develop'

Patch picard summary

changing some summaries for picard metrics

See merge request !163
parents 74c523c5 4ab3687a
No related branches found
No related tags found
No related merge requests found
Showing
with 62 additions and 23 deletions
......@@ -30,7 +30,7 @@ trait Summarizable {
def summaryFiles: Map[String, File]
/** Must return the stats to store in the summary */
def summaryStats: Map[String, Any]
def summaryStats: Any
/** Can be used to add additional Summarizable, this is executed at the start of WriteSummary*/
def addToQscriptSummary(qscript: SummaryQScript, name: String) {}
......
......@@ -139,7 +139,7 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
val stats = summarizable.summaryStats
val files = parseFiles(summarizable.summaryFiles)
(if (stats.isEmpty) Map[String, Any]() else Map("stats" -> Map(name -> stats))) ++
(Map("stats" -> Map(name -> stats))) ++
(if (files.isEmpty) Map[String, Any]() else Map("files" -> Map(name -> files)))
}
......
......@@ -63,7 +63,7 @@ class CalculateHsMetrics(val root: Configurable) extends Picard with Summarizabl
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
def summaryStats: Any = Picard.getMetrics(output).getOrElse(Map())
}
object CalculateHsMetrics {
......
......@@ -66,7 +66,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
def summaryStats = Picard.getMetrics(output).getOrElse(Map())
}
object CollectAlignmentSummaryMetrics {
......
......@@ -71,7 +71,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard with Summariza
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
def summaryStats = Picard.getHistogram(output, tag = "METRICS CLASS")
}
object CollectGcBiasMetrics {
......
......@@ -75,7 +75,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa
/** Returns files for summary */
def summaryFiles: Map[String, File] = Map("output_histogram" -> outputHistogram)
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
def summaryStats = Picard.getMetrics(output).getOrElse(Map())
}
object CollectInsertSizeMetrics {
......
......@@ -68,25 +68,25 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari
override def addToQscriptSummary(qscript: SummaryQScript, name: String): Unit = {
program.foreach(p => {
val stats: Map[String, Any] = p match {
val stats: Any = p match {
case _ if p == Programs.CollectAlignmentSummaryMetrics.toString =>
Picard.getMetrics(new File(outputName + ".alignment_summary_metrics")).getOrElse(Map())
Picard.getMetrics(new File(outputName + ".alignment_summary_metrics"), groupBy = Some("CATEGORY"))
case _ if p == Programs.CollectInsertSizeMetrics.toString =>
Map(
"metrics" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics")).getOrElse(Map()),
"histogram" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics"), "HISTOGRAM").getOrElse(Map())
"metrics" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics")),
"histogram" -> Picard.getHistogram(new File(outputName + ".insert_size_metrics"))
)
case _ if p == Programs.QualityScoreDistribution.toString =>
Picard.getMetrics(new File(outputName + ".quality_distribution_metrics"), "HISTOGRAM").getOrElse(Map())
Picard.getHistogram(new File(outputName + ".quality_distribution_metrics"))
case _ if p == Programs.MeanQualityByCycle.toString =>
Picard.getMetrics(new File(outputName + ".quality_by_cycle_metrics"), "HISTOGRAM").getOrElse(Map())
Picard.getHistogram(new File(outputName + ".quality_by_cycle_metrics"))
case _ if p == Programs.CollectBaseDistributionByCycle.toString =>
Picard.getMetrics(new File(outputName + ".base_distribution_by_cycle_metrics")).getOrElse(Map())
case _ => Map()
Picard.getHistogram(new File(outputName + ".base_distribution_by_cycle_metrics"), tag = "METRICS CLASS")
case _ => None
}
val sum = new Summarizable {
override def summaryFiles: Map[String, File] = Map()
override def summaryStats: Map[String, Any] = stats
override def summaryStats = stats
}
qscript.addSummarizable(sum, p)
})
......
......@@ -84,7 +84,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza
"output_chart" -> chartOutput
).collect { case (key, Some(value)) => key -> value }
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
def summaryStats = Picard.getMetrics(output).getOrElse(Map())
override def commandLine = super.commandLine +
required("INPUT=", input, spaceSeparated = false) +
......
......@@ -51,7 +51,7 @@ class CollectTargetedPcrMetrics(val root: Configurable) extends Picard with Summ
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
def summaryStats = Picard.getMetrics(output).getOrElse(Map())
}
object CollectTargetedPcrMetrics {
......
......@@ -51,5 +51,5 @@ class CollectWgsMetrics(val root: Configurable) extends Picard with Summarizable
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map())
def summaryStats = Picard.getMetrics(output).getOrElse(Map())
}
......@@ -100,7 +100,7 @@ class MarkDuplicates(val root: Configurable) extends Picard with Summarizable {
def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(outputMetrics).getOrElse(Map())
def summaryStats = Picard.getMetrics(outputMetrics).getOrElse(Map())
}
object MarkDuplicates {
/** Returns default MarkDuplicates */
......
......@@ -20,7 +20,7 @@ import scala.io.Source
import org.broadinstitute.gatk.utils.commandline.Argument
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Logging, BiopetJavaCommandLineFunction }
import nl.lumc.sasc.biopet.utils.tryToParseNumber
/**
......@@ -74,14 +74,51 @@ abstract class Picard extends BiopetJavaCommandLineFunction {
conditional(createMd5, "CREATE_MD5_FILE=TRUE")
}
object Picard {
object Picard extends Logging {
/**
 * Parses the section of a metrics file announced by `tag` and shapes the result.
 *
 * The shape of the result depends on `groupBy` and on the number of content rows:
 *  - `groupBy` given: a map keyed by the string form of each row's value in that column;
 *    every value is a map from the remaining header names to the values of that row.
 *  - no `groupBy` and exactly one row: a map from header name to value.
 *  - otherwise: the header followed by all rows, as a list of lists.
 *
 * @param file metrics file to read
 * @param tag marker handed to `getMetricsContent`, defaults to "METRICS CLASS"
 * @param groupBy optional header name whose column is used as key for the rows
 * @return the parsed metrics, or `None` when `getMetricsContent` returns `None`
 * @throws IllegalArgumentException when `groupBy` is not found in the header
 */
def getMetrics(file: File, tag: String = "METRICS CLASS",
               groupBy: Option[String] = None): Option[Any] = {
  getMetricsContent(file, tag).map {
    case (header, content) =>
      groupBy match {
        case Some(group) =>
          val groupId = header.indexOf(group)
          if (groupId == -1) throw new IllegalArgumentException(group + " not existing in header of: " + file)
          if (header.count(_ == group) > 1) logger.warn(group + " multiple times seen in header of: " + file)
          content.map { row =>
            // Pair names with values BEFORE dropping the group column(s): filtering the
            // header by name while removing only one value by index shifts every value
            // after a repeated group column onto the wrong header name.
            val remaining = header.zip(row).filter { case (name, _) => name != group }.toMap
            row(groupId).toString -> remaining
          }.toMap
        case None if content.size == 1 => header.zip(content.head).toMap
        case None => header :: content
      }
  }
}
/**
 * Reads the section of a metrics file announced by `tag` and returns it column-wise.
 *
 * Every header name is mapped to the list of values found at its position, one entry
 * per content row; a row that has no value at that position contributes `None`.
 *
 * @param file metrics file
 * @param tag marker handed to `getMetricsContent`, defaults to "HISTOGRAM"
 * @return the columns keyed by header name, or `None` when `getMetricsContent` returns `None`
 */
def getHistogram(file: File, tag: String = "HISTOGRAM") =
  getMetricsContent(file, tag).map {
    case (header, content) =>
      val columnsByName = header.zipWithIndex.map {
        case (name, index) => name -> content.map(row => row.lift(index))
      }
      columnsByName.toMap
  }
/**
* This function parses a metrics file in separated values
* @param file input metrics file
* @return (header, content)
*/
def getMetrics(file: File, tag: String = "METRICS CLASS"): Option[Map[String, Any]] =
def getMetricsContent(file: File, tag: String) = {
if (!file.exists) None
else {
val lines = Source.fromFile(file).getLines().toArray
......@@ -94,6 +131,7 @@ object Picard {
lines(i).split("\t").map(v => tryToParseNumber(v, true).getOrElse(v)).toList
}).toList
Some(Map("content" -> (header :: content)))
Some(header, content)
}
}
}
\ No newline at end of file
......@@ -160,6 +160,7 @@ object ConfigUtils extends Logging {
any match {
case j: Json => j
case None => Json.jNull
case Some(x) => anyToJson(x)
case m: Map[_, _] => mapToJson(m.map(m => m._1.toString -> anyToJson(m._2)))
case l: List[_] => Json.array(l.map(anyToJson(_)): _*)
case b: Boolean => Json.jBool(b)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment