Commit 4b234972 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'patch-picard_summary' into 'develop'

Patch picard summary

Changes some of the summaries for Picard metrics.

See merge request !163
parents 74c523c5 4ab3687a
...@@ -30,7 +30,7 @@ trait Summarizable { ...@@ -30,7 +30,7 @@ trait Summarizable {
def summaryFiles: Map[String, File] def summaryFiles: Map[String, File]
/** Must returns stats to store into summary */ /** Must returns stats to store into summary */
def summaryStats: Map[String, Any] def summaryStats: Any
/** Can be used to add additional Summarizable, this is executed at the start of WriteSummary*/ /** Can be used to add additional Summarizable, this is executed at the start of WriteSummary*/
def addToQscriptSummary(qscript: SummaryQScript, name: String) {} def addToQscriptSummary(qscript: SummaryQScript, name: String) {}
......
...@@ -139,7 +139,7 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config ...@@ -139,7 +139,7 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
val stats = summarizable.summaryStats val stats = summarizable.summaryStats
val files = parseFiles(summarizable.summaryFiles) val files = parseFiles(summarizable.summaryFiles)
(if (stats.isEmpty) Map[String, Any]() else Map("stats" -> Map(name -> stats))) ++ (Map("stats" -> Map(name -> stats))) ++
(if (files.isEmpty) Map[String, Any]() else Map("files" -> Map(name -> files))) (if (files.isEmpty) Map[String, Any]() else Map("files" -> Map(name -> files)))
} }
......
...@@ -63,7 +63,7 @@ class CalculateHsMetrics(val root: Configurable) extends Picard with Summarizabl ...@@ -63,7 +63,7 @@ class CalculateHsMetrics(val root: Configurable) extends Picard with Summarizabl
def summaryFiles: Map[String, File] = Map() def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */ /** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) def summaryStats: Any = Picard.getMetrics(output).getOrElse(Map())
} }
object CalculateHsMetrics { object CalculateHsMetrics {
......
...@@ -66,7 +66,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with ...@@ -66,7 +66,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with
def summaryFiles: Map[String, File] = Map() def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */ /** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) def summaryStats = Picard.getMetrics(output).getOrElse(Map())
} }
object CollectAlignmentSummaryMetrics { object CollectAlignmentSummaryMetrics {
......
...@@ -71,7 +71,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard with Summariza ...@@ -71,7 +71,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard with Summariza
def summaryFiles: Map[String, File] = Map() def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */ /** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) def summaryStats = Picard.getHistogram(output, tag = "METRICS CLASS")
} }
object CollectGcBiasMetrics { object CollectGcBiasMetrics {
......
...@@ -75,7 +75,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa ...@@ -75,7 +75,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa
/** Returns files for summary */ /** Returns files for summary */
def summaryFiles: Map[String, File] = Map("output_histogram" -> outputHistogram) def summaryFiles: Map[String, File] = Map("output_histogram" -> outputHistogram)
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) def summaryStats = Picard.getMetrics(output).getOrElse(Map())
} }
object CollectInsertSizeMetrics { object CollectInsertSizeMetrics {
......
...@@ -68,25 +68,25 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari ...@@ -68,25 +68,25 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari
override def addToQscriptSummary(qscript: SummaryQScript, name: String): Unit = { override def addToQscriptSummary(qscript: SummaryQScript, name: String): Unit = {
program.foreach(p => { program.foreach(p => {
val stats: Map[String, Any] = p match { val stats: Any = p match {
case _ if p == Programs.CollectAlignmentSummaryMetrics.toString => case _ if p == Programs.CollectAlignmentSummaryMetrics.toString =>
Picard.getMetrics(new File(outputName + ".alignment_summary_metrics")).getOrElse(Map()) Picard.getMetrics(new File(outputName + ".alignment_summary_metrics"), groupBy = Some("CATEGORY"))
case _ if p == Programs.CollectInsertSizeMetrics.toString => case _ if p == Programs.CollectInsertSizeMetrics.toString =>
Map( Map(
"metrics" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics")).getOrElse(Map()), "metrics" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics")),
"histogram" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics"), "HISTOGRAM").getOrElse(Map()) "histogram" -> Picard.getHistogram(new File(outputName + ".insert_size_metrics"))
) )
case _ if p == Programs.QualityScoreDistribution.toString => case _ if p == Programs.QualityScoreDistribution.toString =>
Picard.getMetrics(new File(outputName + ".quality_distribution_metrics"), "HISTOGRAM").getOrElse(Map()) Picard.getHistogram(new File(outputName + ".quality_distribution_metrics"))
case _ if p == Programs.MeanQualityByCycle.toString => case _ if p == Programs.MeanQualityByCycle.toString =>
Picard.getMetrics(new File(outputName + ".quality_by_cycle_metrics"), "HISTOGRAM").getOrElse(Map()) Picard.getHistogram(new File(outputName + ".quality_by_cycle_metrics"))
case _ if p == Programs.CollectBaseDistributionByCycle.toString => case _ if p == Programs.CollectBaseDistributionByCycle.toString =>
Picard.getMetrics(new File(outputName + ".base_distribution_by_cycle_metrics")).getOrElse(Map()) Picard.getHistogram(new File(outputName + ".base_distribution_by_cycle_metrics"), tag = "METRICS CLASS")
case _ => Map() case _ => None
} }
val sum = new Summarizable { val sum = new Summarizable {
override def summaryFiles: Map[String, File] = Map() override def summaryFiles: Map[String, File] = Map()
override def summaryStats: Map[String, Any] = stats override def summaryStats = stats
} }
qscript.addSummarizable(sum, p) qscript.addSummarizable(sum, p)
}) })
......
...@@ -84,7 +84,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza ...@@ -84,7 +84,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza
"output_chart" -> chartOutput "output_chart" -> chartOutput
).collect { case (key, Some(value)) => key -> value } ).collect { case (key, Some(value)) => key -> value }
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) def summaryStats = Picard.getMetrics(output).getOrElse(Map())
override def commandLine = super.commandLine + override def commandLine = super.commandLine +
required("INPUT=", input, spaceSeparated = false) + required("INPUT=", input, spaceSeparated = false) +
......
...@@ -51,7 +51,7 @@ class CollectTargetedPcrMetrics(val root: Configurable) extends Picard with Summ ...@@ -51,7 +51,7 @@ class CollectTargetedPcrMetrics(val root: Configurable) extends Picard with Summ
def summaryFiles: Map[String, File] = Map() def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */ /** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) def summaryStats = Picard.getMetrics(output).getOrElse(Map())
} }
object CollectTargetedPcrMetrics { object CollectTargetedPcrMetrics {
......
...@@ -51,5 +51,5 @@ class CollectWgsMetrics(val root: Configurable) extends Picard with Summarizable ...@@ -51,5 +51,5 @@ class CollectWgsMetrics(val root: Configurable) extends Picard with Summarizable
def summaryFiles: Map[String, File] = Map() def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */ /** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) def summaryStats = Picard.getMetrics(output).getOrElse(Map())
} }
...@@ -100,7 +100,7 @@ class MarkDuplicates(val root: Configurable) extends Picard with Summarizable { ...@@ -100,7 +100,7 @@ class MarkDuplicates(val root: Configurable) extends Picard with Summarizable {
def summaryFiles: Map[String, File] = Map() def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */ /** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(outputMetrics).getOrElse(Map()) def summaryStats = Picard.getMetrics(outputMetrics).getOrElse(Map())
} }
object MarkDuplicates { object MarkDuplicates {
/** Returns default MarkDuplicates */ /** Returns default MarkDuplicates */
......
...@@ -20,7 +20,7 @@ import scala.io.Source ...@@ -20,7 +20,7 @@ import scala.io.Source
import org.broadinstitute.gatk.utils.commandline.Argument import org.broadinstitute.gatk.utils.commandline.Argument
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction import nl.lumc.sasc.biopet.core.{ Logging, BiopetJavaCommandLineFunction }
import nl.lumc.sasc.biopet.utils.tryToParseNumber import nl.lumc.sasc.biopet.utils.tryToParseNumber
/** /**
...@@ -74,14 +74,51 @@ abstract class Picard extends BiopetJavaCommandLineFunction { ...@@ -74,14 +74,51 @@ abstract class Picard extends BiopetJavaCommandLineFunction {
conditional(createMd5, "CREATE_MD5_FILE=TRUE") conditional(createMd5, "CREATE_MD5_FILE=TRUE")
} }
object Picard { object Picard extends Logging {
/**
 * Parses a metrics file and shapes the result for use in a summary.
 *
 * @param file metrics file
 * @param tag tag passed on to getMetricsContent, defaults to "METRICS CLASS"
 * @param groupBy optional header name; when given, every row is keyed by the
 *                string value it holds in that column
 * @return None when getMetricsContent yields nothing, otherwise Some of:
 *         - with groupBy: a map from group value to a (header name -> value) map of
 *           the remaining columns of that row
 *         - without groupBy and exactly one row: a (header name -> value) map
 *         - otherwise: the header followed by all rows (header :: content)
 * @throws IllegalArgumentException when groupBy is not present in the header
 */
def getMetrics(file: File, tag: String = "METRICS CLASS",
               groupBy: Option[String] = None): Option[Any] = {
  getMetricsContent(file, tag) match {
    case Some((header, content)) =>
      groupBy match {
        case Some(group) =>
          val groupId = header.indexOf(group)
          if (groupId == -1) throw new IllegalArgumentException(group + " not existing in header of: " + file)
          if (header.count(_ == group) > 1) logger.warn(group + " multiple times seen in header of: " + file)
          // Remove only the grouping column itself (by position) from both the header and
          // each row. Filtering the header by name would drop every duplicate of the group
          // column while a row loses just one cell, shifting all following values under
          // the wrong header names.
          val remainingHeader = header.patch(groupId, Nil, 1)
          // NOTE(review): a row with fewer than groupId + 1 cells still fails on row(groupId),
          // exactly as before; confirm whether such rows can occur.
          Some(content.map { row =>
            row(groupId).toString -> remainingHeader.zip(row.patch(groupId, Nil, 1)).toMap
          }.toMap)
        case None if content.size == 1 => Some(header.zip(content.head).toMap)
        case None                      => Some(header :: content)
      }
    case _ => None
  }
}
/**
 * Parses a metrics file and returns its table transposed, i.e. keyed by column.
 *
 * @param file metrics file
 * @param tag tag passed on to getMetricsContent, defaults to "HISTOGRAM"
 * @return None when getMetricsContent yields nothing, otherwise a map from each
 *         header name to the list of values found in that column; a value is None
 *         for rows that have no cell at that column index
 */
def getHistogram(file: File, tag: String = "HISTOGRAM") =
  getMetricsContent(file, tag).map {
    case (header, content) =>
      header.zipWithIndex.map {
        case (columnName, columnIndex) => columnName -> content.map(row => row.lift(columnIndex))
      }.toMap
  }
/** /**
* This function parse a metrics file in separated values * This function parse a metrics file in separated values
* @param file input metrics file * @param file input metrics file
* @return (header, content) * @return (header, content)
*/ */
def getMetrics(file: File, tag: String = "METRICS CLASS"): Option[Map[String, Any]] = def getMetricsContent(file: File, tag: String) = {
if (!file.exists) None if (!file.exists) None
else { else {
val lines = Source.fromFile(file).getLines().toArray val lines = Source.fromFile(file).getLines().toArray
...@@ -94,6 +131,7 @@ object Picard { ...@@ -94,6 +131,7 @@ object Picard {
lines(i).split("\t").map(v => tryToParseNumber(v, true).getOrElse(v)).toList lines(i).split("\t").map(v => tryToParseNumber(v, true).getOrElse(v)).toList
}).toList }).toList
Some(Map("content" -> (header :: content))) Some(header, content)
}
} }
} }
\ No newline at end of file
...@@ -160,6 +160,7 @@ object ConfigUtils extends Logging { ...@@ -160,6 +160,7 @@ object ConfigUtils extends Logging {
any match { any match {
case j: Json => j case j: Json => j
case None => Json.jNull case None => Json.jNull
case Some(x) => anyToJson(x)
case m: Map[_, _] => mapToJson(m.map(m => m._1.toString -> anyToJson(m._2))) case m: Map[_, _] => mapToJson(m.map(m => m._1.toString -> anyToJson(m._2)))
case l: List[_] => Json.array(l.map(anyToJson(_)): _*) case l: List[_] => Json.array(l.map(anyToJson(_)): _*)
case b: Boolean => Json.jBool(b) case b: Boolean => Json.jBool(b)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment