Skip to content
Snippets Groups Projects
Commit 4b234972 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'patch-picard_summary' into 'develop'

Patch picard summary

changing some summaries for picard metrics

See merge request !163
parents 74c523c5 4ab3687a
No related branches found
No related tags found
No related merge requests found
Showing
with 62 additions and 23 deletions
...@@ -30,7 +30,7 @@ trait Summarizable { ...@@ -30,7 +30,7 @@ trait Summarizable {
def summaryFiles: Map[String, File] def summaryFiles: Map[String, File]
/** Must return stats to store into summary */ /** Must return stats to store into summary */
def summaryStats: Map[String, Any] def summaryStats: Any
/** Can be used to add additional Summarizable, this is executed at the start of WriteSummary*/ /** Can be used to add additional Summarizable, this is executed at the start of WriteSummary*/
def addToQscriptSummary(qscript: SummaryQScript, name: String) {} def addToQscriptSummary(qscript: SummaryQScript, name: String) {}
......
...@@ -139,7 +139,7 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config ...@@ -139,7 +139,7 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
val stats = summarizable.summaryStats val stats = summarizable.summaryStats
val files = parseFiles(summarizable.summaryFiles) val files = parseFiles(summarizable.summaryFiles)
(if (stats.isEmpty) Map[String, Any]() else Map("stats" -> Map(name -> stats))) ++ (Map("stats" -> Map(name -> stats))) ++
(if (files.isEmpty) Map[String, Any]() else Map("files" -> Map(name -> files))) (if (files.isEmpty) Map[String, Any]() else Map("files" -> Map(name -> files)))
} }
......
...@@ -63,7 +63,7 @@ class CalculateHsMetrics(val root: Configurable) extends Picard with Summarizabl ...@@ -63,7 +63,7 @@ class CalculateHsMetrics(val root: Configurable) extends Picard with Summarizabl
def summaryFiles: Map[String, File] = Map() def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */ /** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) def summaryStats: Any = Picard.getMetrics(output).getOrElse(Map())
} }
object CalculateHsMetrics { object CalculateHsMetrics {
......
...@@ -66,7 +66,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with ...@@ -66,7 +66,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with
def summaryFiles: Map[String, File] = Map() def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */ /** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) def summaryStats = Picard.getMetrics(output).getOrElse(Map())
} }
object CollectAlignmentSummaryMetrics { object CollectAlignmentSummaryMetrics {
......
...@@ -71,7 +71,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard with Summariza ...@@ -71,7 +71,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard with Summariza
def summaryFiles: Map[String, File] = Map() def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */ /** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) def summaryStats = Picard.getHistogram(output, tag = "METRICS CLASS")
} }
object CollectGcBiasMetrics { object CollectGcBiasMetrics {
......
...@@ -75,7 +75,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa ...@@ -75,7 +75,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa
/** Returns files for summary */ /** Returns files for summary */
def summaryFiles: Map[String, File] = Map("output_histogram" -> outputHistogram) def summaryFiles: Map[String, File] = Map("output_histogram" -> outputHistogram)
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) def summaryStats = Picard.getMetrics(output).getOrElse(Map())
} }
object CollectInsertSizeMetrics { object CollectInsertSizeMetrics {
......
...@@ -68,25 +68,25 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari ...@@ -68,25 +68,25 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari
override def addToQscriptSummary(qscript: SummaryQScript, name: String): Unit = { override def addToQscriptSummary(qscript: SummaryQScript, name: String): Unit = {
program.foreach(p => { program.foreach(p => {
val stats: Map[String, Any] = p match { val stats: Any = p match {
case _ if p == Programs.CollectAlignmentSummaryMetrics.toString => case _ if p == Programs.CollectAlignmentSummaryMetrics.toString =>
Picard.getMetrics(new File(outputName + ".alignment_summary_metrics")).getOrElse(Map()) Picard.getMetrics(new File(outputName + ".alignment_summary_metrics"), groupBy = Some("CATEGORY"))
case _ if p == Programs.CollectInsertSizeMetrics.toString => case _ if p == Programs.CollectInsertSizeMetrics.toString =>
Map( Map(
"metrics" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics")).getOrElse(Map()), "metrics" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics")),
"histogram" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics"), "HISTOGRAM").getOrElse(Map()) "histogram" -> Picard.getHistogram(new File(outputName + ".insert_size_metrics"))
) )
case _ if p == Programs.QualityScoreDistribution.toString => case _ if p == Programs.QualityScoreDistribution.toString =>
Picard.getMetrics(new File(outputName + ".quality_distribution_metrics"), "HISTOGRAM").getOrElse(Map()) Picard.getHistogram(new File(outputName + ".quality_distribution_metrics"))
case _ if p == Programs.MeanQualityByCycle.toString => case _ if p == Programs.MeanQualityByCycle.toString =>
Picard.getMetrics(new File(outputName + ".quality_by_cycle_metrics"), "HISTOGRAM").getOrElse(Map()) Picard.getHistogram(new File(outputName + ".quality_by_cycle_metrics"))
case _ if p == Programs.CollectBaseDistributionByCycle.toString => case _ if p == Programs.CollectBaseDistributionByCycle.toString =>
Picard.getMetrics(new File(outputName + ".base_distribution_by_cycle_metrics")).getOrElse(Map()) Picard.getHistogram(new File(outputName + ".base_distribution_by_cycle_metrics"), tag = "METRICS CLASS")
case _ => Map() case _ => None
} }
val sum = new Summarizable { val sum = new Summarizable {
override def summaryFiles: Map[String, File] = Map() override def summaryFiles: Map[String, File] = Map()
override def summaryStats: Map[String, Any] = stats override def summaryStats = stats
} }
qscript.addSummarizable(sum, p) qscript.addSummarizable(sum, p)
}) })
......
...@@ -84,7 +84,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza ...@@ -84,7 +84,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza
"output_chart" -> chartOutput "output_chart" -> chartOutput
).collect { case (key, Some(value)) => key -> value } ).collect { case (key, Some(value)) => key -> value }
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) def summaryStats = Picard.getMetrics(output).getOrElse(Map())
override def commandLine = super.commandLine + override def commandLine = super.commandLine +
required("INPUT=", input, spaceSeparated = false) + required("INPUT=", input, spaceSeparated = false) +
......
...@@ -51,7 +51,7 @@ class CollectTargetedPcrMetrics(val root: Configurable) extends Picard with Summ ...@@ -51,7 +51,7 @@ class CollectTargetedPcrMetrics(val root: Configurable) extends Picard with Summ
def summaryFiles: Map[String, File] = Map() def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */ /** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) def summaryStats = Picard.getMetrics(output).getOrElse(Map())
} }
object CollectTargetedPcrMetrics { object CollectTargetedPcrMetrics {
......
...@@ -51,5 +51,5 @@ class CollectWgsMetrics(val root: Configurable) extends Picard with Summarizable ...@@ -51,5 +51,5 @@ class CollectWgsMetrics(val root: Configurable) extends Picard with Summarizable
def summaryFiles: Map[String, File] = Map() def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */ /** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) def summaryStats = Picard.getMetrics(output).getOrElse(Map())
} }
...@@ -100,7 +100,7 @@ class MarkDuplicates(val root: Configurable) extends Picard with Summarizable { ...@@ -100,7 +100,7 @@ class MarkDuplicates(val root: Configurable) extends Picard with Summarizable {
def summaryFiles: Map[String, File] = Map() def summaryFiles: Map[String, File] = Map()
/** Returns stats for summary */ /** Returns stats for summary */
def summaryStats: Map[String, Any] = Picard.getMetrics(outputMetrics).getOrElse(Map()) def summaryStats = Picard.getMetrics(outputMetrics).getOrElse(Map())
} }
object MarkDuplicates { object MarkDuplicates {
/** Returns default MarkDuplicates */ /** Returns default MarkDuplicates */
......
...@@ -20,7 +20,7 @@ import scala.io.Source ...@@ -20,7 +20,7 @@ import scala.io.Source
import org.broadinstitute.gatk.utils.commandline.Argument import org.broadinstitute.gatk.utils.commandline.Argument
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction import nl.lumc.sasc.biopet.core.{ Logging, BiopetJavaCommandLineFunction }
import nl.lumc.sasc.biopet.utils.tryToParseNumber import nl.lumc.sasc.biopet.utils.tryToParseNumber
/** /**
...@@ -74,14 +74,51 @@ abstract class Picard extends BiopetJavaCommandLineFunction { ...@@ -74,14 +74,51 @@ abstract class Picard extends BiopetJavaCommandLineFunction {
conditional(createMd5, "CREATE_MD5_FILE=TRUE") conditional(createMd5, "CREATE_MD5_FILE=TRUE")
} }
object Picard { object Picard extends Logging {
/**
 * Parses one section of a metrics file and shapes it for the summary.
 *
 * The shape of the result depends on the arguments and on the number of content rows:
 *  - groupBy given: a map from the (stringified) value of the group column of each row
 *    to a map of the remaining header -> value pairs of that row
 *  - no groupBy and exactly one content row: a single header -> value map
 *  - otherwise: the header followed by all content rows, as a list of lists
 *
 * @param file metrics file
 * @param tag section tag passed to getMetricsContent, defaults to "METRICS CLASS"
 * @param groupBy optional header name whose values become the keys of the result
 * @return None when getMetricsContent yields nothing (e.g. the file does not exist),
 *         otherwise Some of one of the shapes described above
 * @throws IllegalArgumentException when groupBy names a column that is not in the header
 */
def getMetrics(file: File, tag: String = "METRICS CLASS",
               groupBy: Option[String] = None): Option[Any] = {
  getMetricsContent(file, tag) match {
    case Some((header, content)) =>
      (content.size, groupBy) match {
        case (_, Some(group)) =>
          val groupId = header.indexOf(group)
          if (groupId == -1) throw new IllegalArgumentException(group + " not existing in header of: " + file)
          if (header.count(_ == group) > 1) logger.warn(group + " multiple times seen in header of: " + file)
          Some(content.map { row =>
            val key = row(groupId).toString
            // Pair names with values first and then drop only the pair at groupId.
            // Filtering the header by name would remove every duplicate of the group
            // column while the row loses a single value, shifting all later columns.
            val fields = header.zip(row).zipWithIndex.collect {
              case (field, idx) if idx != groupId => field
            }
            key -> fields.toMap
          }.toMap)
        case (1, _) => Some(header.zip(content.head).toMap)
        case _      => Some(header :: content)
      }
    case _ => None
  }
}
/**
 * Parses one section of a metrics file and transposes it into columns,
 * so that every header name maps to the values found in its column.
 *
 * @param file metrics file
 * @param tag section tag passed to getMetricsContent, defaults to "HISTOGRAM"
 * @return None when getMetricsContent yields nothing, otherwise Some(map) from header
 *         name to one entry per content row; the entry is None for rows that have no
 *         value at that column index
 */
def getHistogram(file: File, tag: String = "HISTOGRAM") =
  getMetricsContent(file, tag).map {
    case (header, content) =>
      header.zipWithIndex.map {
        case (columnName, columnIndex) =>
          // lift gives None instead of failing on rows shorter than the header
          columnName -> content.map(row => row.lift(columnIndex))
      }.toMap
  }
/** /**
* This function parses a metrics file in separated values * This function parses a metrics file in separated values
* @param file input metrics file * @param file input metrics file
* @return (header, content) * @return (header, content)
*/ */
def getMetrics(file: File, tag: String = "METRICS CLASS"): Option[Map[String, Any]] = def getMetricsContent(file: File, tag: String) = {
if (!file.exists) None if (!file.exists) None
else { else {
val lines = Source.fromFile(file).getLines().toArray val lines = Source.fromFile(file).getLines().toArray
...@@ -94,6 +131,7 @@ object Picard { ...@@ -94,6 +131,7 @@ object Picard {
lines(i).split("\t").map(v => tryToParseNumber(v, true).getOrElse(v)).toList lines(i).split("\t").map(v => tryToParseNumber(v, true).getOrElse(v)).toList
}).toList }).toList
Some(Map("content" -> (header :: content))) Some(header, content)
} }
}
} }
\ No newline at end of file
...@@ -160,6 +160,7 @@ object ConfigUtils extends Logging { ...@@ -160,6 +160,7 @@ object ConfigUtils extends Logging {
any match { any match {
case j: Json => j case j: Json => j
case None => Json.jNull case None => Json.jNull
case Some(x) => anyToJson(x)
case m: Map[_, _] => mapToJson(m.map(m => m._1.toString -> anyToJson(m._2))) case m: Map[_, _] => mapToJson(m.map(m => m._1.toString -> anyToJson(m._2)))
case l: List[_] => Json.array(l.map(anyToJson(_)): _*) case l: List[_] => Json.array(l.map(anyToJson(_)): _*)
case b: Boolean => Json.jBool(b) case b: Boolean => Json.jBool(b)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment