diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala index db27fd0896e12d7c6ab6197ef19ad40863a32ea5..c9a11228209d9ddd3b8fbdc2580dc5a71b7df258 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala @@ -30,7 +30,7 @@ trait Summarizable { def summaryFiles: Map[String, File] /** Must returns stats to store into summary */ - def summaryStats: Map[String, Any] + def summaryStats: Any /** Can be used to add additional Summarizable, this is executed at the start of WriteSummary*/ def addToQscriptSummary(qscript: SummaryQScript, name: String) {} diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala index 2b034ce872652331af6918b2e09280619754809b..2b9d14276a2d1ef10052fdad64cfb1d45e6d4fc7 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala @@ -139,7 +139,7 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config val stats = summarizable.summaryStats val files = parseFiles(summarizable.summaryFiles) - (if (stats.isEmpty) Map[String, Any]() else Map("stats" -> Map(name -> stats))) ++ + (Map("stats" -> Map(name -> stats))) ++ (if (files.isEmpty) Map[String, Any]() else Map("files" -> Map(name -> files))) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala index 
2fd93a22df05adae07a0d73baec622fb243a3479..0fa3742e0f7136aef201ab69cf3dfbcd19d3e047 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CalculateHsMetrics.scala @@ -63,7 +63,7 @@ class CalculateHsMetrics(val root: Configurable) extends Picard with Summarizabl def summaryFiles: Map[String, File] = Map() /** Returns stats for summary */ - def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) + def summaryStats: Any = Picard.getMetrics(output).getOrElse(Map()) } object CalculateHsMetrics { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala index b32e3a681aabeca6236f22717540453ebe4f8865..4807a3d74dd086b1de816aa18104fedef028da90 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala @@ -66,7 +66,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with def summaryFiles: Map[String, File] = Map() /** Returns stats for summary */ - def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) + def summaryStats = Picard.getMetrics(output).getOrElse(Map()) } object CollectAlignmentSummaryMetrics { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala index 27ab368510b27ea0d5688e6c600b077b6ade9897..e15de02c60afaa1fc7a556a2bd3b16e996b3106a 100644 --- 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala @@ -71,7 +71,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard with Summariza def summaryFiles: Map[String, File] = Map() /** Returns stats for summary */ - def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) + def summaryStats = Picard.getHistogram(output, tag = "METRICS CLASS") } object CollectGcBiasMetrics { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala index 81d6ebba40cdb4e1623aa5cb70b4a2b911d77369..f1cdd312ae326eac34ab6a25eaedf9897701eaab 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala @@ -75,7 +75,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa /** Returns files for summary */ def summaryFiles: Map[String, File] = Map("output_histogram" -> outputHistogram) - def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) + def summaryStats = Picard.getMetrics(output).getOrElse(Map()) } object CollectInsertSizeMetrics { diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectMultipleMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectMultipleMetrics.scala index 8facf759bf434e366ee0bbf2f99e13a4560a4b03..920dd091c4e0db65a07ee801af04e397a2624d38 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectMultipleMetrics.scala +++ 
b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectMultipleMetrics.scala @@ -68,25 +68,25 @@ class CollectMultipleMetrics(val root: Configurable) extends Picard with Summari override def addToQscriptSummary(qscript: SummaryQScript, name: String): Unit = { program.foreach(p => { - val stats: Map[String, Any] = p match { + val stats: Any = p match { case _ if p == Programs.CollectAlignmentSummaryMetrics.toString => - Picard.getMetrics(new File(outputName + ".alignment_summary_metrics")).getOrElse(Map()) + Picard.getMetrics(new File(outputName + ".alignment_summary_metrics"), groupBy = Some("CATEGORY")) case _ if p == Programs.CollectInsertSizeMetrics.toString => Map( - "metrics" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics")).getOrElse(Map()), - "histogram" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics"), "HISTOGRAM").getOrElse(Map()) + "metrics" -> Picard.getMetrics(new File(outputName + ".insert_size_metrics")), + "histogram" -> Picard.getHistogram(new File(outputName + ".insert_size_metrics")) ) case _ if p == Programs.QualityScoreDistribution.toString => - Picard.getMetrics(new File(outputName + ".quality_distribution_metrics"), "HISTOGRAM").getOrElse(Map()) + Picard.getHistogram(new File(outputName + ".quality_distribution_metrics")) case _ if p == Programs.MeanQualityByCycle.toString => - Picard.getMetrics(new File(outputName + ".quality_by_cycle_metrics"), "HISTOGRAM").getOrElse(Map()) + Picard.getHistogram(new File(outputName + ".quality_by_cycle_metrics")) case _ if p == Programs.CollectBaseDistributionByCycle.toString => - Picard.getMetrics(new File(outputName + ".base_distribution_by_cycle_metrics")).getOrElse(Map()) - case _ => Map() + Picard.getHistogram(new File(outputName + ".base_distribution_by_cycle_metrics"), tag = "METRICS CLASS") + case _ => None } val sum = new Summarizable { override def summaryFiles: Map[String, File] = Map() - override def summaryStats: Map[String, Any] 
= stats + override def summaryStats = stats } qscript.addSummarizable(sum, p) }) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectRnaSeqMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectRnaSeqMetrics.scala index 107dffd61be43a64f6dd13243e14385815701aee..f75a901d8b2ac51efb95fe63f576d2d6e2733add 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectRnaSeqMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectRnaSeqMetrics.scala @@ -84,7 +84,7 @@ class CollectRnaSeqMetrics(val root: Configurable) extends Picard with Summariza "output_chart" -> chartOutput ).collect { case (key, Some(value)) => key -> value } - def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) + def summaryStats = Picard.getMetrics(output).getOrElse(Map()) override def commandLine = super.commandLine + required("INPUT=", input, spaceSeparated = false) + diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectTargetedPcrMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectTargetedPcrMetrics.scala index a89f751d44c3cd2f15a3f7a9be18a5a0c6ec8632..8f5c71a6d241349fae4a9306887eb5574e071aa0 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectTargetedPcrMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectTargetedPcrMetrics.scala @@ -51,7 +51,7 @@ class CollectTargetedPcrMetrics(val root: Configurable) extends Picard with Summ def summaryFiles: Map[String, File] = Map() /** Returns stats for summary */ - def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) + def summaryStats = Picard.getMetrics(output).getOrElse(Map()) } object CollectTargetedPcrMetrics { diff --git 
a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectWgsMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectWgsMetrics.scala index 6c87bdd2017ebca1bcc95bc5068c0630a614a3a8..f3f0d1b866d0712505fe041be28fdb3827675a58 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectWgsMetrics.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectWgsMetrics.scala @@ -51,5 +51,5 @@ class CollectWgsMetrics(val root: Configurable) extends Picard with Summarizable def summaryFiles: Map[String, File] = Map() /** Returns stats for summary */ - def summaryStats: Map[String, Any] = Picard.getMetrics(output).getOrElse(Map()) + def summaryStats = Picard.getMetrics(output).getOrElse(Map()) } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala index 9e09e5acaeb15ed68566569d263c690487aa9aae..8c21c48e42dba4ee953c471c05dfbc87816c197a 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala @@ -100,7 +100,7 @@ class MarkDuplicates(val root: Configurable) extends Picard with Summarizable { def summaryFiles: Map[String, File] = Map() /** Returns stats for summary */ - def summaryStats: Map[String, Any] = Picard.getMetrics(outputMetrics).getOrElse(Map()) + def summaryStats = Picard.getMetrics(outputMetrics).getOrElse(Map()) } object MarkDuplicates { /** Returns default MarkDuplicates */ diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala index 
f78243baea58f5a74831f924fe9499facf237bb3..77d2ec0a477c04143def73aa34fb2346f713b961 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala @@ -20,7 +20,7 @@ import scala.io.Source import org.broadinstitute.gatk.utils.commandline.Argument -import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction +import nl.lumc.sasc.biopet.core.{ Logging, BiopetJavaCommandLineFunction } import nl.lumc.sasc.biopet.utils.tryToParseNumber /** @@ -74,14 +74,51 @@ abstract class Picard extends BiopetJavaCommandLineFunction { conditional(createMd5, "CREATE_MD5_FILE=TRUE") } -object Picard { +object Picard extends Logging { + + def getMetrics(file: File, tag: String = "METRICS CLASS", + groupBy: Option[String] = None): Option[Any] = { + getMetricsContent(file, tag) match { + case Some((header, content)) => { + (content.size, groupBy) match { + case (_, Some(group)) => { + val groupId = header.indexOf(group) + if (groupId == -1) throw new IllegalArgumentException(group + " not existing in header of: " + file) + if (header.count(_ == group) > 1) logger.warn(group + " multiple times seen in header of: " + file) + Some((for (c <- content) yield c(groupId).toString() -> { + header.filter(_ != group).zip(c.take(groupId) ::: c.takeRight(c.size - groupId - 1)).toMap + }).toMap) + } + case (1, _) => Some(header.zip(content.head).toMap) + case _ => Some(header :: content) + } + } + case _ => None + } + } + + /** + * Parses the metrics table found under the given tag and transposes it, so that each header name maps to its column of values + * @param file metrics file + * @param tag section tag to look up, defaults to "HISTOGRAM" + * @return None when no metrics content is returned for the file, otherwise Some(map of header name to the optional value of that column in each row) + */ + def getHistogram(file: File, tag: String = "HISTOGRAM") = { + getMetricsContent(file, tag) match { + case Some((header, content)) => { + val colums = header.zipWithIndex.map(x => x._1 -> content.map(_.lift(x._2))).toMap + Some(colums) + } + case _ => None + } + } /** * This function parse a metrics file 
in separated values * @param file input metrics file * @return (header, content) */ - def getMetrics(file: File, tag: String = "METRICS CLASS"): Option[Map[String, Any]] = + def getMetricsContent(file: File, tag: String) = { if (!file.exists) None else { val lines = Source.fromFile(file).getLines().toArray @@ -94,6 +131,7 @@ object Picard { lines(i).split("\t").map(v => tryToParseNumber(v, true).getOrElse(v)).toList }).toList - Some(Map("content" -> (header :: content))) + Some(header, content) } + } } \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala index b82f3da406ec4c2cd12ad6403ed574e6919e63a3..24018f6d3dec182b3b3b304e724dff6bf2095f85 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/utils/ConfigUtils.scala @@ -160,6 +160,7 @@ object ConfigUtils extends Logging { any match { case j: Json => j case None => Json.jNull + case Some(x) => anyToJson(x) case m: Map[_, _] => mapToJson(m.map(m => m._1.toString -> anyToJson(m._2))) case l: List[_] => Json.array(l.map(anyToJson(_)): _*) case b: Boolean => Json.jBool(b)