From d1016462358de706e476f1189d800851ffe5fdd8 Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Wed, 18 Feb 2015 17:04:56 +0100
Subject: [PATCH] Remove version from summary and rename data to stats

---
 .../core/BiopetCommandLineFunctionTrait.scala |  2 +-
 .../biopet/core/summary/Summarizable.scala    |  6 +-
 .../biopet/core/summary/WriteSummary.scala    | 70 +++++++++++++------
 .../sasc/biopet/extensions/Cutadapt.scala     |  7 +-
 .../lumc/sasc/biopet/extensions/Seqstat.scala |  2 +-
 .../lumc/sasc/biopet/extensions/Sickle.scala  |  4 +-
 .../CollectAlignmentSummaryMetrics.scala      |  2 +-
 .../picard/CollectInsertSizeMetrics.scala     |  2 +-
 .../biopet/extensions/seqtk/SeqtkSeq.scala    |  7 +-
 .../sasc/biopet/tools/BiopetFlagstat.scala    |  2 +-
 .../nl/lumc/sasc/biopet/tools/FastqSync.scala |  5 +-
 .../CollectAlignmentSummaryMetricsTest.scala  |  2 +-
 .../picard/CollectInsertSizeMetricsTest.scala |  2 +-
 .../biopet/pipelines/flexiprep/Fastqc.scala   |  2 +-
 14 files changed, 74 insertions(+), 41 deletions(-)

diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala
index 944dee43f..0dcf13eea 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala
@@ -207,6 +207,6 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
 object BiopetCommandLineFunctionTrait {
   import scala.collection.mutable.Map
   private val versionCache: Map[String, String] = Map()
-  private val executableMd5Cache: Map[String, String] = Map()
+  private[core] val executableMd5Cache: Map[String, String] = Map() // core-visible: read by summary.WriteSummary
   private val executableCache: Map[String, String] = Map()
 }
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala
index d71eaa160..a1901ed79 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/Summarizable.scala
@@ -11,7 +11,7 @@ trait Summarizable extends Configurable {
 
   def summaryFiles: Map[String, File]
 
-  def summaryData: Map[String, Any]
+  def summaryStats: Map[String, Any]
 
   /**
    * This function is used to merge
@@ -20,5 +20,7 @@ trait Summarizable extends Configurable {
    * @param key
    * @return
    */
-  def resolveSummaryConflict(v1: Any, v2: Any, key: String) = v1
+  def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any =
+    // Default policy: never pick a winner silently; report the clashing key so it can be traced.
+    throw new IllegalStateException(s"Merge can not have same key by default, conflicting key: '$key'")
 }
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala
index 207acd79a..fac1055e3 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala
@@ -3,6 +3,7 @@ package nl.lumc.sasc.biopet.core.summary
 import java.io.{ FileInputStream, PrintWriter, File }
 import java.security.MessageDigest
 
+import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunctionTrait, SampleLibraryTag }
 import nl.lumc.sasc.biopet.core.config.Configurable
 import nl.lumc.sasc.biopet.utils.ConfigUtils
 import org.broadinstitute.gatk.queue.function.{ QFunction, InProcessFunction }
@@ -43,45 +44,72 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
   }
 
   def run(): Unit = {
-    val map = (for (
+    // Pipeline-level section: settings, files and executables (version + cached md5) of the qscript itself.
+    val pipelineMap = {
+      val files = parseFiles(qscript.summaryFiles)
+      val settings = qscript.summarySettings
+      val executables = {
+        for ((name, (file, version)) <- qscript.executables) yield {
+          name -> Map("version" -> version, "md5" -> BiopetCommandLineFunctionTrait.executableMd5Cache.getOrElse(file.getCanonicalPath, "N/A"))
+        }
+      }
+
+      val map = Map(qscript.summaryName -> ((if (settings.isEmpty) Map[String, Any]() else Map("settings" -> settings)) ++
+        (if (files.isEmpty) Map[String, Any]() else Map("files" -> Map("pipeline" -> files))) ++
+        (if (executables.isEmpty) Map[String, Any]() else Map("executables" -> executables.toMap))))
+
+      qscript match {
+        case tag: SampleLibraryTag => prefixSampleLibrary(map, tag.sampleId, tag.libId)
+        case _                     => map
+      }
+    }
+    // Per-job sections: each summarizable is nested under its sample/library and merged onto pipelineMap.
+    val jobsMap = (for (
       ((name, sampleId, libraryId), summarizables) <- qscript.summarizables;
       summarizable <- summarizables
     ) yield {
-      val map = Map(qscript.summaryName -> Map(name -> parseSummarizable(summarizable)))
+      val map = Map(qscript.summaryName -> parseSummarizable(summarizable, name))
 
-      (sampleId match {
-        case Some(sampleId) => Map("samples" -> Map(sampleId -> (libraryId match {
-          case Some(libraryId) => Map("libraries" -> Map(libraryId -> map))
-          case _               => map
-        })))
-        case _ => map
-      }, (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key))
-    }).foldRight(Map[String, Any]())((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2))
+      (prefixSampleLibrary(map, sampleId, libraryId),
+        (v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key))
+    }).foldRight(pipelineMap)((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2))
 
     val combinedMap = (for (qscript <- qscript.summaryQScripts) yield {
       ConfigUtils.fileToConfigMap(qscript.summaryFile)
-    }).foldRight(map)((a, b) => ConfigUtils.mergeMaps(a, b))
+    }).foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b))
 
     val writer = new PrintWriter(out)
     writer.println(ConfigUtils.mapToJson(combinedMap).spaces4)
     writer.close()
   }
 
-  def parseSummarizable(summarizable: Summarizable) = {
-    val data = summarizable.summaryData
+  def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = {
+    // Without a sample the map is returned untouched; a library level only exists beneath a sample.
+    sampleId.fold(map) { sample =>
+      val underSample: Map[String, Any] = libraryId.fold(map) { library =>
+        Map("libraries" -> Map(library -> map))
+      }
+      Map("samples" -> Map(sample -> underSample))
+    }
+  }
+
+  def parseSummarizable(summarizable: Summarizable, name: String): Map[String, Any] = {
+    val stats = summarizable.summaryStats
     val files = parseFiles(summarizable.summaryFiles)
 
-    (if (data.isEmpty) Map[String, Any]() else Map("data" -> data)) ++
-      (if (files.isEmpty) Map[String, Any]() else Map("files" -> files))
+    (if (stats.isEmpty) Map[String, Any]() else Map("stats" -> Map(name -> stats))) ++ // empty sections are omitted
+      (if (files.isEmpty) Map[String, Any]() else Map("files" -> Map(name -> files)))
   }
 
   def parseFiles(files: Map[String, File]): Map[String, Map[String, Any]] = {
-    for ((key, file) <- files) yield {
-      val map: mutable.Map[String, Any] = mutable.Map()
-      map += "path" -> file.getAbsolutePath
-      if (md5sum) map += "md5" -> parseChecksum(SummaryQScript.md5sumCache(file))
-      key -> map.toMap
-    }
+    for ((key, file) <- files) yield key -> parseFile(file)
+  }
+
+  def parseFile(file: File): Map[String, Any] = {
+    // "path" is always reported; "md5" is added only when md5sum is enabled.
+    val pathEntry: Map[String, Any] = Map("path" -> file.getAbsolutePath)
+    if (md5sum) pathEntry + ("md5" -> parseChecksum(SummaryQScript.md5sumCache(file)))
+    else pathEntry
   }
 
   def parseChecksum(checksumFile: File): String = {
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala
index ccf56efc8..fc90139f8 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala
@@ -47,7 +47,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
   if (config.contains("front")) for (adapter <- config("front").asList) opt_front += adapter.toString
 
   var opt_discard: Boolean = config("discard", default = false)
-  var opt_minimum_length: Option[Int] = config("minimum_length", 1)
+  var opt_minimum_length: Int = config("minimum_length", 1)
   var opt_maximum_length: Option[Int] = config("maximum_length")
 
   def cmdLine = required(executable) +
@@ -63,7 +63,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
     required("--output", fastq_output) +
     " > " + required(stats_output)
 
-  def summaryData: Map[String, Any] = {
+  def summaryStats: Map[String, Any] = {
     val trimR = """.*Trimmed reads: *(\d*) .*""".r
     val tooShortR = """.*Too short reads: *(\d*) .*""".r
     val tooLongR = """.*Too long reads: *(\d*) .*""".r
@@ -82,8 +82,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
       }
     }
 
-    Map("version" -> getVersion,
-      "num_reads_affected" -> stats("trimmed"),
+    Map("num_reads_affected" -> stats("trimmed"),
       "num_reads_discarded_too_short" -> stats("tooshort"),
       "num_reads_discarded_too_long" -> stats("toolong"),
       "adapters" -> adapter_stats.toMap
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala
index 556f8639f..bb014d224 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala
@@ -45,7 +45,7 @@ class Seqstat(val root: Configurable) extends BiopetCommandLineFunction with Sum
 
   def cmdLine = required(executable) + required(input) + " > " + required(output)
 
-  def summaryData: Map[String, Any] = {
+  def summaryStats: Map[String, Any] = {
     val map = ConfigUtils.fileToConfigMap(output)
 
     ConfigUtils.any2map(map.getOrElse("stats", Map()))
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala
index 8a546534d..fae8c5802 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Sickle.scala
@@ -81,7 +81,7 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction with Summ
       " > " + required(output_stats)
   }
 
-  def summaryData: Map[String, Any] = {
+  def summaryStats: Map[String, Any] = {
     val pairKept = """FastQ paired records kept: (\d*) \((\d*) pairs\)""".r
     val singleKept = """FastQ single records kept: (\d*) \(from PE1: (\d*), from PE2: (\d*)\)""".r
     val pairDiscarded = """FastQ paired records discarded: (\d*) \((\d*) pairs\)""".r
@@ -105,7 +105,7 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction with Summ
       }
     }
 
-    stats.toMap ++ Map("version" -> getVersion)
+    stats.toMap
   }
 
   override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = {
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala
index f69114521..53b109b8c 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala
@@ -63,7 +63,7 @@ class CollectAlignmentSummaryMetrics(val root: Configurable) extends Picard with
 
   def summaryFiles: Map[String, File] = Map()
 
-  def summaryData: Map[String, Any] = {
+  def summaryStats: Map[String, Any] = {
     val (header, content) = Picard.getMetrics(output)
 
     (for (category <- 0 until content.size) yield {
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala
index 9585491f9..d55653162 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala
@@ -73,7 +73,7 @@ class CollectInsertSizeMetrics(val root: Configurable) extends Picard with Summa
 
   def summaryFiles: Map[String, File] = Map("output_histogram" -> outputHistogram)
 
-  def summaryData: Map[String, Any] = {
+  def summaryStats: Map[String, Any] = {
     val (header, content) = Picard.getMetrics(output)
     (for (i <- 0 to header.size if i < content.head.size)
       yield (header(i).toLowerCase -> content.head(i))).toMap
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/SeqtkSeq.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/SeqtkSeq.scala
index 9838040cc..a3dc12182 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/SeqtkSeq.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/seqtk/SeqtkSeq.scala
@@ -16,6 +16,7 @@
 package nl.lumc.sasc.biopet.extensions.seqtk
 
 import java.io.File
+import nl.lumc.sasc.biopet.core.summary.Summarizable
 import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
 import nl.lumc.sasc.biopet.core.config.Configurable
 
@@ -23,7 +24,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable
  * Wrapper for the seqtk seq subcommand.
  * Written based on seqtk version 1.0-r63-dirty.
  */
-class SeqtkSeq(val root: Configurable) extends Seqtk {
+class SeqtkSeq(val root: Configurable) extends Seqtk with Summarizable {
 
   /** input file */
   @Input(doc = "Input file (FASTQ or FASTA)")
@@ -81,6 +82,10 @@ class SeqtkSeq(val root: Configurable) extends Seqtk {
   /** shift quality by '(-Q) - 33' */
   var V: Boolean = config("V", default = false)
 
+  def summaryStats: Map[String, Any] = Map()
+
+  def summaryFiles: Map[String, File] = Map()
+
   def cmdLine = {
     required(executable) +
       " seq " +
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala
index e866195ae..48e1c36e1 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala
@@ -44,7 +44,7 @@ class BiopetFlagstat(val root: Configurable) extends BiopetJavaCommandLineFuncti
 
   def summaryFiles: Map[String, File] = Map()
 
-  def summaryData: Map[String, Any] = {
+  def summaryStats: Map[String, Any] = {
     ConfigUtils.fileToConfigMap(summaryFile)
   }
 }
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala
index 32c0c5b1e..2b44e6300 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala
@@ -64,7 +64,7 @@ class FastqSync(val root: Configurable) extends BiopetJavaCommandLineFunction wi
 
   def summaryFiles: Map[String, File] = Map()
 
-  def summaryData: Map[String, Any] = {
+  def summaryStats: Map[String, Any] = {
     val regex = new Regex("""Filtered (\d*) reads from first read file.
                             |Filtered (\d*) reads from second read file.
                             |Synced read files contain (\d*) reads.""".stripMargin,
@@ -82,8 +82,7 @@ class FastqSync(val root: Configurable) extends BiopetJavaCommandLineFunction wi
         }
       } else (0, 0, 0)
 
-    Map("version" -> BiopetExecutable.getVersion,
-      "num_reads_discarded_R1" -> countFilteredR1,
+    Map("num_reads_discarded_R1" -> countFilteredR1,
       "num_reads_discarded_R2" -> countFilteredR2,
       "num_reads_kept" -> countRLeft
     )
diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala
index 51139812f..f46a9b91f 100644
--- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala
+++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetricsTest.scala
@@ -18,6 +18,6 @@ class CollectAlignmentSummaryMetricsTest extends TestNGSuite with Matchers {
     val job = new CollectAlignmentSummaryMetrics(null)
     job.output = file
 
-    job.summaryData
+    job.summaryStats
   }
 }
diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala
index fc0c563e8..ffb5d528c 100644
--- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala
+++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetricsTest.scala
@@ -18,6 +18,6 @@ class CollectInsertSizeMetricsTest extends TestNGSuite with Matchers {
     val job = new CollectInsertSizeMetrics(null)
     job.output = file
 
-    job.summaryData
+    job.summaryStats
   }
 }
diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala
index cd9dd4ae0..8946db96c 100644
--- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala
+++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala
@@ -159,7 +159,7 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
     outputFiles ++ Map("fastq_file" -> this.fastqfile)
   }
 
-  def summaryData: Map[String, Any] = Map("version" -> getVersion)
+  def summaryStats: Map[String, Any] = Map()
 }
 
 object Fastqc {
-- 
GitLab