From 0eed5996af962b39b0e2ae3cf519a52b4c90d011 Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Tue, 17 Feb 2015 11:32:27 +0100
Subject: [PATCH] Added new summary to cutadept

---
 .../biopet/pipelines/flexiprep/Cutadapt.scala | 48 +++++++++++++++++--
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala
index 9aaca5f66..15996cbe7 100644
--- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala
+++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala
@@ -15,6 +15,8 @@
  */
 package nl.lumc.sasc.biopet.pipelines.flexiprep
 
+import nl.lumc.sasc.biopet.core.summary.Summarizable
+
 import scala.io.Source
 
 import nl.lumc.sasc.biopet.extensions.Ln
@@ -25,9 +27,9 @@ import scalaz._, Scalaz._
 
 import java.io.File
 import nl.lumc.sasc.biopet.core.config.Configurable
-import scala.collection.mutable.Map
+import scala.collection.mutable
 
-class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutadapt(root) {
+class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutadapt(root) with Summarizable {
   var fastqc: Fastqc = _
 
   override def beforeCmd() {
@@ -49,14 +51,50 @@ class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutada
     }
   }
 
+  def summaryData: Map[String, Any] = {
+    val trimR = """.*Trimmed reads: *(\d*) .*""".r
+    val tooShortR = """.*Too short reads: *(\d*) .*""".r
+    val tooLongR = """.*Too long reads: *(\d*) .*""".r
+    val adapterR = """Adapter '([C|T|A|G]*)'.*trimmed (\d*) times.""".r
+
+    val stats: mutable.Map[String, Int] = mutable.Map("trimmed" -> 0, "tooshort" -> 0, "toolong" -> 0)
+    val adapter_stats: mutable.Map[String, Int] = mutable.Map()
+
+    if (stats_output.exists) for (line <- Source.fromFile(stats_output).getLines) {
+      line match {
+        case trimR(m)                 => stats += ("trimmed" -> m.toInt)
+        case tooShortR(m)             => stats += ("tooshort" -> m.toInt)
+        case tooLongR(m)              => stats += ("toolong" -> m.toInt)
+        case adapterR(adapter, count) => adapter_stats += (adapter -> count.toInt)
+        case _                        =>
+      }
+    }
+
+    Map("version" -> getVersion,
+      "num_reads_affected" -> stats("trimmed"),
+      "num_reads_discarded_too_short" -> stats("tooshort"),
+      "num_reads_discarded_too_long" -> stats("toolong"),
+      "adapters" -> adapter_stats
+    )
+  }
+
+  override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = {
+    (v1, v2) match {
+      case (v1: Int, v2: Int) => v1 + v2
+      case _                  => v1
+    }
+  }
+
+  def summaryFiles: Map[String, File] = Map("input" -> fastq_input, "output" -> fastq_output)
+
   def getSummary: Json = {
     val trimR = """.*Trimmed reads: *(\d*) .*""".r
     val tooShortR = """.*Too short reads: *(\d*) .*""".r
     val tooLongR = """.*Too long reads: *(\d*) .*""".r
     val adapterR = """Adapter '([C|T|A|G]*)'.*trimmed (\d*) times.""".r
 
-    var stats: Map[String, Int] = Map("trimmed" -> 0, "tooshort" -> 0, "toolong" -> 0)
-    var adapter_stats: Map[String, Int] = Map()
+    var stats: mutable.Map[String, Int] = mutable.Map("trimmed" -> 0, "tooshort" -> 0, "toolong" -> 0)
+    var adapter_stats: mutable.Map[String, Int] = mutable.Map()
 
     if (stats_output.exists) for (line <- Source.fromFile(stats_output).getLines) {
       line match {
@@ -88,7 +126,7 @@ object Cutadapt {
     var affected = 0
     var tooShort = 0
     var tooLong = 0
-    var adapter_stats: Map[String, Int] = Map()
+    var adapter_stats: mutable.Map[String, Int] = mutable.Map()
 
     for (json <- jsons) {
       affected += json.field("num_reads_affected").get.numberOrZero.toInt
-- 
GitLab