From 0eed5996af962b39b0e2ae3cf519a52b4c90d011 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Tue, 17 Feb 2015 11:32:27 +0100 Subject: [PATCH] Added new summary to cutadapt --- .../biopet/pipelines/flexiprep/Cutadapt.scala | 48 +++++++++++++++++-- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala index 9aaca5f66..15996cbe7 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala @@ -15,6 +15,8 @@ */ package nl.lumc.sasc.biopet.pipelines.flexiprep +import nl.lumc.sasc.biopet.core.summary.Summarizable + import scala.io.Source import nl.lumc.sasc.biopet.extensions.Ln @@ -25,9 +27,9 @@ import scalaz._, Scalaz._ import java.io.File import nl.lumc.sasc.biopet.core.config.Configurable -import scala.collection.mutable.Map +import scala.collection.mutable -class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutadapt(root) { +class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutadapt(root) with Summarizable { var fastqc: Fastqc = _ override def beforeCmd() { @@ -49,14 +51,50 @@ class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutada } } + def summaryData: Map[String, Any] = { + val trimR = """.*Trimmed reads: *(\d*) .*""".r + val tooShortR = """.*Too short reads: *(\d*) .*""".r + val tooLongR = """.*Too long reads: *(\d*) .*""".r + val adapterR = """Adapter '([C|T|A|G]*)'.*trimmed (\d*) times.""".r + + val stats: mutable.Map[String, Int] = mutable.Map("trimmed" -> 0, "tooshort" -> 0, "toolong" -> 0) + val adapter_stats: mutable.Map[String, Int] = mutable.Map() + + if (stats_output.exists) for (line <- Source.fromFile(stats_output).getLines) { + line match { 
+ case trimR(m) => stats += ("trimmed" -> m.toInt) + case tooShortR(m) => stats += ("tooshort" -> m.toInt) + case tooLongR(m) => stats += ("toolong" -> m.toInt) + case adapterR(adapter, count) => adapter_stats += (adapter -> count.toInt) + case _ => + } + } + + Map("version" -> getVersion, + "num_reads_affected" -> stats("trimmed"), + "num_reads_discarded_too_short" -> stats("tooshort"), + "num_reads_discarded_too_long" -> stats("toolong"), + "adapters" -> adapter_stats + ) + } + + override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = { + (v1, v2) match { + case (v1: Int, v2: Int) => v1 + v2 + case _ => v1 + } + } + + def summaryFiles: Map[String, File] = Map("input" -> fastq_input, "output" -> fastq_output) + def getSummary: Json = { val trimR = """.*Trimmed reads: *(\d*) .*""".r val tooShortR = """.*Too short reads: *(\d*) .*""".r val tooLongR = """.*Too long reads: *(\d*) .*""".r val adapterR = """Adapter '([C|T|A|G]*)'.*trimmed (\d*) times.""".r - var stats: Map[String, Int] = Map("trimmed" -> 0, "tooshort" -> 0, "toolong" -> 0) - var adapter_stats: Map[String, Int] = Map() + var stats: mutable.Map[String, Int] = mutable.Map("trimmed" -> 0, "tooshort" -> 0, "toolong" -> 0) + var adapter_stats: mutable.Map[String, Int] = mutable.Map() if (stats_output.exists) for (line <- Source.fromFile(stats_output).getLines) { line match { @@ -88,7 +126,7 @@ object Cutadapt { var affected = 0 var tooShort = 0 var tooLong = 0 - var adapter_stats: Map[String, Int] = Map() + var adapter_stats: mutable.Map[String, Int] = mutable.Map() for (json <- jsons) { affected += json.field("num_reads_affected").get.numberOrZero.toInt -- GitLab