From 8145ca779a07098b5903a6aad0fa4697e47067c6 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Tue, 17 Feb 2015 11:53:14 +0100 Subject: [PATCH] Add new summary to seqstat --- .../lumc/sasc/biopet/extensions/Seqstat.scala | 31 ++++++++++++++++--- .../pipelines/flexiprep/Flexiprep.scala | 4 ++- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala index d30d19fdf..c7c1a4436 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala @@ -21,16 +21,18 @@ package nl.lumc.sasc.biopet.extensions */ import argonaut._, Argonaut._ +import nl.lumc.sasc.biopet.core.summary.Summarizable +import nl.lumc.sasc.biopet.utils.ConfigUtils import scalaz._, Scalaz._ import scala.io.Source -import scala.collection.mutable.Map +import scala.collection.mutable import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import java.io.File -class Seqstat(val root: Configurable) extends BiopetCommandLineFunction { +class Seqstat(val root: Configurable) extends BiopetCommandLineFunction with Summarizable { override val defaultVmem = "4G" @Input(doc = "Input FastQ", required = true) @@ -48,6 +50,25 @@ class Seqstat(val root: Configurable) extends BiopetCommandLineFunction { if (json.isEmpty) return jNull else return json.get.fieldOrEmptyObject("stats") } + + def summaryData: Map[String, Any] = { + val map = ConfigUtils.fileToConfigMap(output) + + ConfigUtils.any2map(map.getOrElse("stats", Map())) + } + + def summaryFiles: Map[String, File] = { + Map("fastq" -> input) + } + + override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = { + (v1, v2) match { + case (v1: Int, v2: Int) if key == "len_min" => if (v1 < v2) v1 else v2 + case (v1: Int, v2: Int) if key == "len_max" => if (v1 > v2) v1 else v2 + case (v1: Int, v2: Int) => v1 + v2 + case _ => v1 + } + } } object Seqstat { @@ -60,7 +81,7 @@ object Seqstat { } def mergeSummaries(jsons: List[Json]): Json = { - def addJson(json: Json, total: Map[String, Long]) { + def addJson(json: Json, total: mutable.Map[String, Long]) { for (key <- json.objectFieldsOrEmpty) { if (json.field(key).get.isObject) addJson(json.field(key).get, total) else if (json.field(key).get.isNumber) { @@ -76,8 +97,8 @@ object Seqstat { } } - var basesTotal: Map[String, Long] = Map() - var readsTotal: Map[String, Long] = Map() + var basesTotal: mutable.Map[String, Long] = mutable.Map() + var readsTotal: mutable.Map[String, Long] = mutable.Map() var encoding: Set[Json] = Set() for (json <- jsons) { encoding += json.fieldOrEmptyString("qual_encoding") diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala index 2d4c5c2be..9db2ab7ae 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -45,7 +45,7 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with def summaryFiles = Map() - def summaryData = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip) + def summaryData = Map("skip_trim" -> skipTrim, "skip_clip" -> skipClip, "paired" -> paired) var paired: Boolean = input_R2.isDefined var R1_ext: String = _ @@ -161,12 +161,14 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with val seqstat_R1 = Seqstat(this, R1, outDir) seqstat_R1.isIntermediate = true add(seqstat_R1) + addSummarizable(seqstat_R1, "seqstat_R1") //summary.addSeqstat(seqstat_R1, R2 = false, after = false, chunk) if (paired) { val seqstat_R2 = Seqstat(this, R2, outDir) seqstat_R2.isIntermediate = true add(seqstat_R2) + addSummarizable(seqstat_R2, "seqstat_R2") //summary.addSeqstat(seqstat_R2, R2 = true, after = false, chunk) } -- GitLab