From f6791cc08d6bccd3f607be69a74dc30745a2b3a4 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Tue, 17 Feb 2015 13:09:59 +0100 Subject: [PATCH] Remove old summary code --- .../lumc/sasc/biopet/extensions/Seqstat.scala | 67 ----- .../nl/lumc/sasc/biopet/tools/FastqSync.scala | 50 ---- .../biopet/pipelines/flexiprep/Cutadapt.scala | 51 ---- .../biopet/pipelines/flexiprep/Fastqc.scala | 30 --- .../pipelines/flexiprep/Flexiprep.scala | 40 +-- .../flexiprep/FlexiprepSummary.scala | 242 ------------------ .../biopet/pipelines/flexiprep/Sickle.scala | 47 ---- 7 files changed, 5 insertions(+), 522 deletions(-) delete mode 100644 public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala index c7c1a4436..a8bf55177 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala @@ -45,12 +45,6 @@ class Seqstat(val root: Configurable) extends BiopetCommandLineFunction with Sum def cmdLine = required(executable) + required(input) + " > " + required(output) - def getSummary: Json = { - val json = Parse.parseOption(Source.fromFile(output).mkString) - if (json.isEmpty) return jNull - else return json.get.fieldOrEmptyObject("stats") - } - def summaryData: Map[String, Any] = { val map = ConfigUtils.fileToConfigMap(output) @@ -79,65 +73,4 @@ object Seqstat { seqstat.output = new File(outDir + fastqfile.getName.substring(0, fastqfile.getName.lastIndexOf(".")) + ".seqstats.json") return seqstat } - - def mergeSummaries(jsons: List[Json]): Json = { - def addJson(json: Json, total: mutable.Map[String, Long]) { - for (key <- json.objectFieldsOrEmpty) { - if (json.field(key).get.isObject) addJson(json.field(key).get, total) - else if (json.field(key).get.isNumber) { - val number = json.field(key).get.numberOrZero.toLong - if (total.contains(key)) { - if (key == "len_min") { - if (total(key) > number) total(key) = number - } else if (key == "len_max") { - if (total(key) < number) total(key) = number - } else total(key) += number - } else total += (key -> number) - } - } - } - - var basesTotal: mutable.Map[String, Long] = mutable.Map() - var readsTotal: mutable.Map[String, Long] = mutable.Map() - var encoding: Set[Json] = Set() - for (json <- jsons) { - encoding += json.fieldOrEmptyString("qual_encoding") - - val bases = json.fieldOrEmptyObject("bases") - addJson(bases, basesTotal) - - val reads = json.fieldOrEmptyObject("reads") - addJson(reads, readsTotal) - } - return ("bases" := ( - ("num_n" := basesTotal("num_n")) ->: - ("num_total" := basesTotal("num_total")) ->: - ("num_qual_gte" := ( - ("1" := basesTotal("1")) ->: - ("10" := basesTotal("10")) ->: - ("20" := basesTotal("20")) ->: - ("30" := basesTotal("30")) ->: - ("40" := basesTotal("40")) ->: - ("50" := basesTotal("50")) ->: - ("60" := basesTotal("60")) ->: - jEmptyObject - )) ->: jEmptyObject)) ->: - ("reads" := ( - ("num_with_n" := readsTotal("num_with_n")) ->: - ("num_total" := readsTotal("num_total")) ->: - ("len_min" := readsTotal("len_min")) ->: - ("len_max" := readsTotal("len_max")) ->: - ("num_mean_qual_gte" := ( - ("1" := readsTotal("1")) ->: - ("10" := readsTotal("10")) ->: - ("20" := readsTotal("20")) ->: - ("30" := readsTotal("30")) ->: - ("40" := readsTotal("40")) ->: - ("50" := readsTotal("50")) ->: - ("60" := readsTotal("60")) ->: - jEmptyObject - )) ->: jEmptyObject)) ->: - ("qual_encoding" := encoding.head) ->: - jEmptyObject - } } diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala index 9c11d1519..f47eeb300 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSync.scala @@ -105,32 +105,6 @@ class FastqSync(val root: Configurable) extends BiopetJavaCommandLineFunction wi case _ => v1 } } - - // summary statistics - def summary: Json = { - - val regex = new Regex("""Filtered (\d*) reads from first read file. - |Filtered (\d*) reads from second read file. - |Synced read files contain (\d*) reads.""".stripMargin, - "R1", "R2", "RL") - - val (countFilteredR1, countFilteredR2, countRLeft) = - if (outputStats.exists) { - val text = Source - .fromFile(outputStats) - .getLines() - .mkString("\n") - regex.findFirstMatchIn(text) match { - case None => (0, 0, 0) - case Some(rmatch) => (rmatch.group("R1").toInt, rmatch.group("R2").toInt, rmatch.group("RL").toInt) - } - } else (0, 0, 0) - - ("num_reads_discarded_R1" := countFilteredR1) ->: - ("num_reads_discarded_R2" := countFilteredR2) ->: - ("num_reads_kept" := countRLeft) ->: - jEmptyObject - } } object FastqSync extends ToolCommand { @@ -210,30 +184,6 @@ object FastqSync extends ToolCommand { (numDiscA, numDiscB, numKept) } - /** Function to merge this tool's summary with summaries from other objects */ - // TODO: refactor this into the object? At least make it work on the summary object - def mergeSummaries(jsons: List[Json]): Json = { - - val (read1FilteredCount, read2FilteredCount, readsLeftCount) = jsons - // extract the values we require from each JSON object into tuples - .map { - case json => - (json.field("num_reads_discarded_R1").get.numberOrZero.toInt, - json.field("num_reads_discarded_R2").get.numberOrZero.toInt, - json.field("num_reads_kept").get.numberOrZero.toInt) - } - // reduce the tuples - .reduceLeft { - (x: (Int, Int, Int), y: (Int, Int, Int)) => - (x._1 + y._1, x._2 + y._2, x._3 + y._3) - } - - ("num_reads_discarded_R1" := read1FilteredCount) ->: - ("num_reads_discarded_R2" := read2FilteredCount) ->: - ("num_reads_kept" := readsLeftCount) ->: - jEmptyObject - } - case class Args(refFastq: File = new File(""), inputFastq1: File = new File(""), inputFastq2: File = new File(""), diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala index 15996cbe7..564bb4427 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala @@ -86,31 +86,6 @@ class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutada } def summaryFiles: Map[String, File] = Map("input" -> fastq_input, "output" -> fastq_output) - - def getSummary: Json = { - val trimR = """.*Trimmed reads: *(\d*) .*""".r - val tooShortR = """.*Too short reads: *(\d*) .*""".r - val tooLongR = """.*Too long reads: *(\d*) .*""".r - val adapterR = """Adapter '([C|T|A|G]*)'.*trimmed (\d*) times.""".r - - var stats: mutable.Map[String, Int] = mutable.Map("trimmed" -> 0, "tooshort" -> 0, "toolong" -> 0) - var adapter_stats: mutable.Map[String, Int] = mutable.Map() - - if (stats_output.exists) for (line <- Source.fromFile(stats_output).getLines) { - line match { - case trimR(m) => stats += ("trimmed" -> m.toInt) - case tooShortR(m) => stats += ("tooshort" -> m.toInt) - case tooLongR(m) => stats += ("toolong" -> m.toInt) - case adapterR(adapter, count) => adapter_stats += (adapter -> count.toInt) - case _ => - } - } - return ("num_reads_affected" := stats("trimmed")) ->: - ("num_reads_discarded_too_short" := stats("tooshort")) ->: - ("num_reads_discarded_too_long" := stats("toolong")) ->: - ("adapters" := adapter_stats.toMap) ->: - jEmptyObject - } } object Cutadapt { @@ -121,30 +96,4 @@ object Cutadapt { cutadapt.stats_output = new File(output.getAbsolutePath.substring(0, output.getAbsolutePath.lastIndexOf(".")) + ".stats") return cutadapt } - - def mergeSummaries(jsons: List[Json]): Json = { - var affected = 0 - var tooShort = 0 - var tooLong = 0 - var adapter_stats: mutable.Map[String, Int] = mutable.Map() - - for (json <- jsons) { - affected += json.field("num_reads_affected").get.numberOrZero.toInt - tooShort += json.field("num_reads_discarded_too_short").get.numberOrZero.toInt - tooLong += json.field("num_reads_discarded_too_long").get.numberOrZero.toInt - - val adapters = json.fieldOrEmptyObject("adapters") - for (key <- adapters.objectFieldsOrEmpty) { - val number = adapters.field(key).get.numberOrZero.toInt - if (adapter_stats.contains(key)) adapter_stats(key) += number - else adapter_stats += (key -> number) - } - } - - return ("num_reads_affected" := affected) ->: - ("num_reads_discarded_too_short" := tooShort) ->: - ("num_reads_discarded_too_long" := tooLong) ->: - ("adapters" := adapter_stats.toMap) ->: - jEmptyObject - } } diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala index c7554f0e4..cd9dd4ae0 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala @@ -23,11 +23,7 @@ import org.broadinstitute.gatk.utils.commandline.Output import scala.io.Source -import argonaut._, Argonaut._ -import scalaz._, Scalaz._ - import nl.lumc.sasc.biopet.core.config.Configurable -import nl.lumc.sasc.biopet.utils.ConfigUtils /** * FastQC wrapper with added functionality for the Flexiprep pipeline @@ -142,32 +138,6 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r } else Set() } - /** - * Summary of the FastQC run, stored in a [[Json]] object - * - * @deprecated - */ - def summary: Json = { - - val outputMap = - Map("plot_duplication_levels" -> "Images/duplication_levels.png", - "plot_kmer_profiles" -> "Images/kmer_profiles.png", - "plot_per_base_gc_content" -> "Images/per_base_gc_content.png", - "plot_per_base_n_content" -> "Images/per_base_n_content.png", - "plot_per_base_quality" -> "Images/per_base_quality.png", - "plot_per_base_sequence_content" -> "Images/per_base_sequence_content.png", - "plot_per_sequence_gc_content" -> "Images/per_sequence_gc_content.png", - "plot_per_sequence_quality" -> "Images/per_sequence_quality.png", - "plot_sequence_length_distribution" -> "Images/sequence_length_distribution.png", - "fastqc_data" -> "fastqc_data.txt") - .map { - case (name, relPath) => - name -> Map("path" -> (outputDir + File.separator + relPath)) - } - - ConfigUtils.mapToJson(outputMap) - } - @Output var outputFiles: List[File] = Nil diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala index 4199c69ec..785efc483 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -61,13 +61,11 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with var fastqc_R1_after: Fastqc = _ var fastqc_R2_after: Fastqc = _ - //val summary = new FlexiprepSummary(this) - def init() { require(outputDir != null, "Missing output directory on flexiprep module") require(input_R1 != null, "Missing input R1 on flexiprep module") - //require(sampleId != null, "Missing sample ID on flexiprep module") - //require(libId != null, "Missing library ID on flexiprep module") + require(sampleId != null, "Missing sample ID on flexiprep module") + require(libId != null, "Missing library ID on flexiprep module") paired = input_R2.isDefined @@ -88,8 +86,6 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with } case _ => } - - //summary.out = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".qc.summary.json") } def biopetScript() { @@ -109,24 +105,14 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with fastqc_R1 = Fastqc(this, input_R1, new File(outputDir, R1_name + ".fastqc/")) add(fastqc_R1) - //summary.addFastqc(fastqc_R1) addSummarizable(fastqc_R1, "fastqc_R1") outputFiles += ("fastqc_R1" -> fastqc_R1.output) - //val md5sum_R1 = Md5sum(this, input_R1, outputDir) - //add(md5sum_R1) - //summary.addMd5sum(md5sum_R1, R2 = false, after = false) - if (paired) { fastqc_R2 = Fastqc(this, input_R2.get, new File(outputDir, R2_name + ".fastqc/")) add(fastqc_R2) addSummarizable(fastqc_R2, "fastqc_R2") - //summary.addFastqc(fastqc_R2, R2 = true) outputFiles += ("fastqc_R2" -> fastqc_R2.output) - - //val md5sum_R2 = Md5sum(this, input_R2.get, outputDir) - //add(md5sum_R2) - //summary.addMd5sum(md5sum_R2, R2 = true, after = false) } } @@ -165,14 +151,12 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with seqstat_R1.isIntermediate = true add(seqstat_R1) addSummarizable(seqstat_R1, "seqstat_R1") - //summary.addSeqstat(seqstat_R1, R2 = false, after = false, chunk) if (paired) { val seqstat_R2 = Seqstat(this, R2, outDir) seqstat_R2.isIntermediate = true add(seqstat_R2) addSummarizable(seqstat_R2, "seqstat_R2") - //summary.addSeqstat(seqstat_R2, R2 = true, after = false, chunk) } if (!skipClip) { // Adapter clipping @@ -182,7 +166,6 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with cutadapt_R1.isIntermediate = true add(cutadapt_R1) addSummarizable(cutadapt_R1, "clipping_R1") - //summary.addCutadapt(cutadapt_R1, R2 = false, chunk) R1 = cutadapt_R1.fastq_output deps ::= R1 outputFiles += ("cutadapt_R1_stats" -> cutadapt_R1.stats_output) @@ -194,7 +177,6 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with cutadapt_R2.isIntermediate = true add(cutadapt_R2) addSummarizable(cutadapt_R2, "clipping_R2") - //summary.addCutadapt(cutadapt_R2, R2 = true, chunk) R2 = cutadapt_R2.fastq_output deps ::= R2 @@ -208,7 +190,6 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with fqSync.deps :::= deps add(fqSync) addSummarizable(fqSync, "fastq_sync") - //summary.addFastqcSync(fqSync, chunk) outputFiles += ("syncStats" -> fqSync.outputStats) R1 = fqSync.outputFastq1 R2 = fqSync.outputFastq2 @@ -230,7 +211,6 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with sickle.isIntermediate = true add(sickle) addSummarizable(sickle, "trimming") - //summary.addSickle(sickle, chunk) R1 = sickle.output_R1 if (paired) R2 = sickle.output_R2 } @@ -238,13 +218,13 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with val seqstat_R1_after = Seqstat(this, R1, outDir) seqstat_R1_after.deps = deps add(seqstat_R1_after) - //summary.addSeqstat(seqstat_R1_after, R2 = false, after = true, chunk) + addSummarizable(seqstat_R1_after, "seqstat_R1_after") if (paired) { val seqstat_R2_after = Seqstat(this, R2, outDir) seqstat_R2_after.deps = deps add(seqstat_R2_after) - //summary.addSeqstat(seqstat_R2_after, R2 = true, after = true, chunk) + addSummarizable(seqstat_R2_after, "seqstat_R2_after") } outputFiles += (chunk + "output_R1" -> R1) @@ -264,27 +244,17 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with if (paired) outputFiles += ("output_R2_gzip" -> R2) if (!skipTrim || !skipClip) { - val md5sum_R1 = Md5sum(this, R1, outputDir) - add(md5sum_R1) - //summary.addMd5sum(md5sum_R1, R2 = false, after = true) - if (paired) { - val md5sum_R2 = Md5sum(this, R2, outputDir) - add(md5sum_R2) - //summary.addMd5sum(md5sum_R2, R2 = true, after = true) - } fastqc_R1_after = Fastqc(this, R1, new File(outputDir, R1_name + ".qc.fastqc/")) add(fastqc_R1_after) addSummarizable(fastqc_R1_after, "fastqc_R1_qc") - //summary.addFastqc(fastqc_R1_after, after = true) + if (paired) { fastqc_R2_after = Fastqc(this, R2, new File(outputDir, R2_name + ".qc.fastqc/")) add(fastqc_R2_after) addSummarizable(fastqc_R2_after, "fastqc_R2_qc") - //summary.addFastqc(fastqc_R2_after, R2 = true, after = true) } } - //add(summary) addSummaryJobs } diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala deleted file mode 100644 index 86a483166..000000000 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala +++ /dev/null @@ -1,242 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.pipelines.flexiprep - -import java.io.PrintWriter -import nl.lumc.sasc.biopet.core.config.Configurable -import nl.lumc.sasc.biopet.extensions.{ Md5sum, Seqstat } -import nl.lumc.sasc.biopet.tools.FastqSync -import org.broadinstitute.gatk.queue.function.InProcessFunction -import org.broadinstitute.gatk.utils.commandline.{ Input, Output } -import java.io.File -import argonaut._, Argonaut._ -import scalaz._, Scalaz._ - -class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Configurable { - this.analysisName = getClass.getSimpleName - - @Input(doc = "deps") - var deps: List[File] = Nil - - @Output(doc = "Summary output", required = true) - var out: File = _ - - class Chunk { - var seqstatR1: Seqstat = _ - var seqstatR2: Seqstat = _ - var seqstatR1after: Seqstat = _ - var seqstatR2after: Seqstat = _ - - var cutadaptR1: Cutadapt = _ - var cutadaptR2: Cutadapt = _ - - var fastqSync: FastqSync = _ - - var sickle: Sickle = _ - } - - var chunks: Map[String, Chunk] = Map() - - var md5R1: Md5sum = _ - var md5R2: Md5sum = _ - var md5R1after: Md5sum = _ - var md5R2after: Md5sum = _ - - var fastqcR1: Fastqc = _ - var fastqcR2: Fastqc = _ - var fastqcR1after: Fastqc = _ - var fastqcR2after: Fastqc = _ - - var flexiprep: Flexiprep = if (root.isInstanceOf[Flexiprep]) root.asInstanceOf[Flexiprep] else { - throw new IllegalStateException("Root is no instance of Flexiprep") - } - - var resources: Map[String, Json] = Map() - - def addFastqc(fastqc: Fastqc, R2: Boolean = false, after: Boolean = false): Fastqc = { - if (!R2 && !after) this.fastqcR1 = fastqc - else if (!R2 && after) this.fastqcR1after = fastqc - else if (R2 && !after) this.fastqcR2 = fastqc - else if (R2 && after) this.fastqcR2after = fastqc - deps ::= fastqc.output - return fastqc - } - - def addMd5sum(md5sum: Md5sum, R2: Boolean = false, after: Boolean = false): Md5sum = { - if (!R2 && !after) this.md5R1 = md5sum - else if (!R2 && after) this.md5R1after = md5sum - else if (R2 && !after) this.md5R2 = md5sum - else if (R2 && after) this.md5R2after = md5sum - deps ::= md5sum.output - return md5sum - } - - def addSeqstat(seqstat: Seqstat, R2: Boolean = false, after: Boolean = false, chunk: String = ""): Seqstat = { - if (!chunks.contains(chunk)) chunks += (chunk -> new Chunk) - if (!R2 && !after) chunks(chunk).seqstatR1 = seqstat - else if (!R2 && after) chunks(chunk).seqstatR1after = seqstat - else if (R2 && !after) chunks(chunk).seqstatR2 = seqstat - else if (R2 && after) chunks(chunk).seqstatR2after = seqstat - deps ::= seqstat.output - return seqstat - } - - def addCutadapt(cutadapt: Cutadapt, R2: Boolean = false, chunk: String = ""): Cutadapt = { - if (!chunks.contains(chunk)) chunks += (chunk -> new Chunk) - if (!R2) chunks(chunk).cutadaptR1 = cutadapt - else chunks(chunk).cutadaptR2 = cutadapt - deps ::= cutadapt.stats_output - return cutadapt - } - - def addSickle(sickle: Sickle, chunk: String = ""): Sickle = { - if (!chunks.contains(chunk)) chunks += (chunk -> new Chunk) - chunks(chunk).sickle = sickle - deps ::= sickle.output_stats - return sickle - } - - def addFastqcSync(fastqSync: FastqSync, chunk: String = ""): FastqSync = { - if (!chunks.contains(chunk)) chunks += (chunk -> new Chunk) - chunks(chunk).fastqSync = fastqSync - deps ::= fastqSync.outputStats - fastqSync - } - // format: OFF - override def run { - logger.debug("Start") - md5Summary() - val summary = - ("samples" := ( flexiprep.sampleId.getOrElse("x") := - ("libraries" := ( flexiprep.libId.getOrElse("x") := ( - ("flexiprep" := ( - ("clipping" := !flexiprep.skipClip) ->: - ("trimming" := !flexiprep.skipTrim) ->: - ("paired" := flexiprep.paired) ->: - jEmptyObject)) ->: - ("stats" := ( - ("fastq" := seqstatSummary) ->: - ("clipping" :=? clipstatSummary) ->?: - ("trimming" :=? trimstatSummary) ->?: - jEmptyObject)) ->: - ("resources" := (("raw_R1" := getResources(fastqcR1, md5R1)) ->: - ("raw_R2" :?= getResources(fastqcR2, md5R2)) ->?: - ("proc_R1" :?= getResources(fastqcR1after, md5R1after)) ->?: - ("proc_R2" :?= getResources(fastqcR2after, md5R2after)) ->?: - jEmptyObject)) ->: - jEmptyObject ))->: jEmptyObject)->: jEmptyObject)->: jEmptyObject) ->: jEmptyObject - // format: ON - val summeryText = summary.spaces2 - logger.debug("\n" + summeryText) - val writer = new PrintWriter(out) - writer.write(summeryText) - writer.close() - logger.debug("Stop") - } - - def seqstatSummary(): Option[Json] = { - val R1_chunks = for ((key, value) <- chunks) yield value.seqstatR1.getSummary - val R1: Json = Seqstat.mergeSummaries(R1_chunks.toList) - - val R2: Option[Json] = if (!flexiprep.paired) None - else if (chunks.size == 1) Option(chunks.head._2.seqstatR2.getSummary) - else { - val s = for ((key, value) <- chunks) yield value.seqstatR2.getSummary - Option(Seqstat.mergeSummaries(s.toList)) - } - val R1_proc: Option[Json] = if (flexiprep.skipClip && flexiprep.skipTrim) None - else if (chunks.size == 1) Option(chunks.head._2.seqstatR1after.getSummary) - else { - val s = for ((key, value) <- chunks) yield value.seqstatR1after.getSummary - Option(Seqstat.mergeSummaries(s.toList)) - } - val R2_proc: Option[Json] = if (!flexiprep.paired || (flexiprep.skipClip && flexiprep.skipTrim)) None - else if (chunks.size == 1) Option(chunks.head._2.seqstatR2after.getSummary) - else { - val s = for ((key, value) <- chunks) yield value.seqstatR2after.getSummary - Option(Seqstat.mergeSummaries(s.toList)) - } - return Option(("R1_raw" := R1) ->: - ("R2_raw" :=? R2) ->?: - ("R1_proc" :=? R1_proc) ->?: - ("R2_proc" :=? R2_proc) ->?: - jEmptyObject) - } - - def md5Summary() { - val R1_raw = md5Summary(md5R1) - val R2_raw = md5Summary(md5R2) - val R1_proc = md5Summary(md5R1after) - val R2_proc = md5Summary(md5R2after) - - if (!R1_raw.isEmpty) resources += ("fastq_R1_raw" -> R1_raw.get) - if (!R2_raw.isEmpty) resources += ("fastq_R2_raw" -> R2_raw.get) - if (!R1_proc.isEmpty) resources += ("fastq_R1_proc" -> R1_proc.get) - if (!R2_proc.isEmpty) resources += ("fastq_R2_proc" -> R2_proc.get) - } - - def md5Summary(md5sum: Md5sum): Option[Json] = { - if (md5sum == null) return None - else return Option(md5sum.getSummary) - } - - def getResources(fastqc: Fastqc, md5sum: Md5sum): Option[Json] = { - if (fastqc == null || md5sum == null) return None - val fastqcSum = fastqcSummary(fastqc).get - return Option(("fastq" := md5Summary(md5sum)) ->: fastqcSum) - } - - def fastqcSummary(fastqc: Fastqc): Option[Json] = { - if (fastqc == null) return None - else return Option(fastqc.summary) - } - - def clipstatSummary(): Option[Json] = { - if (flexiprep.skipClip) return None - val R1: Json = if (chunks.size == 1) chunks.head._2.cutadaptR1.getSummary - else { - val s = for ((key, value) <- chunks) yield value.cutadaptR1.getSummary - Cutadapt.mergeSummaries(s.toList) - } - val R2: Option[Json] = if (!flexiprep.paired) None - else if (chunks.size == 1) Option(chunks.head._2.cutadaptR2.getSummary) - else { - val s = for ((key, value) <- chunks) yield value.cutadaptR2.getSummary - Option(Cutadapt.mergeSummaries(s.toList)) - } - return Option(("R1" := R1) ->: - ("R2" :=? R2) ->?: - ("fastq_sync" :=? syncstatSummary) ->?: - jEmptyObject) - } - - def syncstatSummary(): Option[Json] = - if (flexiprep.skipClip || !flexiprep.paired) - None - else { - val s = for ((key, value) <- chunks) yield value.fastqSync.summary - Option(FastqSync.mergeSummaries(s.toList)) - } - - def trimstatSummary(): Option[Json] = { - if (flexiprep.skipTrim) return None - if (chunks.size == 1) return Option(chunks.head._2.sickle.getSummary) - else { - val s = for ((key, value) <- chunks) yield value.sickle.getSummary - return Option(Sickle.mergeSummaries(s.toList)) - } - } -} diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Sickle.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Sickle.scala index 0ec269874..dc63f70bf 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Sickle.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Sickle.scala @@ -66,51 +66,4 @@ class Sickle(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Sickle(r Map("input_R1" -> input_R1, "output_R1" -> output_R1) ++ (if (input_R2 != null) Map("input_R2" -> input_R2, "output_R2" -> output_R2) else Map()) } - - def getSummary: Json = { - val pairKept = """FastQ paired records kept: (\d*) \((\d*) pairs\)""".r - val singleKept = """FastQ single records kept: (\d*) \(from PE1: (\d*), from PE2: (\d*)\)""".r - val pairDiscarded = """FastQ paired records discarded: (\d*) \((\d*) pairs\)""".r - val singleDiscarded = """FastQ single records discarded: (\d*) \(from PE1: (\d*), from PE2: (\d*)\)""".r - - var stats: mutable.Map[String, Int] = mutable.Map() - - if (output_stats.exists) for (line <- Source.fromFile(output_stats).getLines) { - line match { - case pairKept(reads, pairs) => stats += ("num_paired_reads_kept" -> reads.toInt) - case singleKept(total, r1, r2) => { - stats += ("num_reads_kept_R1" -> r1.toInt) - stats += ("num_reads_kept_R2" -> r2.toInt) - } - case pairDiscarded(reads, pairs) => stats += ("num_paired_reads_discarded" -> reads.toInt) - case singleDiscarded(total, r1, r2) => { - stats += ("num_reads_discarded_R1" -> r1.toInt) - stats += ("num_reads_discarded_R2" -> r2.toInt) - } - case _ => - } - } - - val temp = ("" := stats.toMap) ->: jEmptyObject - return temp.fieldOrEmptyObject("") - } } - -object Sickle { - def mergeSummaries(jsons: List[Json]): Json = { - var total: mutable.Map[String, Int] = mutable.Map() - - for (json <- jsons) { - for (key <- json.objectFieldsOrEmpty) { - if (json.field(key).get.isNumber) { - val number = json.field(key).get.numberOrZero.toInt - if (total.contains(key)) total(key) += number - else total += (key -> number) - } - } - } - - val temp = ("" := total.toMap) ->: jEmptyObject - return temp.fieldOrEmptyObject("") - } -} \ No newline at end of file -- GitLab