/**
 * Contract for components that can contribute files and statistics to a summary.
 *
 * Created by pjvan_thof on 2/14/15.
 */
trait Summarizable extends Configurable {

  /** Module name used for this component; initialised from `configName`. */
  var summaryModule = configName

  /** Files exposed by this component, keyed by a label chosen by the implementation. */
  def summaryFiles: Map[String, File]

  /** Statistics exposed by this component, keyed by name. */
  def summaryStats: Map[String, Any]

  /**
   * Decides which value to keep when two values collide on the same key during a merge.
   *
   * The default implementation always keeps the first value; implementations may override
   * this to combine or prefer values differently.
   *
   * @param v1 first of the two conflicting values
   * @param v2 second of the two conflicting values
   * @param key key on which the two values collide
   * @return the value to keep; `v1` by default
   */
  def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = v1
}
/**
 * Mix-in for QScripts that register [[Summarizable]] components and nested summary scripts,
 * and schedule the jobs that produce the summary file.
 *
 * Created by pjvan_thof on 2/14/15.
 */
trait SummaryQScript extends BiopetQScript {

  /** Key is (sample, library); None means the sample or library is not applicable */
  private[summary] var summarizables: Map[(Option[String], Option[String]), List[Summarizable]] = Map()

  /** Nested summary scripts, in the order they were registered. */
  private[summary] var summaryQScripts: List[SummaryQScript] = Nil

  /** File used as the output of the [[WriteSummary]] job. */
  def summaryFile: File

  /**
   * Registers a summarizable under the given sample/library key.
   *
   * @param summarizable component to register
   * @param sampleId sample the component belongs to, if any
   * @param libraryId library the component belongs to, if any; requires `sampleId`
   * @throws IllegalArgumentException when a library is given without a sample
   */
  def addSummarizable(summarizable: Summarizable,
                      sampleId: Option[String] = None,
                      libraryId: Option[String] = None): Unit = {
    // A library always requires a sample, but a sample without a library is a valid key
    require(libraryId.isEmpty || sampleId.isDefined, "libraryId requires a sampleId")
    val key = (sampleId, libraryId)
    summarizables += key -> (summarizable :: summarizables.getOrElse(key, Nil))
  }

  /**
   * Registers a nested summary script.
   *
   * @param summaryQScript script to append to the registered list
   */
  def addSummaryQScript(summaryQScript: SummaryQScript): Unit = {
    summaryQScripts :+= summaryQScript
  }

  /**
   * Adds the summary jobs to this script: one md5sum job per summary file (when checksums
   * are enabled on the [[WriteSummary]] job) followed by the [[WriteSummary]] job itself.
   */
  def addSummaryJobs: Unit = {
    val writeSummary = new WriteSummary(this)

    //Automatic checksums
    val keepChecksums: Boolean = config("keep_checksums_files", default = false)

    for ((_, summarizableList) <- summarizables; summarizable <- summarizableList; (_, file) <- summarizable.summaryFiles) {
      if (writeSummary.md5sum) {
        val md5sum = Md5sum(this, file)
        // Checksum files are treated as intermediate unless the config asks to keep them
        md5sum.isIntermediate = !keepChecksums
        // The summary job must wait for every checksum file
        writeSummary.deps :+= md5sum.output
        add(md5sum)
      }
      //TODO: add more checksums types
    }

    add(writeSummary)
  }
}
/**
 * In-process job that writes the summary file of a [[SummaryQScript]].
 *
 * Created by pjvan_thof on 2/14/15.
 *
 * @param root configurable that owns this job; must be a [[SummaryQScript]]
 * @throws IllegalArgumentException when `root` is not a [[SummaryQScript]]
 */
class WriteSummary(val root: Configurable) extends InProcessFunction with Configurable {
  this.analysisName = getClass.getSimpleName

  /** The owning script, obtained from `root` by a checked match instead of an unchecked cast. */
  val summaryQScript: SummaryQScript = root match {
    case script: SummaryQScript => script
    // Same exception type and message that the previous `require` produced
    case _                      => throw new IllegalArgumentException("requirement failed: root is not a SummaryQScript")
  }

  @Input(doc = "deps", required = false)
  var deps: List[File] = Nil

  @Output(doc = "Summary output", required = true)
  var out: File = summaryQScript.summaryFile

  /** Whether md5 checksums are requested; read from config key `summary_md5`, default true. */
  var md5sum: Boolean = config("summary_md5", default = true)
  //TODO: add more checksums types

  /**
   * Extends `deps` with the summary files of the nested summary scripts and the first output
   * of every registered summarizable that is itself a QFunction, then defers to the parent.
   */
  override def freezeFieldValues(): Unit = {
    val nestedSummaries = summaryQScript.summaryQScripts.map(_.summaryFile)
    val functionOutputs = summaryQScript.summarizables.values.flatten.collect {
      case function: QFunction => function.firstOutput
    }
    // Single concatenation, same order as before: nested summaries first, then function outputs
    deps = deps ++ nestedSummaries ++ functionOutputs
    super.freezeFieldValues()
  }

  /** Opens `out` for writing and closes it again; no summary content is written yet. */
  def run(): Unit = {
    val writer = new PrintWriter(out)

    writer.close()
  }
}
21cb069d658623ab5a7b8986e454db6000f927fa..31fdc6b84005d40a40e39ccba110c9e5400899e0 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala @@ -18,6 +18,8 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep import java.io.{ File, FileNotFoundException } +import nl.lumc.sasc.biopet.core.summary.Summarizable + import scala.io.Source import argonaut._, Argonaut._ @@ -32,7 +34,7 @@ import nl.lumc.sasc.biopet.utils.ConfigUtils * This wrapper implements additional methods for parsing FastQC output files and aggregating everything in a summary * object. The current implementation is based on FastQC v0.10.1. */ -class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(root) { +class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(root) with Summarizable { /** Class for storing a single FastQC module result */ protected case class FastQCModule(name: String, status: String, lines: Seq[String]) @@ -160,6 +162,10 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r ConfigUtils.mapToJson(outputMap) } + + def summaryFiles: Map[String, File] = Map("test" -> this.fastqfile) + + def summaryStats: Map[String, Any] = Map() } object Fastqc { diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala index 9ab16032bd095d40728e91f92a611366b400af2f..531ed8bbb219da0327795eb295dab842531abf85 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -15,6 +15,7 @@ */ package nl.lumc.sasc.biopet.pipelines.flexiprep +import nl.lumc.sasc.biopet.core.summary.SummaryQScript import org.broadinstitute.gatk.queue.QScript 
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument } @@ -23,7 +24,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.extensions.{ Gzip, Pbzip2, Md5sum, Zcat, Seqstat } import nl.lumc.sasc.biopet.tools.FastqSync -class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { +class Flexiprep(val root: Configurable) extends QScript with BiopetQScript with SummaryQScript { def this() = this(null) @Input(doc = "R1 fastq file (gzipped allowed)", shortName = "R1", required = true) @@ -48,6 +49,8 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { @Argument(doc = "Library ID", shortName = "library", required = true) var libId: String = _ + def summaryFile = new File(outputDir, sampleId + "-" + libId + ".qc.summary.json") + var paired: Boolean = input_R2.isDefined var R1_ext: String = _ var R2_ext: String = _ @@ -264,6 +267,7 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { summary.addMd5sum(md5sum_R2, R2 = true, after = true) } fastqc_R1_after = Fastqc(this, R1, outputDir + "/" + R1_name + ".qc.fastqc/") + addSummarizable(fastqc_R1_after) add(fastqc_R1_after) summary.addFastqc(fastqc_R1_after, after = true) if (paired) { @@ -273,7 +277,8 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { } } - add(summary) + //add(summary) + addSummaryJobs } def extractIfNeeded(file: File, runDir: String): File = {