Commit e8f474c1 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added initial version of summary

parent faa0baaf
package nl.lumc.sasc.biopet.core.summary
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
/**
* Created by pjvan_thof on 2/14/15.
*/
trait Summarizable extends Configurable {

  /** Name used for this object in the summary; initialised from `configName`. */
  var summaryModule = configName

  /** Files this object contributes to the summary, keyed by a name chosen by the implementation. */
  def summaryFiles: Map[String, File]

  /** Values this object contributes to the summary, keyed by name. */
  def summaryStats: Map[String, Any]

  /**
   * Picks the value to keep when two values are offered for the same key.
   * This default always keeps the first value; implementations can override it.
   *
   * @param v1  first candidate value
   * @param v2  second candidate value
   * @param key key for which both values were offered
   * @return the value that should be kept
   */
  def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = v1
}
package nl.lumc.sasc.biopet.core.summary
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetQScript
import nl.lumc.sasc.biopet.extensions.Md5sum
/**
* Created by pjvan_thof on 2/14/15.
*/
trait SummaryQScript extends BiopetQScript {

  /** Registered summarizables, keyed by (sample, library); None means that level is not applicable. */
  private[summary] var summarizables: Map[(Option[String], Option[String]), List[Summarizable]] = Map()

  /** Other summary scripts; WriteSummary adds their summary files to its dependencies. */
  private[summary] var summaryQScripts: List[SummaryQScript] = Nil

  /** File the summary of this script is written to. */
  def summaryFile: File

  /**
   * Registers a summarizable under the given sample/library key.
   *
   * @param summarizable object to register
   * @param sampleId     sample the object belongs to, None when not applicable
   * @param libraryId    library the object belongs to, None when not applicable
   * @throws IllegalArgumentException when a library is given without a sample
   */
  def addSummarizable(summarizable: Summarizable,
                      sampleId: Option[String] = None,
                      libraryId: Option[String] = None): Unit = {
    // A library always requires a sample, but a sample on its own (no library) is a valid key.
    require(libraryId.isEmpty || sampleId.isDefined, "A library requires a sample")
    val key = (sampleId, libraryId)
    summarizables += key -> (summarizable :: summarizables.getOrElse(key, Nil))
  }

  /**
   * Registers another summary script.
   *
   * @param summaryQScript script to register
   */
  def addSummaryQScript(summaryQScript: SummaryQScript): Unit = {
    summaryQScripts :+= summaryQScript
  }

  /**
   * Adds the WriteSummary job for this script. When md5 checksums are enabled on that job,
   * an Md5sum job is also added for every file of every registered summarizable, and its
   * output is added to the dependencies of the WriteSummary job.
   */
  def addSummaryJobs: Unit = {
    val writeSummary = new WriteSummary(this)

    //Automatic checksums
    val keepChecksums: Boolean = config("keep_checksums_files", default = false)

    for ((_, summarizableList) <- summarizables; summarizable <- summarizableList; (_, file) <- summarizable.summaryFiles) {
      if (writeSummary.md5sum) {
        val md5sum = Md5sum(this, file)
        // Checksum files are marked intermediate unless the config asks to keep them.
        md5sum.isIntermediate = !keepChecksums
        writeSummary.deps :+= md5sum.output
        add(md5sum)
      }
      //TODO: add more checksums types
    }

    add(writeSummary)
  }
}
package nl.lumc.sasc.biopet.core.summary
import java.io.{PrintWriter, File}
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.queue.function.{ QFunction, InProcessFunction }
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
/**
* Created by pjvan_thof on 2/14/15.
*/
class WriteSummary(val root: Configurable) extends InProcessFunction with Configurable {
  this.analysisName = getClass.getSimpleName

  // Fail at construction time when the root cannot provide summary information.
  require(root.isInstanceOf[SummaryQScript], "root is not a SummaryQScript")

  /** The root, viewed as the summary script this job belongs to. */
  val summaryQScript = root.asInstanceOf[SummaryQScript]

  @Input(doc = "deps", required = false)
  var deps: List[File] = Nil

  @Output(doc = "Summary output", required = true)
  var out: File = summaryQScript.summaryFile

  /** Whether md5 checksums are enabled; read from config key "summary_md5", default true. */
  var md5sum: Boolean = config("summary_md5", default = true)
  //TODO: add more checksums types

  /**
   * Extends `deps` with the summary files of all registered summary scripts and with the
   * first output of every registered summarizable that is also a QFunction, then delegates
   * to the parent implementation.
   */
  override def freezeFieldValues(): Unit = {
    deps ++= summaryQScript.summaryQScripts.map(_.summaryFile)
    deps ++= summaryQScript.summarizables.values.flatMap(_.collect { case f: QFunction => f.firstOutput })
    super.freezeFieldValues()
  }

  /** Opens the output file for writing and closes it again, leaving it empty. */
  def run(): Unit = new PrintWriter(out).close()
}
......@@ -49,4 +49,11 @@ object Md5sum {
md5sum.output = new File(outDir + fastqfile.getName + ".md5")
return md5sum
}
/**
 * Builds an Md5sum job for a file; the output is "<file name>.md5" next to the input file.
 *
 * @param root configurable used as root of the new job
 * @param file file to calculate the checksum of
 * @return the configured Md5sum job
 */
def apply(root: Configurable, file: File): Md5sum = {
  val job = new Md5sum(root)
  job.input = file
  job.output = new File(file.getParentFile, file.getName + ".md5")
  job
}
}
......@@ -18,6 +18,8 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep
import java.io.{ File, FileNotFoundException }
import nl.lumc.sasc.biopet.core.summary.Summarizable
import scala.io.Source
import argonaut._, Argonaut._
......@@ -32,7 +34,7 @@ import nl.lumc.sasc.biopet.utils.ConfigUtils
* This wrapper implements additional methods for parsing FastQC output files and aggregating everything in a summary
* object. The current implementation is based on FastQC v0.10.1.
*/
class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(root) {
class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(root) with Summarizable {
/** Class for storing a single FastQC module result */
protected case class FastQCModule(name: String, status: String, lines: Seq[String])
......@@ -160,6 +162,10 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r
ConfigUtils.mapToJson(outputMap)
}
/** Files reported in the summary: currently only `fastqfile`, under the key "test". */
def summaryFiles: Map[String, File] = Map("test" -> fastqfile)
/** Values reported in the summary; currently none. */
def summaryStats: Map[String, Any] = Map.empty
}
object Fastqc {
......
......@@ -15,6 +15,7 @@
*/
package nl.lumc.sasc.biopet.pipelines.flexiprep
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument }
......@@ -23,7 +24,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.{ Gzip, Pbzip2, Md5sum, Zcat, Seqstat }
import nl.lumc.sasc.biopet.tools.FastqSync
class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
class Flexiprep(val root: Configurable) extends QScript with BiopetQScript with SummaryQScript {
def this() = this(null)
@Input(doc = "R1 fastq file (gzipped allowed)", shortName = "R1", required = true)
......@@ -48,6 +49,8 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
@Argument(doc = "Library ID", shortName = "library", required = true)
var libId: String = _
/** Summary file of this run: "<sampleId>-<libId>.qc.summary.json" inside the output directory. */
def summaryFile: File = {
  val fileName = sampleId + "-" + libId + ".qc.summary.json"
  new File(outputDir, fileName)
}
var paired: Boolean = input_R2.isDefined
var R1_ext: String = _
var R2_ext: String = _
......@@ -264,6 +267,7 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
summary.addMd5sum(md5sum_R2, R2 = true, after = true)
}
fastqc_R1_after = Fastqc(this, R1, outputDir + "/" + R1_name + ".qc.fastqc/")
addSummarizable(fastqc_R1_after)
add(fastqc_R1_after)
summary.addFastqc(fastqc_R1_after, after = true)
if (paired) {
......@@ -273,7 +277,8 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
}
}
add(summary)
//add(summary)
addSummaryJobs
}
def extractIfNeeded(file: File, runDir: String): File = {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment