Commit 481937c7 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Adding some initial summary creation

parent dcff73d6
......@@ -17,10 +17,15 @@ package nl.lumc.sasc.biopet.core
import java.io.File
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils }
import nl.lumc.sasc.biopet.core.summary.{Summarizable, SummaryQScript}
import nl.lumc.sasc.biopet.utils.summary.SummaryDb
import nl.lumc.sasc.biopet.utils.{ConfigUtils, Logging}
import org.broadinstitute.gatk.queue.QScript
import scala.concurrent.Await
import scala.concurrent.duration.Duration
import scala.concurrent.ExecutionContext.Implicits.global
/** This trait creates a structured way of use multisample pipelines */
trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
......@@ -252,6 +257,23 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
}
sample ::: lib ::: super.configFullPath
}
def initSummaryDb: Unit = {
val db = SummaryDb.openSqliteSummary(summaryDbFile)
val namesOld = Await.result(db.getSamples(runId = Some(summaryRunId)).map(_.map(_.name).toSet), Duration.Inf)
for ((sampleName, sample) <- samples) {
val sampleId: Int = if (!namesOld.contains(sampleName))
Await.result(db.createSample(sampleName, summaryRunId), Duration.Inf)
else Await.result(db.getSamples(runId = Some(summaryRunId), name = Some(sampleName)).map(_.head.id), Duration.Inf)
// TODO: Add tags
val libNamesOld = Await.result(db.getLibraries(runId = summaryRunId, sampleId = sampleId).map(_.map(_.name)), Duration.Inf)
for ((libName, lib) <- sample.libraries) {
if (!libNamesOld.contains(libName)) db.createLibrary(libName, summaryRunId, sampleId)
// TODO: Add tags
}
}
db.close()
}
}
object MultiSampleQScript {
......
......@@ -14,13 +14,17 @@
*/
package nl.lumc.sasc.biopet.core.summary
import java.io.File
import java.io.{File, PrintWriter}
import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.core.extensions.{ CheckChecksum, Md5sum }
import nl.lumc.sasc.biopet.core.extensions.{CheckChecksum, Md5sum}
import nl.lumc.sasc.biopet.utils.summary.SummaryDb
import org.broadinstitute.gatk.queue.QScript
import scala.collection.mutable
import scala.concurrent.Await
import scala.concurrent.duration.Duration
import scala.io.Source
/**
* This trait is used for qscript / pipelines that will produce a summary
......@@ -47,6 +51,11 @@ trait SummaryQScript extends BiopetQScript { qscript: QScript =>
/** Name of summary output file */
def summaryFile: File
def summaryDbFile: File = root match {
case s:SummaryQScript => new File(s.outputDir, s"${s.summaryName}.summary.db")
case _ => throw new IllegalStateException("Root should be a SummaryQScript")
}
/**
* Add a module to summary for this pipeline
*
......@@ -93,6 +102,30 @@ trait SummaryQScript extends BiopetQScript { qscript: QScript =>
private var addedJobs = false
final lazy val summaryRunId: Int = {
if (runIdFile.exists()) {
val reader = Source.fromFile(runIdFile)
val id = reader.getLines().next().toInt
reader.close()
id
} else createRun
}
private def runIdFile = root match {
case s: SummaryQScript => new File(s.outputDir, s".log/summary.runid")
case _ => throw new IllegalStateException("Root should be a SummaryQscript")
}
private def createRun(): Int = {
val db = SummaryDb.openSqliteSummary(summaryDbFile)
val id = Await.result(db.createRun(summaryName, outputDir), Duration.Inf)
val writer = new PrintWriter(runIdFile)
writer.println(id)
writer.close()
id
}
/** Add jobs to qscript to execute summary, also add checksum jobs */
def addSummaryJobs(): Unit = {
if (addedJobs) throw new IllegalStateException("Summary jobs for this QScript are already executed")
......
......@@ -14,30 +14,30 @@
*/
package nl.lumc.sasc.biopet.core.summary
import java.io.{ File, PrintWriter }
import java.io.{File, PrintWriter}
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.LastCommitHash
import org.broadinstitute.gatk.queue.function.{ InProcessFunction, QFunction }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import nl.lumc.sasc.biopet.utils.summary.SummaryDb
import org.broadinstitute.gatk.queue.function.{InProcessFunction, QFunction}
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.collection.mutable
import scala.io.Source
import slick.driver.H2Driver.api._
/**
* This will collect and write the summary
*
* Created by pjvan_thof on 2/14/15.
*/
class WriteSummary(val parent: Configurable) extends InProcessFunction with Configurable {
class WriteSummary(val parent: SummaryQScript) extends InProcessFunction with Configurable {
this.analysisName = getClass.getSimpleName
require(parent.isInstanceOf[SummaryQScript], "root is not a SummaryQScript")
/** To access qscript for this summary */
val qscript = parent.asInstanceOf[SummaryQScript]
val qscript = parent
@Input(doc = "deps", required = false)
var deps: List[File] = Nil
......@@ -54,6 +54,12 @@ class WriteSummary(val parent: Configurable) extends InProcessFunction with Conf
}
def init(): Unit = {
if (qscript == root) {
qscript match {
case s: MultiSampleQScript => s.initSummaryDb
case _ => qscript.summaryRunId
}
} // This initialize the database
for (q <- qscript.summaryQScripts)
deps :+= q.summaryFile
for ((_, l) <- qscript.summarizables; s <- l) {
......@@ -68,11 +74,13 @@ class WriteSummary(val parent: Configurable) extends InProcessFunction with Conf
}
}
jobOutputFile = new File(out.getParentFile, ".%s.%s.out".format(out.getName, analysisName))
jobOutputFile = new File(qscript.summaryDbFile.getParentFile, "." + qscript.summaryDbFile.getName.stripSuffix(".db") + ".out")
}
/** Function to create summary */
def run(): Unit = {
val summaryDb = SummaryDb.openSqliteSummary(qscript.summaryDbFile)
for (((name, sampleId, libraryId), summarizables) <- qscript.summarizables; summarizable <- summarizables) {
summarizable.addToQscriptSummary(qscript, name)
}
......@@ -161,9 +169,10 @@ class WriteSummary(val parent: Configurable) extends InProcessFunction with Conf
"summary_creation" -> System.currentTimeMillis()
))
val writer = new PrintWriter(out)
writer.println(ConfigUtils.mapToJson(combinedMap).nospaces)
writer.close()
// val writer = new PrintWriter(out)
// writer.println(ConfigUtils.mapToJson(combinedMap).nospaces)
// writer.close()
summaryDb.close()
}
def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = {
......
......@@ -210,7 +210,7 @@ class WriteSummaryTest extends TestNGSuite with Matchers {
}
object WriteSummaryTest {
def makeWriter(root: Configurable, c: Map[String, Any] = Map()) = new WriteSummary(root) {
def makeWriter(root: SummaryQScript, c: Map[String, Any] = Map()) = new WriteSummary(root) {
override def globalConfig = new Config(c + ("exe" -> "test"))
override def outputs = Seq()
override def inputs = Seq()
......
......@@ -22,6 +22,8 @@ trait Configurable extends ImplicitConversions {
def parent: Configurable
def globalConfig: Config = if (parent != null) parent.globalConfig else Config.global
final def root: Configurable = if (parent == null) this else parent.root
/** suffix to the path */
def subPath: List[String] = Nil
......
package nl.lumc.sasc.biopet.utils.summary
import java.io.{Closeable, File}
import nl.lumc.sasc.biopet.utils.ConfigUtils
import slick.driver.H2Driver.api._
......@@ -12,7 +14,10 @@ import scala.concurrent.ExecutionContext.Implicits.global
/**
* Created by pjvanthof on 05/02/2017.
*/
class SummaryDb(db: Database) {
class SummaryDb(db: Database) extends Closeable {
def close(): Unit = db.close()
/** This method will create all table */
def createTables(): Unit = {
try {
......@@ -56,7 +61,7 @@ class SummaryDb(db: Database) {
name.map(sample.name === _)
).collect({ case Some(criteria) => criteria }).reduceLeftOption(_ && _).getOrElse(true: Rep[Boolean])
}
db.run(q.map(x => (x.id, x.runId, x.name)).result)
db.run(q.result)
}
def getSampleTags(sampleId: Int): Future[Option[Map[String, Any]]] = {
......@@ -78,7 +83,7 @@ class SummaryDb(db: Database) {
name.map(lib.name === _) // not a condition as `criteriaRoast` evaluates to `None`
).collect({ case Some(criteria) => criteria }).reduceLeftOption(_ && _).getOrElse(true: Rep[Boolean])
}
db.run(q.map(x => (x.id, x.name, x.runId, x.sampleId)).result)
db.run(q.result)
}
def getLibraryTags(libId: Int): Future[Option[Map[String, Any]]] = {
......@@ -184,5 +189,14 @@ class SummaryDb(db: Database) {
val q = l.foldLeft(settings.subquery)((a,b) => b(a))
db.run(q.map(_.content).result).map(_.headOption.map(ConfigUtils.jsonTextToMap))
}
}
object SummaryDb {
def openSqliteSummary(file: File): SummaryDb = {
val exist = file.exists()
val db = Database.forURL(s"jdbc:sqlite:${file.getAbsolutePath}", driver = "org.sqlite.JDBC")
val s = new SummaryDb(db)
if (!exist) s.createTables()
s
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment