Commit 481937c7 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Adding some initial summary creation

parent dcff73d6
...@@ -17,10 +17,15 @@ package nl.lumc.sasc.biopet.core ...@@ -17,10 +17,15 @@ package nl.lumc.sasc.biopet.core
import java.io.File import java.io.File
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript } import nl.lumc.sasc.biopet.core.summary.{Summarizable, SummaryQScript}
import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils } import nl.lumc.sasc.biopet.utils.summary.SummaryDb
import nl.lumc.sasc.biopet.utils.{ConfigUtils, Logging}
import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScript
import scala.concurrent.Await
import scala.concurrent.duration.Duration
import scala.concurrent.ExecutionContext.Implicits.global
/** This trait creates a structured way of use multisample pipelines */ /** This trait creates a structured way of use multisample pipelines */
trait MultiSampleQScript extends SummaryQScript { qscript: QScript => trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
...@@ -252,6 +257,23 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript => ...@@ -252,6 +257,23 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
} }
sample ::: lib ::: super.configFullPath sample ::: lib ::: super.configFullPath
} }
def initSummaryDb: Unit = {
val db = SummaryDb.openSqliteSummary(summaryDbFile)
val namesOld = Await.result(db.getSamples(runId = Some(summaryRunId)).map(_.map(_.name).toSet), Duration.Inf)
for ((sampleName, sample) <- samples) {
val sampleId: Int = if (!namesOld.contains(sampleName))
Await.result(db.createSample(sampleName, summaryRunId), Duration.Inf)
else Await.result(db.getSamples(runId = Some(summaryRunId), name = Some(sampleName)).map(_.head.id), Duration.Inf)
// TODO: Add tags
val libNamesOld = Await.result(db.getLibraries(runId = summaryRunId, sampleId = sampleId).map(_.map(_.name)), Duration.Inf)
for ((libName, lib) <- sample.libraries) {
if (!libNamesOld.contains(libName)) db.createLibrary(libName, summaryRunId, sampleId)
// TODO: Add tags
}
}
db.close()
}
} }
object MultiSampleQScript { object MultiSampleQScript {
......
...@@ -14,13 +14,17 @@ ...@@ -14,13 +14,17 @@
*/ */
package nl.lumc.sasc.biopet.core.summary package nl.lumc.sasc.biopet.core.summary
import java.io.File import java.io.{File, PrintWriter}
import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.core.extensions.{ CheckChecksum, Md5sum } import nl.lumc.sasc.biopet.core.extensions.{CheckChecksum, Md5sum}
import nl.lumc.sasc.biopet.utils.summary.SummaryDb
import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScript
import scala.collection.mutable import scala.collection.mutable
import scala.concurrent.Await
import scala.concurrent.duration.Duration
import scala.io.Source
/** /**
* This trait is used for qscript / pipelines that will produce a summary * This trait is used for qscript / pipelines that will produce a summary
...@@ -47,6 +51,11 @@ trait SummaryQScript extends BiopetQScript { qscript: QScript => ...@@ -47,6 +51,11 @@ trait SummaryQScript extends BiopetQScript { qscript: QScript =>
/** Name of summary output file */ /** Name of summary output file */
def summaryFile: File def summaryFile: File
def summaryDbFile: File = root match {
case s:SummaryQScript => new File(s.outputDir, s"${s.summaryName}.summary.db")
case _ => throw new IllegalStateException("Root should be a SummaryQScript")
}
/** /**
* Add a module to summary for this pipeline * Add a module to summary for this pipeline
* *
...@@ -93,6 +102,30 @@ trait SummaryQScript extends BiopetQScript { qscript: QScript => ...@@ -93,6 +102,30 @@ trait SummaryQScript extends BiopetQScript { qscript: QScript =>
private var addedJobs = false private var addedJobs = false
final lazy val summaryRunId: Int = {
if (runIdFile.exists()) {
val reader = Source.fromFile(runIdFile)
val id = reader.getLines().next().toInt
reader.close()
id
} else createRun
}
private def runIdFile = root match {
case s: SummaryQScript => new File(s.outputDir, s".log/summary.runid")
case _ => throw new IllegalStateException("Root should be a SummaryQscript")
}
private def createRun(): Int = {
val db = SummaryDb.openSqliteSummary(summaryDbFile)
val id = Await.result(db.createRun(summaryName, outputDir), Duration.Inf)
val writer = new PrintWriter(runIdFile)
writer.println(id)
writer.close()
id
}
/** Add jobs to qscript to execute summary, also add checksum jobs */ /** Add jobs to qscript to execute summary, also add checksum jobs */
def addSummaryJobs(): Unit = { def addSummaryJobs(): Unit = {
if (addedJobs) throw new IllegalStateException("Summary jobs for this QScript are already executed") if (addedJobs) throw new IllegalStateException("Summary jobs for this QScript are already executed")
......
...@@ -14,30 +14,30 @@ ...@@ -14,30 +14,30 @@
*/ */
package nl.lumc.sasc.biopet.core.summary package nl.lumc.sasc.biopet.core.summary
import java.io.{ File, PrintWriter } import java.io.{File, PrintWriter}
import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.utils.ConfigUtils import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.LastCommitHash import nl.lumc.sasc.biopet.LastCommitHash
import org.broadinstitute.gatk.queue.function.{ InProcessFunction, QFunction } import nl.lumc.sasc.biopet.utils.summary.SummaryDb
import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import org.broadinstitute.gatk.queue.function.{InProcessFunction, QFunction}
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.collection.mutable import scala.collection.mutable
import scala.io.Source import scala.io.Source
import slick.driver.H2Driver.api._
/** /**
* This will collect and write the summary * This will collect and write the summary
* *
* Created by pjvan_thof on 2/14/15. * Created by pjvan_thof on 2/14/15.
*/ */
class WriteSummary(val parent: Configurable) extends InProcessFunction with Configurable { class WriteSummary(val parent: SummaryQScript) extends InProcessFunction with Configurable {
this.analysisName = getClass.getSimpleName this.analysisName = getClass.getSimpleName
require(parent.isInstanceOf[SummaryQScript], "root is not a SummaryQScript")
/** To access qscript for this summary */ /** To access qscript for this summary */
val qscript = parent.asInstanceOf[SummaryQScript] val qscript = parent
@Input(doc = "deps", required = false) @Input(doc = "deps", required = false)
var deps: List[File] = Nil var deps: List[File] = Nil
...@@ -54,6 +54,12 @@ class WriteSummary(val parent: Configurable) extends InProcessFunction with Conf ...@@ -54,6 +54,12 @@ class WriteSummary(val parent: Configurable) extends InProcessFunction with Conf
} }
def init(): Unit = { def init(): Unit = {
if (qscript == root) {
qscript match {
case s: MultiSampleQScript => s.initSummaryDb
case _ => qscript.summaryRunId
}
} // This initialize the database
for (q <- qscript.summaryQScripts) for (q <- qscript.summaryQScripts)
deps :+= q.summaryFile deps :+= q.summaryFile
for ((_, l) <- qscript.summarizables; s <- l) { for ((_, l) <- qscript.summarizables; s <- l) {
...@@ -68,11 +74,13 @@ class WriteSummary(val parent: Configurable) extends InProcessFunction with Conf ...@@ -68,11 +74,13 @@ class WriteSummary(val parent: Configurable) extends InProcessFunction with Conf
} }
} }
jobOutputFile = new File(out.getParentFile, ".%s.%s.out".format(out.getName, analysisName)) jobOutputFile = new File(qscript.summaryDbFile.getParentFile, "." + qscript.summaryDbFile.getName.stripSuffix(".db") + ".out")
} }
/** Function to create summary */ /** Function to create summary */
def run(): Unit = { def run(): Unit = {
val summaryDb = SummaryDb.openSqliteSummary(qscript.summaryDbFile)
for (((name, sampleId, libraryId), summarizables) <- qscript.summarizables; summarizable <- summarizables) { for (((name, sampleId, libraryId), summarizables) <- qscript.summarizables; summarizable <- summarizables) {
summarizable.addToQscriptSummary(qscript, name) summarizable.addToQscriptSummary(qscript, name)
} }
...@@ -161,9 +169,10 @@ class WriteSummary(val parent: Configurable) extends InProcessFunction with Conf ...@@ -161,9 +169,10 @@ class WriteSummary(val parent: Configurable) extends InProcessFunction with Conf
"summary_creation" -> System.currentTimeMillis() "summary_creation" -> System.currentTimeMillis()
)) ))
val writer = new PrintWriter(out) // val writer = new PrintWriter(out)
writer.println(ConfigUtils.mapToJson(combinedMap).nospaces) // writer.println(ConfigUtils.mapToJson(combinedMap).nospaces)
writer.close() // writer.close()
summaryDb.close()
} }
def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = { def prefixSampleLibrary(map: Map[String, Any], sampleId: Option[String], libraryId: Option[String]): Map[String, Any] = {
......
...@@ -210,7 +210,7 @@ class WriteSummaryTest extends TestNGSuite with Matchers { ...@@ -210,7 +210,7 @@ class WriteSummaryTest extends TestNGSuite with Matchers {
} }
object WriteSummaryTest { object WriteSummaryTest {
def makeWriter(root: Configurable, c: Map[String, Any] = Map()) = new WriteSummary(root) { def makeWriter(root: SummaryQScript, c: Map[String, Any] = Map()) = new WriteSummary(root) {
override def globalConfig = new Config(c + ("exe" -> "test")) override def globalConfig = new Config(c + ("exe" -> "test"))
override def outputs = Seq() override def outputs = Seq()
override def inputs = Seq() override def inputs = Seq()
......
...@@ -22,6 +22,8 @@ trait Configurable extends ImplicitConversions { ...@@ -22,6 +22,8 @@ trait Configurable extends ImplicitConversions {
def parent: Configurable def parent: Configurable
def globalConfig: Config = if (parent != null) parent.globalConfig else Config.global def globalConfig: Config = if (parent != null) parent.globalConfig else Config.global
final def root: Configurable = if (parent == null) this else parent.root
/** suffix to the path */ /** suffix to the path */
def subPath: List[String] = Nil def subPath: List[String] = Nil
......
package nl.lumc.sasc.biopet.utils.summary package nl.lumc.sasc.biopet.utils.summary
import java.io.{Closeable, File}
import nl.lumc.sasc.biopet.utils.ConfigUtils import nl.lumc.sasc.biopet.utils.ConfigUtils
import slick.driver.H2Driver.api._ import slick.driver.H2Driver.api._
...@@ -12,7 +14,10 @@ import scala.concurrent.ExecutionContext.Implicits.global ...@@ -12,7 +14,10 @@ import scala.concurrent.ExecutionContext.Implicits.global
/** /**
* Created by pjvanthof on 05/02/2017. * Created by pjvanthof on 05/02/2017.
*/ */
class SummaryDb(db: Database) { class SummaryDb(db: Database) extends Closeable {
def close(): Unit = db.close()
/** This method will create all table */ /** This method will create all table */
def createTables(): Unit = { def createTables(): Unit = {
try { try {
...@@ -56,7 +61,7 @@ class SummaryDb(db: Database) { ...@@ -56,7 +61,7 @@ class SummaryDb(db: Database) {
name.map(sample.name === _) name.map(sample.name === _)
).collect({ case Some(criteria) => criteria }).reduceLeftOption(_ && _).getOrElse(true: Rep[Boolean]) ).collect({ case Some(criteria) => criteria }).reduceLeftOption(_ && _).getOrElse(true: Rep[Boolean])
} }
db.run(q.map(x => (x.id, x.runId, x.name)).result) db.run(q.result)
} }
def getSampleTags(sampleId: Int): Future[Option[Map[String, Any]]] = { def getSampleTags(sampleId: Int): Future[Option[Map[String, Any]]] = {
...@@ -78,7 +83,7 @@ class SummaryDb(db: Database) { ...@@ -78,7 +83,7 @@ class SummaryDb(db: Database) {
name.map(lib.name === _) // not a condition as `criteriaRoast` evaluates to `None` name.map(lib.name === _) // not a condition as `criteriaRoast` evaluates to `None`
).collect({ case Some(criteria) => criteria }).reduceLeftOption(_ && _).getOrElse(true: Rep[Boolean]) ).collect({ case Some(criteria) => criteria }).reduceLeftOption(_ && _).getOrElse(true: Rep[Boolean])
} }
db.run(q.map(x => (x.id, x.name, x.runId, x.sampleId)).result) db.run(q.result)
} }
def getLibraryTags(libId: Int): Future[Option[Map[String, Any]]] = { def getLibraryTags(libId: Int): Future[Option[Map[String, Any]]] = {
...@@ -184,5 +189,14 @@ class SummaryDb(db: Database) { ...@@ -184,5 +189,14 @@ class SummaryDb(db: Database) {
val q = l.foldLeft(settings.subquery)((a,b) => b(a)) val q = l.foldLeft(settings.subquery)((a,b) => b(a))
db.run(q.map(_.content).result).map(_.headOption.map(ConfigUtils.jsonTextToMap)) db.run(q.map(_.content).result).map(_.headOption.map(ConfigUtils.jsonTextToMap))
} }
} }
object SummaryDb {
def openSqliteSummary(file: File): SummaryDb = {
val exist = file.exists()
val db = Database.forURL(s"jdbc:sqlite:${file.getAbsolutePath}", driver = "org.sqlite.JDBC")
val s = new SummaryDb(db)
if (!exist) s.createTables()
s
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment