Commit 23f8f923 authored by Peter van 't Hof's avatar Peter van 't Hof Committed by GitHub

Merge pull request #150 from biopet/fix-BIOPET-492

First implementation of PIM in biopet
parents 6edd8e7b 8d5abea6
@@ -16,10 +16,14 @@ package nl.lumc.sasc.biopet.core
import java.io.{File, PrintWriter}
+import akka.actor.ActorSystem
+import akka.stream.ActorMaterializer
+import nl.lumc.sasc.biopet.core.pipelinestatus.{Deps, PipelineStatus}
import nl.lumc.sasc.biopet.core.summary.WriteSummary
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.{ConfigUtils, Logging}
import org.broadinstitute.gatk.queue.function.{CommandLineFunction, QFunction}
+import play.api.libs.ws.ahc.AhcWSClient
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
@@ -47,7 +51,7 @@ object WriteDependencies extends Logging with Configurable {
* This method will generate a JSON file in which information about job and file dependencies is stored
*
* @param functions This should be all functions that are given to the graph of Queue
-* @param outputDir
+* @param outputDir Output directory where the files will be placed
*/
def writeDependencies(functions: Seq[QFunction], outputDir: File): Unit = {
outputDir.mkdirs()
@@ -59,17 +63,17 @@ object WriteDependencies extends Logging with Configurable {
case class QueueFile(file: File) {
private val inputJobs: ListBuffer[QFunction] = ListBuffer()
-def addInputJob(function: QFunction) = inputJobs += function
-def inputJobNames = inputJobs.toList.map(functionNames)
+def addInputJob(function: QFunction): Unit = inputJobs += function
+def inputJobNames: List[String] = inputJobs.toList.map(functionNames)
private val outputJobs: ListBuffer[QFunction] = ListBuffer()
-def addOutputJob(function: QFunction) = {
+def addOutputJob(function: QFunction): Unit = {
if (outputJobs.nonEmpty) logger.warn(s"File '$file' is found as output of multiple jobs")
outputJobs += function
}
-def outputJobNames = outputJobs.toList.map(functionNames)
+def outputJobNames: List[String] = outputJobs.toList.map(functionNames)
-def getMap = {
+def getMap: Map[String, Any] = {
val fileExist = file.exists()
if (!fileExist && outputJobs.isEmpty) {
if (errorOnMissingInput) Logging.addError(s"Input file does not exist: $file")
@@ -85,7 +89,7 @@ object WriteDependencies extends Logging with Configurable {
)
}
-def isIntermediate = outputJobs.exists(_.isIntermediate)
+def isIntermediate: Boolean = outputJobs.exists(_.isIntermediate)
}
val files: mutable.Map[File, QueueFile] = mutable.Map()
@@ -161,8 +165,15 @@ object WriteDependencies extends Logging with Configurable {
.spaces2)
writer.close()
-PipelineStatus.writePipelineStatus(PipelineStatus.readDepsFile(outputFile), outputDir)
logger.info("done calculating dependencies")
+implicit lazy val system = ActorSystem()
+implicit lazy val materializer = ActorMaterializer()
+implicit lazy val ws = AhcWSClient()
+PipelineStatus.writePipelineStatus(Deps.readDepsFile(outputFile), outputDir)
+ws.close()
+system.terminate()
}
}
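Note on the change above: `writeDependencies` now owns the lifecycle of the Play WS client it hands to `PipelineStatus`. `AhcWSClient` requires an implicit `ActorMaterializer`, which in turn requires an `ActorSystem`; both hold non-daemon threads, so the `ws.close()` and `system.terminate()` calls are what allow the JVM to exit afterwards. A minimal call sketch (the function list and output directory are hypothetical placeholders):

```scala
import java.io.File

import nl.lumc.sasc.biopet.core.WriteDependencies
import org.broadinstitute.gatk.queue.function.QFunction

// Hypothetical call site: 'functions' would be all QFunctions given to Queue's graph.
val functions: Seq[QFunction] = Seq.empty

// Writes deps.json plus the pipeline status files, then tears down
// the ActorSystem and WS client it creates internally.
WriteDependencies.writeDependencies(functions, new File("out/.log/deps"))
```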
package nl.lumc.sasc.biopet.core.pipelinestatus
import java.io.File
import nl.lumc.sasc.biopet.core.pipelinestatus.PipelineStatus.logger
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.pim._
import play.api.libs.ws.WSResponse
import play.api.libs.ws.ahc.AhcWSClient
import scala.concurrent.Future
import scala.concurrent.ExecutionContext.Implicits.global
/**
* This class stores the deps.json of a pipeline, which records all jobs and files and the connections between them
*
* Created by pjvanthof on 24/06/2017.
*/
case class Deps(jobs: Map[String, Job], files: Array[Map[String, Any]]) {
/**
* This method will compress the graph by combining all jobs that share a common name
* @param main When set to true, non-main jobs will be skipped in the graph
* @return List of dependencies
*/
def compressOnType(main: Boolean = false): Map[String, List[String]] = {
(for ((_, job) <- jobs.toSet if !main || job.mainJob) yield {
job.name -> (if (main) getMainDependencies(job.name).map(Job.compressedName(_)._1)
else job.dependsOnJobs.map(Job.compressedName(_)._1))
}).groupBy(x => Job.compressedName(x._1)._1)
.map(x => x._1 -> x._2.flatMap(_._2).toList.distinct)
}
/** This will return all main dependencies */
def getMainDeps: Map[String, List[String]] = {
jobs.filter(_._2.mainJob).map(x => x._1 -> getMainDependencies(x._1))
}
/**
* This will return the main dependencies for a single job.
* When a job depends on a non-main job, that job's dependencies are followed until a main dependency is found.
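* For example: if job A depends on non-main job b, and b depends on main job C, then getMainDependencies("A") returns List("C").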
*/
def getMainDependencies(jobName: String): List[String] = {
val job = this.jobs(jobName)
val dependencies = job.dependsOnJobs match {
case l: List[_] => l.map(_.toString)
}
dependencies.flatMap { dep =>
if (this.jobs(dep).mainJob) List(dep)
else getMainDependencies(dep)
}.distinct
}
/** This publishes the compressed graph to a PIM host */
def publishCompressedGraphToPim(host: String, runId: String)(
implicit ws: AhcWSClient): Future[WSResponse] = {
val links: List[Link] = this
.compressOnType()
.flatMap(x => x._2.map(y => Link("link", y, "output", x._1, "input", "test")))
.toList
val run = Run(
runId,
Network("graph",
Nil,
this
.compressOnType()
.map(
x =>
Node(x._1,
"root",
List(Port("input", "input")),
List(Port("output", "output")),
"test"))
.toList,
links),
"Biopet pipeline",
"biopet"
)
val request = ws
.url(s"$host/api/runs/")
.withHeaders("Accept" -> "application/json", "Content-Type" -> "application/json")
.put(run.toString)
request.onFailure { case e => logger.warn("Posting workflow to PIM failed", e) }
request.onSuccess {
case r if r.status == 200 =>
logger.debug(r)
case r => logger.warn(r)
}
request
}
}
object Deps {
/** This will read a deps.json and return it as a [[Deps]] class */
def readDepsFile(depsFile: File): Deps = {
val deps = ConfigUtils.fileToConfigMap(depsFile)
val jobs =
ConfigUtils.any2map(deps("jobs")).map(x => x._1 -> new Job(x._1, ConfigUtils.any2map(x._2)))
val files = ConfigUtils.any2list(deps("files")).map(x => ConfigUtils.any2map(x)).toArray
Deps(jobs, files)
}
}
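A minimal end-to-end sketch of the class above, assuming a deps.json produced by WriteDependencies; the PIM host URL and run id are hypothetical placeholders:

```scala
import java.io.File

import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import nl.lumc.sasc.biopet.core.pipelinestatus.Deps
import play.api.libs.ws.ahc.AhcWSClient

import scala.concurrent.Await
import scala.concurrent.duration._

object PublishSketch extends App {
  val deps = Deps.readDepsFile(new File("deps.json"))
  // Compressed view of the job graph, keeping only main jobs
  println(deps.compressOnType(main = true))

  implicit val system = ActorSystem()
  implicit val materializer = ActorMaterializer()
  implicit val ws = AhcWSClient()
  try {
    val response =
      Await.result(deps.publishCompressedGraphToPim("http://localhost:9000", "run42"), 1.minute)
    println(s"PIM answered with HTTP ${response.status}")
  } finally {
    ws.close()
    system.terminate()
  }
}
```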
package nl.lumc.sasc.biopet.core.pipelinestatus
import java.io.File
import nl.lumc.sasc.biopet.utils.ConfigUtils
import scala.concurrent.Future
import scala.concurrent.ExecutionContext.Implicits.global
import scala.util.matching.Regex
/**
* This class represents a single job from a deps.json
*
* Created by pjvanthof on 24/06/2017.
*/
class Job(val name: String, map: Map[String, Any]) {
/** When true, this job was already done when the deps.json was created */
def doneAtStart: Boolean = ConfigUtils.any2boolean(map("done_at_start"))
/** If one of these files exists, the job is marked as failed */
def failFiles: List[File] = ConfigUtils.any2fileList(map("fail_files"))
/** If all of these files exist, the job is marked as done */
def doneFiles: List[File] = ConfigUtils.any2fileList(map("done_files"))
/** Returns a list of jobs that depend on this job */
def outputUsedByJobs: List[String] = ConfigUtils.any2stringList(map("output_used_by_jobs"))
/** Returns a list of jobs this job depends on */
def dependsOnJobs: List[String] = ConfigUtils.any2stringList(map("depends_on_jobs"))
/** Location of the stdout file of this job */
def stdoutFile = new File(ConfigUtils.any2string(map("stdout_file")))
/** All output files of this job */
def outputsFiles: List[File] = ConfigUtils.any2fileList(map("outputs"))
/** All input files of this job */
def inputFiles: List[File] = ConfigUtils.any2fileList(map("inputs"))
/** When true this job is marked as a main job in the graph */
def mainJob: Boolean = ConfigUtils.any2boolean(map("main_job"))
/** When true this job is marked as an intermediate job */
def intermediate: Boolean = ConfigUtils.any2boolean(map("intermediate"))
/** Returns a Future[Boolean] to check whether the job is done */
def isDone: Future[Boolean] = Future { doneFiles.forall(_.exists()) }
/** Returns a Future[Boolean] to check whether the job has failed */
def isFailed: Future[Boolean] = Future { failFiles.exists(_.exists()) }
/** Returns the compressed name of this job */
def compressedName: (String, Int) = Job.compressedName(name)
}
object Job {
val numberRegex: Regex = """(.*)_(\d*)$""".r
/** This splits a job name from its numeric id suffix */
def compressedName(jobName: String): (String, Int) = jobName match {
case Job.numberRegex(name, number) => (name, number.toInt)
}
}
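A small illustration of `compressedName`, which `compressOnType` uses to group jobs: the trailing number is split off the job name. Note that `numberRegex` assumes a `_<digits>` suffix, so a name without one would not match:

```scala
import nl.lumc.sasc.biopet.core.pipelinestatus.Job

Job.compressedName("bwa_mem_12") // ("bwa_mem", 12)
Job.compressedName("fastqc_3")   // ("fastqc", 3)
// Job.compressedName("fastqc") would throw a MatchError
```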
@@ -21,6 +21,7 @@ import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import PipelineStatusTest.Status
+import nl.lumc.sasc.biopet.core.pipelinestatus.{Deps, PipelineStatus}
import nl.lumc.sasc.biopet.utils.IoUtils._
import org.apache.commons.io.FileUtils
@@ -103,7 +104,7 @@ class PipelineStatusTest extends TestNGSuite with Matchers {
val depsFile = File.createTempFile("deps.", ".json")
depsFile.deleteOnExit()
PipelineStatusTest.writeDeps(depsFile, new File("/tmp"))
-val deps = PipelineStatus.readDepsFile(depsFile)
+val deps = Deps.readDepsFile(depsFile)
deps.jobs.size shouldBe 3
deps.files.length shouldBe 5
......
@@ -136,6 +136,8 @@
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.1</version>
<configuration>
+<shadedArtifactAttached>true</shadedArtifactAttached>
+<shadedClassifierName>allinone</shadedClassifierName>
<!--suppress MavenModelInspection -->
<finalName>Biopet-${project.version}-${git.commit.id.abbrev}</finalName>
<transformers>
@@ -148,6 +150,9 @@
<X-Compile-Target-JDK>${maven.compile.target}</X-Compile-Target-JDK>
</manifestEntries>
</transformer>
+<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+<resource>reference.conf</resource>
+</transformer>
</transformers>
<filters>
</filters>
......
@@ -14,7 +14,7 @@
*/
package nl.lumc.sasc.biopet
-import nl.lumc.sasc.biopet.core.PipelineStatus
+import nl.lumc.sasc.biopet.core.pipelinestatus.PipelineStatus
import nl.lumc.sasc.biopet.utils.{BiopetExecutable, MainCommand}
object BiopetExecutableMain extends BiopetExecutable {
......
@@ -30,6 +30,11 @@
<packaging>jar</packaging>
<dependencies>
+<dependency>
+<groupId>com.typesafe.play</groupId>
+<artifactId>play-ws_2.11</artifactId>
+<version>2.5.15</version>
+</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
......
# In this file you can override any option defined in the reference files.
# Copy in parts of the reference files and modify as you please.
akka {
# Loggers to register at boot time (akka.event.Logging$DefaultLogger logs
# to STDOUT)
loggers = ["akka.event.slf4j.Slf4jLogger"]
# Log level used by the configured loggers (see "loggers") as soon
# as they have been started; before that, see "stdout-loglevel"
# Options: OFF, ERROR, WARNING, INFO, DEBUG
loglevel = "INFO"
# Log level for the very basic logger activated during ActorSystem startup.
# This logger prints the log messages to stdout (System.out).
# Options: OFF, ERROR, WARNING, INFO, DEBUG
stdout-loglevel = "INFO"
# Filter of log events that is used by the LoggingAdapter before
# publishing log events to the eventStream.
logging-filter = "akka.event.slf4j.Slf4jLoggingFilter"
actor {
provider = "local"
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.utils.pim
import argonaut.Json
import nl.lumc.sasc.biopet.utils.ConfigUtils
/**
* Created by pjvanthof on 17/03/2017.
*/
trait PimClasses {
def toMap: Map[String, Any]
def toJson: Json = ConfigUtils.mapToJson(toMap)
override def toString: String = toJson.nospaces
}
case class Run(id: String,
network: Network,
description: String,
workflowEngine: String,
collapse: Boolean = false)
extends PimClasses {
def toMap = Map(
"id" -> id,
"network" -> network.toMap,
"description" -> description,
"workflow_engine" -> workflowEngine,
"collapse" -> collapse
)
}
case class Network(description: String, groups: List[Group], nodes: List[Node], links: List[Link])
extends PimClasses {
def toMap = Map(
"description" -> description,
"groups" -> groups.map(_.toMap),
"nodes" -> nodes.map(_.toMap),
"links" -> links.map(_.toMap)
)
}
case class Group(description: String, id: String, parentGroup: String) extends PimClasses {
def toMap = Map(
"id" -> id,
"description" -> description,
"parent_group" -> parentGroup
)
}
case class Node(id: String,
groupId: String,
inPorts: List[Port],
outPorts: List[Port],
nodeType: String)
extends PimClasses {
def toMap = Map(
"id" -> id,
"group_id" -> groupId,
"in_ports" -> inPorts.map(_.toMap),
"out_ports" -> outPorts.map(_.toMap),
"type" -> nodeType
)
}
case class Link(id: String,
fromNode: String,
fromPort: String,
toNode: String,
toPort: String,
linkType: String)
extends PimClasses {
def toMap = Map(
"id" -> id,
"from_node" -> fromNode,
"from_port" -> fromPort,
"to_node" -> toNode,
"to_port" -> toPort,
"type" -> linkType
)
}
case class Port(id: String, description: String) extends PimClasses {
def toMap = Map(
"id" -> id,
"description" -> description
)
}
case class Job(id: String,
nodeId: String,
runId: String,
sampleId: String,
status: JobStatus.Value)
extends PimClasses {
def toMap = Map(
"id" -> id,
"node_id" -> nodeId,
"run_id" -> runId,
"sample_id" -> sampleId,
"status" -> status.toString
)
}
object JobStatus extends Enumeration {
val idle, running, success, failed = Value
}
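For illustration, a minimal sketch that builds a one-node run from the case classes above and prints the compact JSON that publishCompressedGraphToPim would PUT to PIM (all ids are hypothetical):

```scala
import nl.lumc.sasc.biopet.utils.pim._

val node = Node("SamtoolsView", "root",
                List(Port("input", "input")),
                List(Port("output", "output")), "test")
val run = Run("run42",
              Network("graph", Nil, List(node), Nil),
              "Biopet pipeline", "biopet")

// toString renders the nested toMap structure as compact JSON via argonaut
println(run)
```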