Commit cb7a051c authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch 'release-0.7.0' into 'master'

Release 0.7.0



See merge request !449
parents 23c1ccea bf548a42
......@@ -8,18 +8,17 @@
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.core
import java.io.{ PrintWriter, File, FileInputStream }
import java.io.{ File, FileInputStream, PrintWriter }
import java.security.MessageDigest
import nl.lumc.sasc.biopet.utils.Logging
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output }
import org.broadinstitute.gatk.utils.runtime.ProcessSettings
import org.ggf.drmaa.JobTemplate
......@@ -30,14 +29,11 @@ import scala.collection.JavaConversions._
/** Biopet command line trait to auto check executable and cluster values */
trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
analysisName = configName
analysisName = configNamespace
@Input(doc = "deps", required = false)
var deps: List[File] = Nil
@Output
var outputFiles: List[File] = Nil
var executable: String = _
/** This is the default shell for drmaa jobs */
......@@ -50,9 +46,16 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
writer.println("set -eubf")
writer.println("set -o pipefail")
lines.foreach(writer.println)
jobDelayTime.foreach(x => writer.println(s"sleep $x"))
writer.close()
}
/**
* This value is used to let you job wait a x number of second after it finish.
* This is ionly used when having storage delay issues
*/
var jobDelayTime: Option[Int] = config("job_delay_time")
// This overrides the default "sh" from queue. For Biopet the default is "bash"
updateJobRun = {
case jt: JobTemplate =>
......@@ -73,6 +76,15 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
def beforeGraph() {}
override def freezeFieldValues() {
this match {
case r: Reference =>
if (r.dictRequired) deps :+= r.referenceDict
if (r.faiRequired) deps :+= r.referenceFai
deps = deps.distinct
case _ =>
}
preProcessExecutable()
beforeGraph()
internalBeforeGraph()
......@@ -165,6 +177,27 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
this
}
/**
* This method can handle args that have multiple args for 1 arg name
* @param argName Name of the arg like "-h" or "--help"
* @param values Values for this arg
* @param groupSize Values must come in groups of x number, default is 1
* @param minGroups Minimal groups that are required, default is 0, when 0 the method return en empty string
* @param maxGroups Max number of groups that can be filled here
* @return Command part of this arg
*/
def multiArg(argName: String, values: Iterable[Any], groupSize: Int = 1, minGroups: Int = 0, maxGroups: Int = 0): String = {
if (values.size % groupSize != 0)
Logging.addError(s"Arg '${argName}' values: '${values}' does not fit to a groupSize of ${groupSize}")
val groups = values.size / groupSize
if (groups < minGroups)
Logging.addError(s"Args '${argName}' need atleast $minGroups with size $groupSize")
if (maxGroups > 0 && groups > maxGroups)
Logging.addError(s"Args '${argName}' may only have $maxGroups with size $groupSize")
if (values.nonEmpty) required(argName) + values.map(required(_)).mkString
else ""
}
@Output(required = false)
private[core] var stdoutFile: Option[File] = None
......
......@@ -8,8 +8,7 @@
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
......@@ -18,6 +17,7 @@ package nl.lumc.sasc.biopet.core
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Output
/**
* Created by pjvan_thof on 9/29/15.
......@@ -46,6 +46,9 @@ class BiopetFifoPipe(val root: Configurable,
) yield outputFile
}
@Output
private var outputFiles: List[File] = Nil
override def beforeGraph(): Unit = {
val outputs: Map[BiopetCommandLineFunction, Seq[File]] = try {
commands.map(x => x -> x.outputs).toMap
......@@ -69,6 +72,8 @@ class BiopetFifoPipe(val root: Configurable,
_pipesJobs :::= commands
_pipesJobs = _pipesJobs.distinct
analysisName = commands.map(_.analysisName).mkString("_")
}
override def beforeCmd(): Unit = {
......
......@@ -8,8 +8,7 @@
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
......@@ -19,12 +18,13 @@ import org.broadinstitute.gatk.queue.function.JavaCommandLineFunction
/** Biopet commandline class for java based programs */
trait BiopetJavaCommandLineFunction extends JavaCommandLineFunction with BiopetCommandLineFunction {
executable = config("java", default = "java", submodule = "java", freeVar = false)
executable = config("java", default = "java", namespace = "java", freeVar = false)
javaGCThreads = config("java_gc_threads", default = 4)
javaGCHeapFreeLimit = config("java_gc_heap_freelimit", default = 10)
javaGCTimeLimit = config("java_gc_timelimit", default = 50)
override def defaultResidentFactor: Double = 1.5
override def defaultVmemFactor: Double = 2.0
/** Constructs java opts, this adds scala threads */
......
......@@ -8,8 +8,7 @@
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
......@@ -27,7 +26,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
*/
class BiopetPipe(val commands: List[BiopetCommandLineFunction]) extends BiopetCommandLineFunction {
@Input
@Input(required = false)
lazy val input: List[File] = try {
commands.flatMap(_.inputs)
} catch {
......@@ -74,7 +73,7 @@ class BiopetPipe(val commands: List[BiopetCommandLineFunction]) extends BiopetCo
override def defaultThreads = 0
val root: Configurable = commands.head.root
override def configName = commands.map(_.configName).mkString("-")
override def configNamespace = commands.map(_.configNamespace).mkString("-")
def cmdLine: String = {
"(" + commands.head.cmdLine + (if (commands.head.stdinFile.isDefined) {
" < " + required(commands.head.stdinFile.map(_.getAbsoluteFile))
......
......@@ -8,8 +8,7 @@
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
......@@ -31,7 +30,7 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
@Argument(doc = "JSON / YAML config file(s)", fullName = "config_file", shortName = "config", required = false)
val configfiles: List[File] = Nil
@Argument(doc = "Config values, value should be formatted like 'key=value' or 'path:path:key=value'", fullName = "config_value", shortName = "cv", required = false)
@Argument(doc = "Config values, value should be formatted like 'key=value' or 'namespace:namespace:key=value'", fullName = "config_value", shortName = "cv", required = false)
val configValues: List[String] = Nil
/** Output directory of pipeline */
......@@ -70,11 +69,15 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
outputDir = outputDir.getAbsoluteFile
init()
biopetScript()
logger.info("Biopet script done")
if (disableScatter) for (function <- functions) function match {
if (disableScatter) {
logger.info("Disable scatters")
for (function <- functions) function match {
case f: ScatterGatherableFunction => f.scatterCount = 1
case _ =>
}
}
this match {
case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) =>
......@@ -82,6 +85,7 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
case _ => reportClass.foreach(add(_))
}
logger.info("Running pre commands")
for (function <- functions) function match {
case f: BiopetCommandLineFunction =>
f.preProcessExecutable()
......@@ -95,7 +99,8 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
globalConfig.writeReport(qSettings.runName, new File(outputDir, ".log/" + qSettings.runName))
else Logging.addError("Parent of output dir: '" + outputDir.getParent + "' is not writeable, output directory cannot be created")
inputFiles.foreach { i =>
logger.info("Checking input files")
inputFiles.par.foreach { i =>
if (!i.file.exists()) Logging.addError(s"Input file does not exist: ${i.file}")
if (!i.file.canRead) Logging.addError(s"Input file can not be read: ${i.file}")
if (!i.file.isAbsolute) Logging.addError(s"Input file should be an absolute path: ${i.file}")
......@@ -112,6 +117,7 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
if (logger.isDebugEnabled) WriteDependencies.writeDependencies(functions, new File(outputDir, s".log/${qSettings.runName}.deps.json"))
Logging.checkErrors()
logger.info("Script complete without errors")
}
/** Get implemented from org.broadinstitute.gatk.queue.QScript */
......
......@@ -8,8 +8,7 @@
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
......@@ -32,12 +31,16 @@ trait CommandLineResources extends CommandLineFunction with Configurable {
t
}
val multiplyVmemThreads: Boolean = config("multiply_vmem_threads", default = true)
val multiplyRssThreads: Boolean = config("multiply_rss_threads", default = true)
var vmem: Option[String] = config("vmem")
def defaultCoreMemory: Double = 2.0
def defaultVmemFactor: Double = 1.4
def defaultResidentFactor: Double = 1.2
var vmemFactor: Double = config("vmem_factor", default = defaultVmemFactor)
var residentFactor: Double = config("resident_factor", default = 1.2)
var residentFactor: Double = config("resident_factor", default = defaultResidentFactor)
private var _coreMemory: Double = 2.0
def coreMemory = _coreMemory
......@@ -85,10 +88,11 @@ trait CommandLineResources extends CommandLineFunction with Configurable {
else memoryLimit = Some(_coreMemory * threads)
if (config.contains("resident_limit")) residentLimit = config("resident_limit")
else residentLimit = Some((_coreMemory + (0.5 * retryMultipler)) * residentFactor)
else residentLimit = Some((_coreMemory + (0.5 * retryMultipler)) * residentFactor * (if (multiplyRssThreads) threads else 1))
if (!config.contains("vmem")) vmem = Some((_coreMemory * (vmemFactor + (0.5 * retryMultipler))) + "G")
jobName = configName + ":" + (if (firstOutput != null) firstOutput.getName else jobOutputFile)
if (!config.contains("vmem"))
vmem = Some((_coreMemory * (vmemFactor + (0.5 * retryMultipler)) * (if (multiplyVmemThreads) threads else 1)) + "G")
jobName = configNamespace + ":" + (if (firstOutput != null) firstOutput.getName else jobOutputFile)
}
override def setupRetry(): Unit = {
......
......@@ -8,8 +8,7 @@
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
......@@ -25,10 +24,10 @@ import org.broadinstitute.gatk.queue.QScript
/** This trait creates a structured way of use multisample pipelines */
trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
@Argument(doc = "Only Sample", shortName = "s", required = false, fullName = "sample")
@Argument(doc = "Only Process This Sample", shortName = "s", required = false, fullName = "sample")
private[core] val onlySamples: List[String] = Nil
require(globalConfig.map.contains("samples"), "No Samples found in config")
if (!globalConfig.map.contains("samples")) Logging.addError("No Samples found in config")
/** Sample class with basic functions build in */
abstract class AbstractSample(val sampleId: String) extends Summarizable { sample =>
......@@ -70,7 +69,7 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
def libDir = new File(sampleDir, "lib_" + libId)
lazy val libTags: Map[String, Any] =
config("tags", default = Map(), freeVar = false, submodule = libId, path = List("samples", sampleId, "libraries"))
config("tags", default = Map(), freeVar = false, namespace = libId, path = List("samples", sampleId, "libraries"))
def sampleId = sample.sampleId
......@@ -91,7 +90,7 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
val libraries: Map[String, Library] = libIds.map(id => id -> makeLibrary(id)).toMap
lazy val sampleTags: Map[String, Any] =
config("tags", default = Map(), freeVar = false, submodule = sampleId, path = List("samples"))
config("tags", default = Map(), freeVar = false, namespace = sampleId, path = List("samples"))
lazy val gender = {
val g: Option[String] = sampleTags.get("gender").map(_.toString)
......@@ -191,7 +190,7 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
val samples: Map[String, Sample] = sampleIds.map(id => id -> makeSample(id)).toMap
/** Returns a list of all sampleIDs */
protected def sampleIds: Set[String] = ConfigUtils.any2map(globalConfig.map("samples")).keySet
protected def sampleIds: Set[String] = ConfigUtils.any2map(globalConfig.map.getOrElse("samples", Map())).keySet
protected lazy val nameRegex = """^[a-zA-Z0-9][a-zA-Z0-9-_]+[a-zA-Z0-9]$""".r
protected lazy val nameError = "has an invalid name. " +
......@@ -202,8 +201,17 @@ trait MultiSampleQScript extends SummaryQScript { qscript: QScript =>
/** Runs addAndTrackJobs method for each sample */
final def addSamplesJobs() {
logger.info(s"Starting script for ${samples.size} samples")
var count = 0
if (onlySamples.isEmpty || samples.forall(x => onlySamples.contains(x._1))) {
samples.foreach { case (sampleId, sample) => sample.addAndTrackJobs() }
samples.foreach {
case (sampleId, sample) =>
logger.info(s"Starting script sample '$sampleId'")
sample.addAndTrackJobs()
count += 1
logger.info(s"Finish script for '$sampleId', samples done: $count / ${samples.size}")
}
logger.info("Starting script for multisample jobs")
addMultiSampleJobs()
} else onlySamples.foreach(sampleId => samples.get(sampleId) match {
case Some(sample) => sample.addAndTrackJobs()
......
......@@ -8,8 +8,7 @@
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
......
......@@ -8,8 +8,7 @@
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
......@@ -61,25 +60,25 @@ trait Reference extends Configurable {
}
/** When set override this on true the pipeline with raise an exception when fai index is not found */
protected def faiRequired = false
def faiRequired = false
/** When set override this on true the pipeline with raise an exception when dict index is not found */
protected def dictRequired = this.isInstanceOf[Summarizable] || this.isInstanceOf[SummaryQScript]
def dictRequired = this.isInstanceOf[Summarizable] || this.isInstanceOf[SummaryQScript]
/** Returns the dict file belonging to the fasta file */
def referenceDict = new File(referenceFasta().getAbsolutePath
.stripSuffix(".fa")
.stripSuffix(".fasta")
.stripSuffix(".fna") + ".dict")
/** Returns the fai file belonging to the fasta file */
def referenceFai = new File(referenceFasta().getAbsolutePath + ".fai")
/** Returns the fasta file */
def referenceFasta(): File = {
val file: File = config("reference_fasta")
if (config.contains("reference_fasta")) {
checkFasta(file)
val dict = new File(file.getAbsolutePath.stripSuffix(".fa").stripSuffix(".fasta").stripSuffix(".fna") + ".dict")
val fai = new File(file.getAbsolutePath + ".fai")
this match {
case c: BiopetCommandLineFunction => c.deps :::= dict :: fai :: Nil
case _ =>
}
} else {
if (config.contains("reference_fasta")) checkFasta(file)
else {
val defaults = ConfigUtils.mergeMaps(this.defaults, this.internalDefaults)
def getReferences(map: Map[String, Any]): Set[(String, String)] = (for (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment