Commit d67cd69b authored by Sander Bollen's avatar Sander Bollen
Browse files

Merge branch 'develop' into feature-varda

parents 144d5836 98bc93d9
......@@ -14,8 +14,8 @@ class ApplyRecalibration(val root: Configurable) extends org.broadinstitute.gatk
override val defaultThreads = 3
override def beforeGraph() {
super.beforeGraph()
override def freezeFieldValues() {
super.freezeFieldValues()
nt = Option(getThreads)
memoryLimit = Option(nt.getOrElse(1) * 2)
......
......@@ -20,7 +20,6 @@ object BaseRecalibrator {
val br = new BaseRecalibrator(root)
br.input_file :+= input
br.out = output
br.beforeGraph()
br
}
}
\ No newline at end of file
......@@ -5,17 +5,19 @@
*/
package nl.lumc.sasc.biopet.extensions.gatk.broad
import nl.lumc.sasc.biopet.core.{ Reference, BiopetJavaCommandLineFunction }
import nl.lumc.sasc.biopet.core.{ CommandLineResources, Reference, BiopetJavaCommandLineFunction }
import org.broadinstitute.gatk.engine.phonehome.GATKRunReport
import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK
trait GatkGeneral extends CommandLineGATK with BiopetJavaCommandLineFunction with Reference {
trait GatkGeneral extends CommandLineGATK with CommandLineResources with Reference {
memoryLimit = Option(3)
override def subPath = "gatk" :: super.subPath
jarFile = config("gatk_jar")
reference_sequence = referenceFasta()
override def defaultCoreMemory = 4.0
override def faiRequired = true
......@@ -33,14 +35,9 @@ trait GatkGeneral extends CommandLineGATK with BiopetJavaCommandLineFunction wit
if (config.contains("gatk_key")) gatk_key = config("gatk_key")
if (config.contains("pedigree")) pedigree = config("pedigree")
override def versionRegex = """(.*)""".r
override def versionExitcode = List(0, 1)
override def versionCommand = executable + " -jar " + jarFile + " -version"
override def getVersion = super.getVersion.collect { case v => "Gatk " + v }
//override def versionRegex = """(.*)""".r
//override def versionExitcode = List(0, 1)
//override def versionCommand = executable + " -jar " + jarFile + " -version"
override def beforeGraph(): Unit = {
super.beforeGraph()
if (reference_sequence == null) reference_sequence = referenceFasta()
}
//override def getVersion = super.getVersion.collect { case v => "Gatk " + v }
}
......@@ -38,11 +38,11 @@ class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.qu
stand_emit_conf = config("stand_emit_conf", default = 0)
}
override def beforeGraph() {
super.beforeGraph()
override def freezeFieldValues() {
super.freezeFieldValues()
if (bamOutput != null && nct.getOrElse(1) > 1) {
threads = 1
logger.warn("BamOutput is on, nct/threads is forced to set on 1, this option is only for debug")
nCoresRequest = Some(1)
}
nct = Some(getThreads)
memoryLimit = Option(memoryLimit.getOrElse(2.0) * nct.getOrElse(1))
......
......@@ -28,8 +28,8 @@ class UnifiedGenotyper(val root: Configurable) extends org.broadinstitute.gatk.q
override val defaultThreads = 1
override def beforeGraph() {
super.beforeGraph()
override def freezeFieldValues() {
super.freezeFieldValues()
genotype_likelihoods_model = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.BOTH
nct = Some(getThreads)
......
......@@ -10,9 +10,6 @@ import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
class VariantEval(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.VariantEval with GatkGeneral {
override def beforeGraph() {
super.beforeGraph()
}
}
object VariantEval {
......@@ -22,7 +19,6 @@ object VariantEval {
vareval.eval = Seq(sample)
vareval.comp = Seq(compareWith)
vareval.out = output
vareval.beforeGraph()
vareval
}
......@@ -36,7 +32,6 @@ object VariantEval {
vareval.ST = ST
vareval.noEV = true
vareval.EV = EV
vareval.beforeGraph()
vareval
}
......
......@@ -153,11 +153,11 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
scriptOutput.rawVcfFile = m2v.output
val vcfFilter = new VcfFilter(this) {
override def defaults = ConfigUtils.mergeMaps(Map("min_sample_depth" -> 8,
override def defaults = Map("min_sample_depth" -> 8,
"min_alternate_depth" -> 2,
"min_samples_pass" -> 1,
"filter_ref_calls" -> true
), super.defaults)
)
}
vcfFilter.inputVcf = m2v.output
vcfFilter.outputVcf = swapExt(outputDir, m2v.output, ".vcf", ".filter.vcf.gz")
......
......@@ -74,8 +74,6 @@ class ShivaTest extends TestNGSuite with Matchers {
val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + (if (sample3) 2 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + (if (sample3) 1 else 0)
pipeline.functions.count(_.isInstanceOf[BwaMem]) shouldBe numberLibs
pipeline.functions.count(_.isInstanceOf[SortSam]) shouldBe numberLibs
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + (if (sample3) 1 else 0))
// Gatk preprocess
......
......@@ -35,10 +35,10 @@ trait BastyTrait extends MultiSampleQScript {
def variantcallers = List("freebayes")
override def defaults = ConfigUtils.mergeMaps(Map(
override def defaults = Map(
"ploidy" -> 1,
"variantcallers" -> variantcallers
), super.defaults)
)
lazy val shiva: ShivaTrait = new Shiva(qscript)
......@@ -137,7 +137,6 @@ trait BastyTrait extends MultiSampleQScript {
val numBoot = config("boot_runs", default = 100, submodule = "raxml").asInt
val bootList = for (t <- 0 until numBoot) yield {
val raxmlBoot = new Raxml(this)
raxmlBoot.threads = 1
raxmlBoot.input = variants
raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlBoot.p = Some(seed)
......
......@@ -15,25 +15,276 @@
*/
package nl.lumc.sasc.biopet.core
/**
* This class is for commandline programs where the executable is a non JVM based program
*/
abstract class BiopetCommandLineFunction extends BiopetCommandLineFunctionTrait {
import java.io.{ PrintWriter, File, FileInputStream }
import java.security.MessageDigest
import nl.lumc.sasc.biopet.utils.Logging
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import org.broadinstitute.gatk.utils.runtime.ProcessSettings
import org.ggf.drmaa.JobTemplate
import scala.collection.mutable
import scala.io.Source
import scala.sys.process.{ Process, ProcessLogger }
import scala.util.matching.Regex
import scala.collection.JavaConversions._
/** Biopet command line trait to auto check executable and cluster values */
trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
analysisName = configName
@Input(doc = "deps", required = false)
var deps: List[File] = Nil
@Output
var outputFiles: List[File] = Nil
var executable: String = _
/** This is the default shell for drmaa jobs */
def defaultRemoteCommand = "bash"
private val remoteCommand: String = config("remote_command", default = defaultRemoteCommand)
private def changeScript(file: File): Unit = {
val lines = Source.fromFile(file).getLines().toList
val writer = new PrintWriter(file)
writer.println("set -eubf")
writer.println("set -o pipefail")
lines.foreach(writer.println(_))
writer.close()
}
// This overrides the default "sh" from queue. For Biopet the default is "bash"
updateJobRun = {
case jt: JobTemplate => {
changeScript(new File(jt.getArgs.head.toString))
jt.setRemoteCommand(remoteCommand)
}
case ps: ProcessSettings => {
changeScript(new File(ps.getCommand.tail.head))
ps.setCommand(Array(remoteCommand) ++ ps.getCommand.tail)
}
}
/**
* Can override this method. This is executed just before the job is ready to run.
* Can check on run time files from pipeline here
*/
def beforeCmd() {}
/** Can override this method. This is executed after the script is done en queue starts to generate the graph */
def beforeGraph() {}
override def freezeFieldValues() {
preProcessExecutable()
beforeGraph()
internalBeforeGraph()
super.freezeFieldValues()
}
/** Set default output file, threads and vmem for current job */
final def internalBeforeGraph(): Unit = {
pipesJobs.foreach(_.beforeGraph())
pipesJobs.foreach(_.internalBeforeGraph())
}
/** can override this value is executable may not be converted to CanonicalPath */
val executableToCanonicalPath = true
/**
* Checks executable. Follow full CanonicalPath, checks if it is existing and do a md5sum on it to store in job report
*/
protected[core] def preProcessExecutable() {
if (!BiopetCommandLineFunction.executableMd5Cache.contains(executable)) {
if (executable != null) {
if (!BiopetCommandLineFunction.executableCache.contains(executable)) {
try {
val oldExecutable = executable
val buffer = new StringBuffer()
val cmd = Seq("which", executable)
val process = Process(cmd).run(ProcessLogger(buffer.append(_)))
if (process.exitValue == 0) {
executable = buffer.toString
val file = new File(executable)
if (executableToCanonicalPath) executable = file.getCanonicalPath
else executable = file.getAbsolutePath
} else Logging.addError("executable: '" + executable + "' not found, please check config")
BiopetCommandLineFunction.executableCache += oldExecutable -> executable
BiopetCommandLineFunction.executableCache += executable -> executable
} catch {
case ioe: java.io.IOException =>
logger.warn(s"Could not use 'which' on '$executable', check on executable skipped: " + ioe)
}
} else executable = BiopetCommandLineFunction.executableCache(executable)
if (!BiopetCommandLineFunction.executableMd5Cache.contains(executable)) {
if (new File(executable).exists()) {
val is = new FileInputStream(executable)
val cnt = is.available
val bytes = Array.ofDim[Byte](cnt)
is.read(bytes)
is.close()
val temp = MessageDigest.getInstance("MD5").digest(bytes).map("%02X".format(_)).mkString.toLowerCase
BiopetCommandLineFunction.executableMd5Cache += executable -> temp
} else BiopetCommandLineFunction.executableMd5Cache += executable -> "file_does_not_exist"
}
}
}
val md5 = BiopetCommandLineFunction.executableMd5Cache.get(executable)
addJobReportBinding("md5sum_exe", md5.getOrElse("None"))
}
/** executes checkExecutable method and fill job report */
final protected def preCmdInternal() {
preProcessExecutable()
beforeCmd()
addJobReportBinding("cores", nCoresRequest match {
case Some(n) if n > 0 => n
case _ => 1
})
addJobReportBinding("version", getVersion)
}
/** Command to get version of executable */
protected[core] def versionCommand: String = null
/** Regex to get version from version command output */
protected[core] def versionRegex: Regex = null
/** Allowed exit codes for the version command */
protected[core] def versionExitcode = List(0)
/** Executes the version command */
private[core] def getVersionInternal: Option[String] = {
if (versionCommand == null || versionRegex == null) None
else getVersionInternal(versionCommand, versionRegex)
}
/** Executes the version command */
private[core] def getVersionInternal(versionCommand: String, versionRegex: Regex): Option[String] = {
if (versionCommand == null || versionRegex == null) return None
val exe = new File(versionCommand.trim.split(" ")(0))
if (!exe.exists()) return None
val stdout = new StringBuffer()
val stderr = new StringBuffer()
def outputLog = "Version command: \n" + versionCommand +
"\n output log: \n stdout: \n" + stdout.toString +
"\n stderr: \n" + stderr.toString
val process = Process(versionCommand).run(ProcessLogger(stdout append _ + "\n", stderr append _ + "\n"))
if (!versionExitcode.contains(process.exitValue())) {
logger.warn("getVersion give exit code " + process.exitValue + ", version not found \n" + outputLog)
return None
}
for (line <- stdout.toString.split("\n") ++ stderr.toString.split("\n")) {
line match {
case versionRegex(m) => return Some(m)
case _ =>
}
}
logger.warn("getVersion give a exit code " + process.exitValue + " but no version was found, executable correct? \n" + outputLog)
None
}
/** Get version from cache otherwise execute the version command */
def getVersion: Option[String] = {
if (!BiopetCommandLineFunction.executableCache.contains(executable))
preProcessExecutable()
if (!BiopetCommandLineFunction.versionCache.contains(versionCommand))
getVersionInternal match {
case Some(version) => BiopetCommandLineFunction.versionCache += versionCommand -> version
case _ =>
}
BiopetCommandLineFunction.versionCache.get(versionCommand)
}
private[core] var _inputAsStdin = false
def inputAsStdin = _inputAsStdin
private[core] var _outputAsStdout = false
def outputAsStsout = _outputAsStdout
/**
* This operator sends stdout to `that` and combine this into 1 command line function
* @param that Function that will read from stdin
* @return BiopetPipe function
*/
def |(that: BiopetCommandLineFunction): BiopetCommandLineFunction = {
this._outputAsStdout = true
that._inputAsStdin = true
this.beforeGraph()
this.internalBeforeGraph()
that.beforeGraph()
that.internalBeforeGraph()
this match {
case p: BiopetPipe => {
p.commands.last._outputAsStdout = true
new BiopetPipe(p.commands ::: that :: Nil)
}
case _ => new BiopetPipe(List(this, that))
}
}
/**
* This operator can be used to give a program a file as stdin
* @param file File that will become stdin for this program
* @return It's own class
*/
def :<:(file: File): BiopetCommandLineFunction = {
this._inputAsStdin = true
this.stdinFile = Some(file)
this
}
/**
* This operator can be used to give a program a file write it's atdout
* @param file File that will become stdout for this program
* @return It's own class
*/
def >(file: File): BiopetCommandLineFunction = {
this._outputAsStdout = true
this.stdoutFile = Some(file)
this
}
@Output(required = false)
private[core] var stdoutFile: Option[File] = None
@Input(required = false)
private[core] var stdinFile: Option[File] = None
/**
* This function needs to be implemented to define the command that is executed
* @return Command to run
*/
protected def cmdLine: String
protected[core] def cmdLine: String
/**
* implementing a final version of the commandLine from org.broadinstitute.gatk.queue.function.CommandLineFunction
* User needs to implement cmdLine instead
* @return Command to run
*/
final def commandLine: String = {
override final def commandLine: String = {
preCmdInternal()
val cmd = cmdLine
val cmd = cmdLine +
stdinFile.map(file => " < " + required(file.getAbsoluteFile)).getOrElse("") +
stdoutFile.map(file => " > " + required(file.getAbsoluteFile)).getOrElse("")
addJobReportBinding("command", cmd)
cmd
}
private[core] var pipesJobs: List[BiopetCommandLineFunction] = Nil
def addPipeJob(job: BiopetCommandLineFunction) {
pipesJobs :+= job
pipesJobs = pipesJobs.distinct
}
}
/** stores global caches */
object BiopetCommandLineFunction {
private[core] val versionCache: mutable.Map[String, String] = mutable.Map()
private[core] val executableMd5Cache: mutable.Map[String, String] = mutable.Map()
private[core] val executableCache: mutable.Map[String, String] = mutable.Map()
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.core
import java.io.{ File, FileInputStream }
import java.security.MessageDigest
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.function.CommandLineFunction
import org.broadinstitute.gatk.utils.commandline.Input
import scala.collection.mutable
import scala.sys.process.{ Process, ProcessLogger }
import scala.util.matching.Regex
/** Biopet command line trait to auto check executable and cluster values */
trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurable {
analysisName = configName
@Input(doc = "deps", required = false)
var deps: List[File] = Nil
var threads = 0
def defaultThreads = 1
var vmem: Option[String] = config("vmem")
protected def defaultCoreMemory: Double = 1.0
protected def defaultVmemFactor: Double = 1.4
var vmemFactor: Double = config("vmem_factor", default = defaultVmemFactor)
var residentFactor: Double = config("resident_factor", default = 1.2)
private var coreMemory: Double = _
var executable: String = _
/**
* Can override this method. This is executed just before the job is ready to run.
* Can check on run time files from pipeline here
*/
protected[core] def beforeCmd() {}
/** Can override this method. This is executed after the script is done en queue starts to generate the graph */
protected[core] def beforeGraph() {}
/** Set default output file, threads and vmem for current job */
override def freezeFieldValues() {
preProcessExecutable()
beforeGraph()
if (jobOutputFile == null) jobOutputFile = new File(firstOutput.getAbsoluteFile.getParent, "." + firstOutput.getName + "." + configName + ".out")
if (threads == 0) threads = getThreads(defaultThreads)
if (threads > 1) nCoresRequest = Option(threads)
coreMemory = config("core_memory", default = defaultCoreMemory).asDouble + (0.5 * retry)
if (config.contains("memory_limit")) memoryLimit = config("memory_limit")
else memoryLimit = Some(coreMemory * threads)
if (config.contains("resident_limit")) residentLimit = config("resident_limit")
else residentLimit = Some((coreMemory + (0.5 * retry)) * residentFactor)
if (!config.contains("vmem")) vmem = Some((coreMemory * (vmemFactor + (0.5 * retry))) + "G")
if (vmem.isDefined) jobResourceRequests :+= "h_vmem=" + vmem.get
jobName = configName + ":" + (if (firstOutput != null) firstOutput.getName else jobOutputFile)
super.freezeFieldValues()
}
var retry = 0
override def setupRetry(): Unit = {
super.setupRetry()
if (vmem.isDefined) jobResourceRequests = jobResourceRequests.filterNot(_.contains("h_vmem="))
logger.info("Auto raise memory on retry")
retry += 1
this.freeze()
}
/** can override this value is executable may not be converted to CanonicalPath */
val executableToCanonicalPath = true
/**
* Checks executable. Follow full CanonicalPath, checks if it is existing and do a md5sum on it to store in job report
*/
protected[core] def preProcessExecutable() {
if (!BiopetCommandLineFunctionTrait.executableMd5Cache.contains(executable)) {
try if (executable != null) {
if (!BiopetCommandLineFunctionTrait.executableCache.contains(executable)) {
val oldExecutable = executable
val buffer = new StringBuffer()
val cmd = Seq("which", executable)
val process = Process(cmd).run(ProcessLogger(buffer.append(_)))
if (process.exitValue == 0) {