Commit 04c040d2 authored by Wai Yi Leung
Browse files

Merge branch 'develop' of git.lumc.nl:biopet/biopet into feature-docs-0.5.0

parents 7f5d8873 911948ed
......@@ -5,11 +5,11 @@
*/
package nl.lumc.sasc.biopet.extensions.gatk.broad
import nl.lumc.sasc.biopet.core.{ CommandLineResources, Reference, BiopetJavaCommandLineFunction }
import nl.lumc.sasc.biopet.core.{ Version, CommandLineResources, Reference, BiopetJavaCommandLineFunction }
import org.broadinstitute.gatk.engine.phonehome.GATKRunReport
import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK
trait GatkGeneral extends CommandLineGATK with CommandLineResources with Reference {
trait GatkGeneral extends CommandLineGATK with CommandLineResources with Reference with Version {
memoryLimit = Option(3)
override def subPath = "gatk" :: super.subPath
......@@ -35,9 +35,9 @@ trait GatkGeneral extends CommandLineGATK with CommandLineResources with Referen
if (config.contains("gatk_key")) gatk_key = config("gatk_key")
if (config.contains("pedigree")) pedigree = config("pedigree")
//override def versionRegex = """(.*)""".r
//override def versionExitcode = List(0, 1)
//override def versionCommand = executable + " -jar " + jarFile + " -version"
def versionRegex = """(.*)""".r
override def versionExitcode = List(0, 1)
// NOTE(review): Version.getVersionInternal only runs the command when its first
// space-separated token exists as a file (new File(token).exists()). The bare name
// "java" is resolved relative to the working directory, so this lookup may always
// yield None - confirm, and consider using the resolved java executable path here.
def versionCommand = "java" + " -jar " + jarFile + " -version"
//override def getVersion = super.getVersion.collect { case v => "Gatk " + v }
override def getVersion = super.getVersion.collect { case v => "Gatk " + v }
}
......@@ -146,59 +146,6 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
case Some(n) if n > 0 => n
case _ => 1
})
addJobReportBinding("version", getVersion)
}
/**
 * Command to get version of executable.
 * Defaults to null, in which case getVersionInternal yields None without running anything.
 */
protected[core] def versionCommand: String = null
/**
 * Regex to get version from version command output; output lines are matched against it
 * and the text captured by its group is used as the version.
 * Defaults to null, in which case getVersionInternal yields None without running anything.
 */
protected[core] def versionRegex: Regex = null
/** Allowed exit codes for the version command; any other exit code is logged and yields no version. */
protected[core] def versionExitcode = List(0)
/**
 * Runs the version command configured on this function.
 *
 * @return the extracted version, or None when either the command or the regex is null
 */
private[core] def getVersionInternal: Option[String] = {
  val configured = versionCommand != null && versionRegex != null
  if (configured) getVersionInternal(versionCommand, versionRegex)
  else None
}
/**
 * Executes the given version command and extracts the version from its output.
 *
 * Nothing is executed when the command or regex is null, or when the first
 * space-separated token of the command does not exist as a file.
 *
 * @param versionCommand full command line to run
 * @param versionRegex regex matched against each output line; the text captured by its
 *                     group is the version
 * @return the first match found in stdout, then stderr; None when nothing was run, the
 *         exit code is not listed in `versionExitcode`, or no line matched
 */
private[core] def getVersionInternal(versionCommand: String, versionRegex: Regex): Option[String] = {
  // Evaluated lazily so the null check below always runs first.
  def executableExists = new File(versionCommand.trim.split(" ")(0)).exists()

  if (versionCommand == null || versionRegex == null || !executableExists) None
  else {
    // StringBuffer is kept from the original; the two loggers append to separate buffers.
    val stdout = new StringBuffer()
    val stderr = new StringBuffer()
    def outputLog = "Version command: \n" + versionCommand +
      "\n output log: \n stdout: \n" + stdout.toString +
      "\n stderr: \n" + stderr.toString

    // exitValue() blocks until the process has finished.
    val exitCode = Process(versionCommand)
      .run(ProcessLogger(stdout append _ + "\n", stderr append _ + "\n"))
      .exitValue()

    if (!versionExitcode.contains(exitCode)) {
      logger.warn("getVersion give exit code " + exitCode + ", version not found \n" + outputLog)
      None
    } else {
      val lines = stdout.toString.split("\n") ++ stderr.toString.split("\n")
      // collectFirst replaces the non-local `return` from inside the for-loop closure.
      val version = lines.collectFirst { case versionRegex(m) => m }
      if (version.isEmpty)
        logger.warn("getVersion give a exit code " + exitCode + " but no version was found, executable correct? \n" + outputLog)
      version
    }
  }
}
/**
 * Returns the version for this function's version command, using the shared cache.
 *
 * The executable is pre-processed first when it is not yet in the executable cache.
 * On a cache miss the version command is executed and a found version is stored
 * under the command string.
 *
 * @return the cached version, or None when no version could be determined
 */
def getVersion: Option[String] = {
  val caches = BiopetCommandLineFunction

  val executableKnown = caches.executableCache.contains(executable)
  if (!executableKnown) preProcessExecutable()

  val alreadyCached = caches.versionCache.contains(versionCommand)
  if (!alreadyCached)
    getVersionInternal.foreach(found => caches.versionCache += (versionCommand -> found))

  caches.versionCache.get(versionCommand)
}
private[core] var _inputAsStdin = false
......@@ -284,7 +231,6 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
/** stores global caches */
object BiopetCommandLineFunction {
private[core] val versionCache: mutable.Map[String, String] = mutable.Map()
private[core] val executableMd5Cache: mutable.Map[String, String] = mutable.Map()
private[core] val executableCache: mutable.Map[String, String] = mutable.Map()
}
......@@ -51,6 +51,9 @@ class BiopetFifoPipe(val root: Configurable,
deps :::= inputs.values.toList.flatten.filter(!fifoFiles.contains(_))
deps = deps.distinct
pipesJobs :::= commands
pipesJobs = pipesJobs.distinct
}
override def beforeCmd(): Unit = {
......
......@@ -55,12 +55,12 @@ trait BiopetJavaCommandLineFunction extends JavaCommandLineFunction with BiopetC
def getJavaVersion: Option[String] = {
if (!BiopetCommandLineFunction.executableCache.contains(executable))
preProcessExecutable()
if (!BiopetCommandLineFunction.versionCache.contains(javaVersionCommand))
getVersionInternal(javaVersionCommand, """java version "(.*)"""".r) match {
case Some(version) => BiopetCommandLineFunction.versionCache += javaVersionCommand -> version
if (!Version.versionCache.contains(javaVersionCommand))
Version.getVersionInternal(javaVersionCommand, """java version "(.*)"""".r) match {
case Some(version) => Version.versionCache += javaVersionCommand -> version
case _ =>
}
BiopetCommandLineFunction.versionCache.get(javaVersionCommand)
Version.versionCache.get(javaVersionCommand)
}
override def setupRetry(): Unit = {
......
......@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.core
import java.io.File
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils }
import org.broadinstitute.gatk.utils.commandline.Argument
/** This trait creates a structured way of using multisample pipelines */
......@@ -47,6 +47,8 @@ trait MultiSampleQScript extends SummaryQScript {
/** Adds the library jobs */
final def addAndTrackJobs(): Unit = {
if (nameRegex.findFirstIn(libId) == None)
Logging.addError(s"Library '$libId' $nameError")
currentSample = Some(sampleId)
currentLib = Some(libId)
addJobs()
......@@ -90,6 +92,8 @@ trait MultiSampleQScript extends SummaryQScript {
/** Adds sample jobs */
final def addAndTrackJobs(): Unit = {
if (nameRegex.findFirstIn(sampleId) == None)
Logging.addError(s"Sample '$sampleId' $nameError")
currentSample = Some(sampleId)
addJobs()
qscript.addSummarizable(this, "pipeline", Some(sampleId))
......@@ -129,6 +133,12 @@ trait MultiSampleQScript extends SummaryQScript {
/** Returns a list of all sampleIDs */
protected def sampleIds: Set[String] = ConfigUtils.any2map(globalConfig.map("samples")).keySet
protected lazy val nameRegex = """^[a-zA-Z0-9][a-zA-Z0-9-_]+[a-zA-Z0-9]$""".r
/**
 * Message appended after "Sample '<id>' " / "Library '<id>' " when an id fails nameRegex.
 * It has no leading space because the callers already put one before it, and the
 * sentences are now separated by spaces (they previously ran together).
 */
protected lazy val nameError: String = "name invalid. " +
  "Name must have at least 3 characters, " +
  "must begin and end with an alphanumeric character, " +
  "and must not have whitespace."
/** Runs addAndTrackJobs method for each sample */
final def addSamplesJobs() {
if (onlySamples.isEmpty || samples.forall(x => onlySamples.contains(x._1))) {
......
......@@ -5,9 +5,12 @@ import nl.lumc.sasc.biopet.FullVersion
/**
* Created by pjvanthof on 11/09/15.
*/
trait ToolCommandFuntion extends BiopetJavaCommandLineFunction {
trait ToolCommandFunction extends BiopetJavaCommandLineFunction with Version {
def toolObject: Object
def versionCommand = ""
def versionRegex = "".r
override def getVersion = Some("Biopet " + FullVersion)
override def beforeGraph(): Unit = {
......
package nl.lumc.sasc.biopet.core
import java.io.File
import nl.lumc.sasc.biopet.utils.Logging
import org.broadinstitute.gatk.queue.function.QFunction
import scala.collection.mutable
import scala.sys.process.{ Process, ProcessLogger }
import scala.util.matching.Regex
/**
 * Mixin for functions whose underlying tool can report a version.
 *
 * Implementors supply the command to run and a regex that captures the version from
 * its output. Found versions are cached in the companion object, keyed by the command.
 *
 * Created by pjvan_thof on 10/13/15.
 */
trait Version extends QFunction {

  /** Command to get version of executable */
  def versionCommand: String

  /** Regex to get version from version command output */
  def versionRegex: Regex

  /** Exit codes of the version command that are accepted; only 0 unless overridden. */
  protected[core] def versionExitcode = List(0)

  /**
   * Runs the version command configured on this function.
   *
   * @return the extracted version, or None when either the command or the regex is null
   */
  private[core] def getVersionInternal: Option[String] = {
    val configured = versionCommand != null && versionRegex != null
    if (configured) Version.getVersionInternal(versionCommand, versionRegex, versionExitcode)
    else None
  }

  /**
   * Returns the version from the shared cache. On a cache miss the version command is
   * executed and a found version is stored under the command string.
   */
  def getVersion: Option[String] = {
    val cache = Version.versionCache
    if (!cache.contains(versionCommand))
      getVersionInternal.foreach(found => cache += (versionCommand -> found))
    cache.get(versionCommand)
  }

  /** Adds the version (or "NA" when unknown) to the job report after the regular freeze. */
  override def freezeFieldValues(): Unit = {
    super.freezeFieldValues()
    val reported = getVersion.getOrElse("NA")
    addJobReportBinding("version", reported)
  }
}
/** Shared version cache and the helper that actually runs a version command. */
object Version extends Logging {

  /** Versions found so far, keyed by the version command that produced them. */
  private[core] val versionCache: mutable.Map[String, String] = mutable.Map()

  /**
   * Executes the given version command and extracts the version from its output.
   *
   * A cached entry for the command is returned without running anything. Nothing is
   * executed either when the command or regex is null, or when the first
   * space-separated token of the command does not exist as a file. This method does
   * not write to the cache itself; callers store the result.
   *
   * @param versionCommand full command line to run
   * @param versionRegex regex matched against each output line; the text captured by
   *                     its group is the version
   * @param versionExitcode exit codes that are accepted, only 0 by default
   * @return the cached version, or the first match found in stdout, then stderr; None
   *         when nothing was run, the exit code is not accepted, or no line matched
   */
  private[core] def getVersionInternal(versionCommand: String,
                                       versionRegex: Regex,
                                       versionExitcode: List[Int] = List(0)): Option[String] = {
    // Evaluated lazily so the null check below always runs first.
    def executableExists = new File(versionCommand.trim.split(" ")(0)).exists()

    if (versionCache.contains(versionCommand)) versionCache.get(versionCommand)
    else if (versionCommand == null || versionRegex == null || !executableExists) None
    else {
      // StringBuffer is kept from the original; the two loggers append to separate buffers.
      val stdout = new StringBuffer()
      val stderr = new StringBuffer()
      def outputLog = "Version command: \n" + versionCommand +
        "\n output log: \n stdout: \n" + stdout.toString +
        "\n stderr: \n" + stderr.toString

      // exitValue() blocks until the process has finished.
      val exitCode = Process(versionCommand)
        .run(ProcessLogger(stdout append _ + "\n", stderr append _ + "\n"))
        .exitValue()

      if (!versionExitcode.contains(exitCode)) {
        logger.warn("getVersion give exit code " + exitCode + ", version not found \n" + outputLog)
        None
      } else {
        val lines = stdout.toString.split("\n") ++ stderr.toString.split("\n")
        // collectFirst replaces the non-local `return` from inside the for-loop closure.
        val version = lines.collectFirst { case versionRegex(m) => m }
        if (version.isEmpty)
          logger.warn("getVersion give a exit code " + exitCode + " but no version was found, executable correct? \n" + outputLog)
        version
      }
    }
  }
}
\ No newline at end of file
......@@ -17,12 +17,12 @@ package nl.lumc.sasc.biopet.core.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for md5sum */
class Md5sum(val root: Configurable) extends BiopetCommandLineFunction {
class Md5sum(val root: Configurable) extends BiopetCommandLineFunction with Version {
@Input(doc = "Input")
var input: File = _
......@@ -31,8 +31,8 @@ class Md5sum(val root: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "md5sum")
override def versionRegex = """md5sum \(GNU coreutils\) (.*)""".r
override def versionCommand = executable + " --version"
def versionRegex = """md5sum \(GNU coreutils\) (.*)""".r
def versionCommand = executable + " --version"
/** return commandline to execute */
def cmdLine = required(executable) + required(input) + " > " + required(output)
......
......@@ -16,7 +16,7 @@
package nl.lumc.sasc.biopet.core.report
import java.io._
import nl.lumc.sasc.biopet.core.ToolCommandFuntion
import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.utils.summary.Summary
import nl.lumc.sasc.biopet.utils.{ ToolCommand, Logging, IoUtils }
import org.broadinstitute.gatk.utils.commandline.Input
......@@ -28,7 +28,7 @@ import scala.collection.mutable
*
* @author pjvan_thof
*/
trait ReportBuilderExtension extends ToolCommandFuntion {
trait ReportBuilderExtension extends ToolCommandFunction {
/** Report builder object */
val builder: ReportBuilder
......
......@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.core.summary
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, BiopetJavaCommandLineFunction, SampleLibraryTag }
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, BiopetJavaCommandLineFunction, SampleLibraryTag }
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.{ LastCommitHash, Version }
import org.broadinstitute.gatk.queue.function.{ InProcessFunction, QFunction }
......@@ -71,21 +71,32 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
val files = parseFiles(qscript.summaryFiles)
val settings = qscript.summarySettings
val executables: Map[String, Any] = {
(for (f <- qscript.functions if f.isInstanceOf[BiopetCommandLineFunction]) yield {
def fetchVersion(f: QFunction): Option[(String, Any)] = {
f match {
case f: BiopetJavaCommandLineFunction =>
f.configName -> Map("version" -> f.getVersion.getOrElse(None),
case f: BiopetJavaCommandLineFunction with Version =>
Some(f.configName -> Map("version" -> f.getVersion.getOrElse(None),
"java_md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
"java_version" -> f.getJavaVersion,
"jar_path" -> f.jarFile)
case f: BiopetCommandLineFunction =>
f.configName -> Map("version" -> f.getVersion.getOrElse(None),
"jar_path" -> f.jarFile))
case f: BiopetCommandLineFunction with Version =>
Some(f.configName -> Map("version" -> f.getVersion.getOrElse(None),
"md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
"path" -> f.executable)
case _ => throw new IllegalStateException("This should not be possible")
"path" -> f.executable))
case f: Configurable with Version =>
Some(f.configName -> Map("version" -> f.getVersion.getOrElse(None)))
case _ => None
}
}).toMap
}
(
qscript.functions.flatMap(fetchVersion(_)) ++
qscript.functions
.flatMap {
case f: BiopetCommandLineFunction => f.pipesJobs
case _ => Nil
}.flatMap(fetchVersion(_))
).toMap
}
val map = Map(qscript.summaryName -> ((if (settings.isEmpty) Map[String, Any]() else Map("settings" -> settings)) ++
......@@ -113,7 +124,7 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
}).foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b)) ++
Map("meta" -> Map(
"last_commit_hash" -> LastCommitHash,
"pipeline_version" -> Version,
"pipeline_version" -> nl.lumc.sasc.biopet.Version,
"pipeline_name" -> qscript.summaryName,
"output_dir" -> qscript.outputDir,
"run_name" -> config("run_name", default = qSettings.runName).asString,
......
......@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference }
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, Reference }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
......@@ -26,7 +26,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
*
* Based on version 1.1.1
*/
class Bowtie(val root: Configurable) extends BiopetCommandLineFunction with Reference {
class Bowtie(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
@Input(doc = "Fastq file R1", shortName = "R1")
var R1: File = null
......@@ -40,9 +40,9 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction with Refe
var output: File = null
executable = config("exe", default = "bowtie", freeVar = false)
override def versionRegex = """.*[Vv]ersion:? (.*)""".r
def versionRegex = """.*[Vv]ersion:? (.*)""".r
override def versionExitcode = List(0, 1)
override def versionCommand = executable + " --version"
def versionCommand = executable + " --version"
override def defaultCoreMemory = 4.0
override def defaultThreads = 8
......
......@@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
......@@ -25,7 +25,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
* Wrapper for the cufflinks command line tool.
* Written based on cufflinks version v2.2.1 (md5: 07c831c4f8b4e161882731ea5694ff80)
*/
class Cufflinks(val root: Configurable) extends BiopetCommandLineFunction {
class Cufflinks(val root: Configurable) extends BiopetCommandLineFunction with Version {
/** default executable */
executable = config("exe", default = "cufflinks")
......@@ -185,8 +185,8 @@ class Cufflinks(val root: Configurable) extends BiopetCommandLineFunction {
/** do not contact server to check for update availability [FALSE] */
var no_update_check: Boolean = config("no_update_check", default = false)
override def versionRegex = """cufflinks v(.*)""".r
override def versionCommand = executable
def versionRegex = """cufflinks v(.*)""".r
def versionCommand = executable
override def versionExitcode = List(0, 1)
def cmdLine =
......
......@@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
......@@ -25,7 +25,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
* Wrapper for the cuffquant command line tool.
* Written based on cuffquant version v2.2.1 (md5: 0765b82b11db9256f5be341a7da884d6)
*/
class Cuffquant(val root: Configurable) extends BiopetCommandLineFunction {
class Cuffquant(val root: Configurable) extends BiopetCommandLineFunction with Version {
/** default executable */
executable = config("exe", default = "cuffquant")
......@@ -117,8 +117,8 @@ class Cuffquant(val root: Configurable) extends BiopetCommandLineFunction {
/** Disable SCV correction */
var no_scv_correction: Boolean = config("no_scv_correction", default = false)
override def versionRegex = """cuffquant v(.*)""".r
override def versionCommand = executable
def versionRegex = """cuffquant v(.*)""".r
def versionCommand = executable
override def versionExitcode = List(0, 1)
def cmdLine =
......
......@@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.summary.Summarizable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
......@@ -29,7 +29,7 @@ import scala.io.Source
* Extension for cutadapt
* Based on version 1.5
*/
class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Summarizable {
class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Summarizable with Version {
@Input(doc = "Input fastq file")
var fastq_input: File = _
......@@ -40,8 +40,8 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
var stats_output: File = _
executable = config("exe", default = "cutadapt")
override def versionCommand = executable + " --version"
override def versionRegex = """(.*)""".r
def versionCommand = executable + " --version"
def versionRegex = """(.*)""".r
var default_clip_mode: String = config("default_clip_mode", default = "3")
var opt_adapter: Set[String] = config("adapter", default = Nil)
......
......@@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
......@@ -25,7 +25,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
* Extension for fastqc
* Based on version 0.10.1 and 0.11.2
*/
class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
class Fastqc(val root: Configurable) extends BiopetCommandLineFunction with Version {
@Input(doc = "Contaminants", required = false)
var contaminants: Option[File] = None
......@@ -47,8 +47,8 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
var nogroup: Boolean = config("nogroup", default = false)
var extract: Boolean = config("extract", default = true)
override def versionRegex = """FastQC (.*)""".r
override def versionCommand = executable + " --version"
def versionRegex = """FastQC (.*)""".r
def versionCommand = executable + " --version"
override def defaultThreads = 4
/** Sets contaminants and adapters when not yet set */
......
......@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference }
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, Reference }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
......@@ -26,7 +26,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
*
* Created by pjvan_thof on 3/3/15.
*/
class Freebayes(val root: Configurable) extends BiopetCommandLineFunction with Reference {
class Freebayes(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
@Input(required = true)
var bamfiles: List[File] = Nil
......@@ -41,8 +41,8 @@ class Freebayes(val root: Configurable) extends BiopetCommandLineFunction with R
var haplotypeLength: Option[Int] = config("haplotype_length")
executable = config("exe", default = "freebayes")
override def versionRegex = """version: (.*)""".r
override def versionCommand = executable + " --version"
def versionRegex = """version: (.*)""".r
def versionCommand = executable + " --version"
override def beforeGraph(): Unit = {
super.beforeGraph()
......
......@@ -18,14 +18,14 @@ package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, Reference }
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, Reference }
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
/**
* Wrapper for the gsnap command line tool
* Written based on gsnap version 2014-05-15
*/
class Gsnap(val root: Configurable) extends BiopetCommandLineFunction with Reference {
class Gsnap(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
/** default executable */
executable = config("exe", default = "gsnap", freeVar = false)
......@@ -328,8 +328,8 @@ class Gsnap(val root: Configurable) extends BiopetCommandLineFunction with Refer
/** value to put into read-group library (rg-pl) field */
var read_group_platform: Option[String] = config("read_group_platform")