Commit d4afc77f authored by Sander van der Zeeuw's avatar Sander van der Zeeuw
Browse files

Merge remote-tracking branch 'remotes/origin/develop' into fix-Shiva_bcftools_calling

parents a5a9f4b1 911948ed
#!/bin/bash
# Copies the contents of every `src` directory found two levels below the
# parent of this script's directory into this script's own `src` directory.

# Absolute, symlink-resolved directory that contains this script.
DIR=$(readlink -f "$(dirname "$0")")

# The directory part is quoted so paths with spaces survive word splitting;
# the glob itself stays outside the quotes so it is still expanded.
cp -r "$DIR"/../*/*/src/* "$DIR/src"
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- POM of the BiopetAggregate artifact; BiopetRoot is declared as its parent. -->
<parent>
<artifactId>BiopetRoot</artifactId>
<groupId>nl.lumc.sasc</groupId>
<version>0.5.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>BiopetAggregate</artifactId>
<dependencies>
<!-- Test-only dependencies (scope "test"): TestNG, Mockito and ScalaTest. -->
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.8</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.9.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.10</artifactId>
<version>2.2.1</version>
<scope>test</scope>
</dependency>
<!-- Dependencies without an explicit scope, so Maven's default scope applies. -->
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetProtectedPackage</artifactId>
<version>0.5.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>18.0</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
#!/bin/bash
# Removes the `src/main` and `src/test` trees located next to this script.

# Absolute, symlink-resolved directory that contains this script.
DIR=$(readlink -f "$(dirname "$0")")

# ${DIR:?} aborts the script when DIR is empty, so a failed readlink can never
# turn this into `rm -r /src/main /src/test`. Quoting keeps paths with spaces intact.
rm -r "${DIR:?}/src/main" "${DIR:?}/src/test"
......@@ -17,5 +17,6 @@
<module>public</module>
<module>protected</module>
<module>external-example</module>
<!--<module>biopet-aggregate</module>-->
</modules>
</project>
......@@ -5,11 +5,11 @@
*/
package nl.lumc.sasc.biopet.extensions.gatk.broad
import nl.lumc.sasc.biopet.core.{ CommandLineResources, Reference, BiopetJavaCommandLineFunction }
import nl.lumc.sasc.biopet.core.{ Version, CommandLineResources, Reference, BiopetJavaCommandLineFunction }
import org.broadinstitute.gatk.engine.phonehome.GATKRunReport
import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK
trait GatkGeneral extends CommandLineGATK with CommandLineResources with Reference {
trait GatkGeneral extends CommandLineGATK with CommandLineResources with Reference with Version {
memoryLimit = Option(3)
override def subPath = "gatk" :: super.subPath
......@@ -35,9 +35,9 @@ trait GatkGeneral extends CommandLineGATK with CommandLineResources with Referen
if (config.contains("gatk_key")) gatk_key = config("gatk_key")
if (config.contains("pedigree")) pedigree = config("pedigree")
//override def versionRegex = """(.*)""".r
//override def versionExitcode = List(0, 1)
//override def versionCommand = executable + " -jar " + jarFile + " -version"
def versionRegex = """(.*)""".r
override def versionExitcode = List(0, 1)
def versionCommand = "java" + " -jar " + jarFile + " -version"
//override def getVersion = super.getVersion.collect { case v => "Gatk " + v }
override def getVersion = super.getVersion.collect { case v => "Gatk " + v }
}
......@@ -39,31 +39,27 @@ class ShivaTest extends TestNGSuite with Matchers {
val bool = Array(true, false)
for (
s1 <- bool; s2 <- bool; s3 <- bool; multi <- bool; single <- bool;
library <- bool; dbsnp <- bool; covariates <- bool; realign <- bool; baseRecalibration <- bool
) yield Array("", s1, s2, s3, multi, single, library, dbsnp, covariates, realign, baseRecalibration)
s1 <- bool; s2 <- bool; multi <- bool;
dbsnp <- bool; realign <- bool; baseRecalibration <- bool
) yield Array("", s1, s2, multi, dbsnp, realign, baseRecalibration)
}
@Test(dataProvider = "shivaOptions")
def testShiva(f: String, sample1: Boolean, sample2: Boolean, sample3: Boolean,
multi: Boolean, single: Boolean, library: Boolean, dbsnp: Boolean,
covariates: Boolean, realign: Boolean, baseRecalibration: Boolean): Unit = {
def testShiva(f: String, sample1: Boolean, sample2: Boolean,
multi: Boolean, dbsnp: Boolean,
realign: Boolean, baseRecalibration: Boolean): Unit = {
val map = {
var m: Map[String, Any] = ShivaTest.config
if (sample1) m = ConfigUtils.mergeMaps(ShivaTest.sample1, m)
if (sample2) m = ConfigUtils.mergeMaps(ShivaTest.sample2, m)
if (sample3) m = ConfigUtils.mergeMaps(ShivaTest.sample3, m)
if (dbsnp) m = ConfigUtils.mergeMaps(Map("dbsnp" -> "test"), m)
ConfigUtils.mergeMaps(Map("multisample_variantcalling" -> multi,
"single_sample_variantcalling" -> single,
"library_variantcalling" -> library,
"use_analyze_covariates" -> covariates,
"use_indel_realigner" -> realign,
"use_base_recalibration" -> baseRecalibration), m)
}
if (!sample1 && !sample2 && !sample3) { // When no samples
if (!sample1 && !sample2) { // When no samples
intercept[IllegalArgumentException] {
initPipeline(map).script()
}
......@@ -71,20 +67,18 @@ class ShivaTest extends TestNGSuite with Matchers {
val pipeline = initPipeline(map)
pipeline.script()
val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + (if (sample3) 2 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + (if (sample3) 1 else 0)
val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0)
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + (if (sample3) 1 else 0))
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + (if (sample2) 1 else 0))
// Gatk preprocess
pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (numberLibs + (if (sample3) 1 else 0)) * (if (realign) 1 else 0)
pipeline.functions.count(_.isInstanceOf[RealignerTargetCreator]) shouldBe (numberLibs + (if (sample3) 1 else 0)) * (if (realign) 1 else 0)
pipeline.functions.count(_.isInstanceOf[BaseRecalibrator]) shouldBe (if (dbsnp && baseRecalibration) numberLibs else 0) * (if (covariates) 2 else 1)
pipeline.functions.count(_.isInstanceOf[AnalyzeCovariates]) shouldBe (if (dbsnp && covariates && baseRecalibration) numberLibs else 0)
pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
pipeline.functions.count(_.isInstanceOf[RealignerTargetCreator]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
pipeline.functions.count(_.isInstanceOf[BaseRecalibrator]) shouldBe (if (dbsnp && baseRecalibration) numberLibs else 0)
pipeline.functions.count(_.isInstanceOf[PrintReads]) shouldBe (if (dbsnp && baseRecalibration) numberLibs else 0)
pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe (if (multi) 2 else 0) +
(if (single) numberSamples * 2 else 0) + (if (library) numberLibs * 2 else 0)
pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe (if (multi) 2 else 0)
}
}
}
......@@ -115,7 +109,6 @@ object ShivaTest {
"dir" -> "test",
"vep_script" -> "test",
"output_dir" -> outputDir,
"reference" -> (outputDir + File.separator + "ref.fa"),
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"gatk_jar" -> "test",
"samtools" -> Map("exe" -> "test"),
......@@ -147,23 +140,14 @@ object ShivaTest {
)))
val sample2 = Map(
"samples" -> Map("sample2" -> Map("libraries" -> Map(
"samples" -> Map("sample3" -> Map("libraries" -> Map(
"lib1" -> Map(
"R1" -> inputTouch("2_1_R1.fq"),
"R2" -> inputTouch("2_1_R2.fq")
)
)
)))
val sample3 = Map(
"samples" -> Map("sample3" -> Map("libraries" -> Map(
"lib1" -> Map(
"R1" -> inputTouch("3_1_R1.fq"),
"R2" -> inputTouch("3_1_R2.fq")
),
"lib2" -> Map(
"R1" -> inputTouch("3_2_R1.fq"),
"R2" -> inputTouch("3_2_R2.fq")
"R1" -> inputTouch("2_2_R1.fq"),
"R2" -> inputTouch("2_2_R2.fq")
)
)
)))
......
......@@ -135,7 +135,6 @@ object ShivaVariantcallingTest {
"cache" -> true,
"dir" -> "test",
"vep_script" -> "test",
"reference" -> (outputDir + File.separator + "ref.fa"),
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"gatk_jar" -> "test",
"samtools" -> Map("exe" -> "test"),
......
......@@ -146,59 +146,6 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
case Some(n) if n > 0 => n
case _ => 1
})
addJobReportBinding("version", getVersion)
}
/** Command to get version of executable */
protected[core] def versionCommand: String = null
/** Regex to get version from version command output */
protected[core] def versionRegex: Regex = null
/** Allowed exit codes for the version command */
protected[core] def versionExitcode = List(0)
/** Executes the version command */
private[core] def getVersionInternal: Option[String] = {
if (versionCommand == null || versionRegex == null) None
else getVersionInternal(versionCommand, versionRegex)
}
/** Executes the version command */
private[core] def getVersionInternal(versionCommand: String, versionRegex: Regex): Option[String] = {
if (versionCommand == null || versionRegex == null) return None
val exe = new File(versionCommand.trim.split(" ")(0))
if (!exe.exists()) return None
val stdout = new StringBuffer()
val stderr = new StringBuffer()
def outputLog = "Version command: \n" + versionCommand +
"\n output log: \n stdout: \n" + stdout.toString +
"\n stderr: \n" + stderr.toString
val process = Process(versionCommand).run(ProcessLogger(stdout append _ + "\n", stderr append _ + "\n"))
if (!versionExitcode.contains(process.exitValue())) {
logger.warn("getVersion give exit code " + process.exitValue + ", version not found \n" + outputLog)
return None
}
for (line <- stdout.toString.split("\n") ++ stderr.toString.split("\n")) {
line match {
case versionRegex(m) => return Some(m)
case _ =>
}
}
logger.warn("getVersion give a exit code " + process.exitValue + " but no version was found, executable correct? \n" + outputLog)
None
}
/** Get version from cache otherwise execute the version command */
def getVersion: Option[String] = {
if (!BiopetCommandLineFunction.executableCache.contains(executable))
preProcessExecutable()
if (!BiopetCommandLineFunction.versionCache.contains(versionCommand))
getVersionInternal match {
case Some(version) => BiopetCommandLineFunction.versionCache += versionCommand -> version
case _ =>
}
BiopetCommandLineFunction.versionCache.get(versionCommand)
}
private[core] var _inputAsStdin = false
......@@ -284,7 +231,6 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
/** stores global caches */
object BiopetCommandLineFunction {
private[core] val versionCache: mutable.Map[String, String] = mutable.Map()
private[core] val executableMd5Cache: mutable.Map[String, String] = mutable.Map()
private[core] val executableCache: mutable.Map[String, String] = mutable.Map()
}
......@@ -51,6 +51,9 @@ class BiopetFifoPipe(val root: Configurable,
deps :::= inputs.values.toList.flatten.filter(!fifoFiles.contains(_))
deps = deps.distinct
pipesJobs :::= commands
pipesJobs = pipesJobs.distinct
}
override def beforeCmd(): Unit = {
......
......@@ -55,12 +55,12 @@ trait BiopetJavaCommandLineFunction extends JavaCommandLineFunction with BiopetC
def getJavaVersion: Option[String] = {
if (!BiopetCommandLineFunction.executableCache.contains(executable))
preProcessExecutable()
if (!BiopetCommandLineFunction.versionCache.contains(javaVersionCommand))
getVersionInternal(javaVersionCommand, """java version "(.*)"""".r) match {
case Some(version) => BiopetCommandLineFunction.versionCache += javaVersionCommand -> version
if (!Version.versionCache.contains(javaVersionCommand))
Version.getVersionInternal(javaVersionCommand, """java version "(.*)"""".r) match {
case Some(version) => Version.versionCache += javaVersionCommand -> version
case _ =>
}
BiopetCommandLineFunction.versionCache.get(javaVersionCommand)
Version.versionCache.get(javaVersionCommand)
}
override def setupRetry(): Unit = {
......
......@@ -85,9 +85,11 @@ trait CommandLineResources extends CommandLineFunction with Configurable {
this.freeze()
}
var threadsCorrection = 0
protected def combineResources(commands: List[CommandLineResources]): Unit = {
commands.foreach(_.setResources())
nCoresRequest = Some(commands.map(_.threads).sum)
nCoresRequest = Some(commands.map(_.threads).sum + threadsCorrection)
_coreMemory = commands.map(cmd => cmd.coreMemory * (cmd.threads.toDouble / threads.toDouble)).sum
memoryLimit = Some(_coreMemory * threads)
......
......@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.core
import java.io.File
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils }
import org.broadinstitute.gatk.utils.commandline.Argument
/** This trait provides a structured way of building multi-sample pipelines */
......@@ -47,6 +47,8 @@ trait MultiSampleQScript extends SummaryQScript {
/** Adds the library jobs */
final def addAndTrackJobs(): Unit = {
if (nameRegex.findFirstIn(libId) == None)
Logging.addError(s"Library '$libId' $nameError")
currentSample = Some(sampleId)
currentLib = Some(libId)
addJobs()
......@@ -90,6 +92,8 @@ trait MultiSampleQScript extends SummaryQScript {
/** Adds sample jobs */
final def addAndTrackJobs(): Unit = {
if (nameRegex.findFirstIn(sampleId) == None)
Logging.addError(s"Sample '$sampleId' $nameError")
currentSample = Some(sampleId)
addJobs()
qscript.addSummarizable(this, "pipeline", Some(sampleId))
......@@ -129,6 +133,12 @@ trait MultiSampleQScript extends SummaryQScript {
/** Returns a list of all sampleIDs */
protected def sampleIds: Set[String] = ConfigUtils.any2map(globalConfig.map("samples")).keySet
/**
 * Pattern a sample or library name has to match: an alphanumeric first and last
 * character with at least one alphanumeric, '-' or '_' character in between,
 * which makes three characters the minimum length.
 */
protected lazy val nameRegex = """^[a-zA-Z0-9][a-zA-Z0-9-_]+[a-zA-Z0-9]$""".r

/**
 * Message suffix appended after "Sample '<id>'" / "Library '<id>'" when a name
 * does not match [[nameRegex]]. Every fragment ends with a space so the
 * concatenated sentence reads correctly.
 */
protected lazy val nameError = " name invalid. " +
  "Name must have at least 3 characters, " +
  "must begin and end with an alphanumeric character, " +
  "and must not have whitespace."
/** Runs addAndTrackJobs method for each sample */
final def addSamplesJobs() {
if (onlySamples.isEmpty || samples.forall(x => onlySamples.contains(x._1))) {
......
......@@ -5,9 +5,12 @@ import nl.lumc.sasc.biopet.FullVersion
/**
* Created by pjvanthof on 11/09/15.
*/
trait ToolCommandFuntion extends BiopetJavaCommandLineFunction {
trait ToolCommandFunction extends BiopetJavaCommandLineFunction with Version {
def toolObject: Object
def versionCommand = ""
def versionRegex = "".r
override def getVersion = Some("Biopet " + FullVersion)
override def beforeGraph(): Unit = {
......
package nl.lumc.sasc.biopet.core
import java.io.File
import nl.lumc.sasc.biopet.utils.Logging
import org.broadinstitute.gatk.queue.function.QFunction
import scala.collection.mutable
import scala.sys.process.{ Process, ProcessLogger }
import scala.util.matching.Regex
/**
* Created by pjvan_thof on 10/13/15.
*/
/**
 * Mixin for functions that can report the version of the tool they run.
 *
 * Implementations supply the command and the regex; the resolved version is
 * kept in the cache of the `Version` object, keyed by the version command.
 */
trait Version extends QFunction {

  /** Command line that makes the executable print its version */
  def versionCommand: String

  /** Regex matched against each output line of the version command; its capture group is the version */
  def versionRegex: Regex

  /** Exit codes of the version command that are accepted */
  protected[core] def versionExitcode = List(0)

  /** Runs the version command, or yields None when the command or the regex is null */
  private[core] def getVersionInternal: Option[String] =
    if (versionCommand != null && versionRegex != null)
      Version.getVersionInternal(versionCommand, versionRegex, versionExitcode)
    else None

  /** Returns the cached version, running the version command first when nothing is cached yet */
  def getVersion: Option[String] = {
    val alreadyCached = Version.versionCache.contains(versionCommand)
    if (!alreadyCached)
      getVersionInternal.foreach(found => Version.versionCache += versionCommand -> found)
    Version.versionCache.get(versionCommand)
  }

  /** Adds the version, or "NA" when it is unknown, to the job report bindings */
  override def freezeFieldValues(): Unit = {
    super.freezeFieldValues()
    val reported = getVersion.getOrElse("NA")
    addJobReportBinding("version", reported)
  }
}
/** Holds the global version cache and the logic that executes a version command */
object Version extends Logging {

  /** Versions found so far, keyed by the version command that produced them */
  private[core] val versionCache: mutable.Map[String, String] = mutable.Map()

  /**
   * Executes the version command and extracts the version from its output.
   *
   * This method only reads the cache; it never writes to it.
   *
   * @param versionCommand command line to run; its first space-separated token must be an existing file
   * @param versionRegex regex that must match a complete output line; its capture group is the version
   * @param versionExitcode exit codes that are accepted for the version command
   * @return the cached or freshly extracted version; None when an input is null, the
   *         executable does not exist, the exit code is not accepted or no line matches
   */
  private[core] def getVersionInternal(versionCommand: String,
                                       versionRegex: Regex,
                                       versionExitcode: List[Int] = List(0)): Option[String] = {
    if (versionCache.contains(versionCommand)) versionCache.get(versionCommand)
    else if (versionCommand == null || versionRegex == null) None
    else if (!new File(versionCommand.trim.split(" ")(0)).exists()) None
    else {
      val stdout = new StringBuffer()
      val stderr = new StringBuffer()
      // A def, so the log text is only assembled when a warning is actually emitted
      def outputLog = "Version command: \n" + versionCommand +
        "\n output log: \n stdout: \n" + stdout.toString +
        "\n stderr: \n" + stderr.toString

      val process = Process(versionCommand).run(ProcessLogger(stdout append _ + "\n", stderr append _ + "\n"))
      // exitValue() blocks until the process has finished, so both buffers are complete afterwards
      val exitValue = process.exitValue()

      if (!versionExitcode.contains(exitValue)) {
        logger.warn("getVersion give exit code " + exitValue + ", version not found \n" + outputLog)
        None
      } else {
        // stdout lines are inspected before stderr lines; the first matching line wins.
        // collectFirst replaces the former nonlocal `return` from inside the loop closure.
        val outputLines = stdout.toString.split("\n") ++ stderr.toString.split("\n")
        val version = outputLines.collectFirst { case versionRegex(m) => m }
        if (version.isEmpty)
          logger.warn("getVersion give a exit code " + exitValue + " but no version was found, executable correct? \n" + outputLog)
        version
      }
    }
  }
}
\ No newline at end of file
......@@ -17,12 +17,12 @@ package nl.lumc.sasc.biopet.core.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for md5sum */
class Md5sum(val root: Configurable) extends BiopetCommandLineFunction {
class Md5sum(val root: Configurable) extends BiopetCommandLineFunction with Version {
@Input(doc = "Input")
var input: File = _
......@@ -31,8 +31,8 @@ class Md5sum(val root: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "md5sum")
override def versionRegex = """md5sum \(GNU coreutils\) (.*)""".r
override def versionCommand = executable + " --version"
def versionRegex = """md5sum \(GNU coreutils\) (.*)""".r
def versionCommand = executable + " --version"
/** return commandline to execute */
def cmdLine = required(executable) + required(input) + " > " + required(output)
......
......@@ -16,7 +16,7 @@
package nl.lumc.sasc.biopet.core.report
import java.io._
import nl.lumc.sasc.biopet.core.ToolCommandFuntion
import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.utils.summary.Summary
import nl.lumc.sasc.biopet.utils.{ ToolCommand, Logging, IoUtils }
import org.broadinstitute.gatk.utils.commandline.Input
......@@ -28,7 +28,7 @@ import scala.collection.mutable
*
* @author pjvan_thof
*/
trait ReportBuilderExtension extends ToolCommandFuntion {
trait ReportBuilderExtension extends ToolCommandFunction {
/** Report builder object */
val builder: ReportBuilder
......
......@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.core.summary
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.{ BiopetCommandLineFunction, BiopetJavaCommandLineFunction, SampleLibraryTag }
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, BiopetJavaCommandLineFunction, SampleLibraryTag }
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.{ LastCommitHash, Version }
import org.broadinstitute.gatk.queue.function.{ InProcessFunction, QFunction }
......@@ -71,21 +71,32 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
val files = parseFiles(qscript.summaryFiles)
val settings = qscript.summarySettings
val executables: Map[String, Any] = {
(for (f <- qscript.functions if f.isInstanceOf[BiopetCommandLineFunction]) yield {
def fetchVersion(f: QFunction): Option[(String, Any)] = {
f match {
case f: BiopetJavaCommandLineFunction =>
f.configName -> Map("version" -> f.getVersion.getOrElse(None),
case f: BiopetJavaCommandLineFunction with Version =>
Some(f.configName -> Map("version" -> f.getVersion.getOrElse(None),
"java_md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
"java_version" -> f.getJavaVersion,
"jar_path" -> f.jarFile)
case f: BiopetCommandLineFunction =>
f.configName -> Map("version" -> f.getVersion.getOrElse(None),
"jar_path" -> f.jarFile))
case f: BiopetCommandLineFunction with Version =>
Some(f.configName -> Map("version" -> f.getVersion.getOrElse(None),
"md5" -> BiopetCommandLineFunction.executableMd5Cache.getOrElse(f.executable, None),
"path" -> f.executable)
case _ => throw new IllegalStateException("This should not be possible")
"path" -> f.executable))
case f: Configurable with Version =>
Some(f.configName -> Map("version" -> f.getVersion.getOrElse(None)))
case _ => None
}
}).toMap
}
(
qscript.functions.flatMap(fetchVersion(_)) ++
qscript.functions
.flatMap {
case f: BiopetCommandLineFunction => f.pipesJobs
case _ => Nil
}.flatMap(fetchVersion(_))
).toMap
}
val map = Map(qscript.summaryName -> ((if (settings.isEmpty) Map[String, Any]() else Map("settings" -> settings)) ++
......@@ -113,7 +124,7 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
}).foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b)) ++
Map("meta" -> Map(
"last_commit_hash" -> LastCommitHash