Commit 6b492ea4 authored by Peter van 't Hof
Browse files

Merge commit '6c473b65' into feature-merge_alleles

parents 8cb8f61f 6c473b65
......@@ -54,29 +54,43 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
}
protected def checkExecutable {
try if (executable != null) {
val buffer = new StringBuffer()
val cmd = Seq("which", executable)
val process = Process(cmd).run(ProcessLogger(buffer.append(_)))
if (process.exitValue == 0) {
executable = buffer.toString
val file = new File(executable)
executable = file.getCanonicalPath
} else {
logger.error("executable: '" + executable + "' not found, please check config")
throw new QException("executable: '" + executable + "' not found, please check config")
if (!BiopetCommandLineFunctionTrait.executableMd5Cache.contains(executable)) {
try if (executable != null) {
if (!BiopetCommandLineFunctionTrait.executableCache.contains(executable)) {
val oldExecutable = executable
val buffer = new StringBuffer()
val cmd = Seq("which", executable)
val process = Process(cmd).run(ProcessLogger(buffer.append(_)))
if (process.exitValue == 0) {
executable = buffer.toString
val file = new File(executable)
executable = file.getCanonicalPath
} else {
logger.error("executable: '" + executable + "' not found, please check config")
throw new QException("executable: '" + executable + "' not found, please check config")
}
BiopetCommandLineFunctionTrait.executableCache += oldExecutable -> executable
BiopetCommandLineFunctionTrait.executableCache += executable -> executable
} else {
executable = BiopetCommandLineFunctionTrait.executableCache(executable)
}
if (!BiopetCommandLineFunctionTrait.executableMd5Cache.contains(executable)) {
val is = new FileInputStream(executable)
val cnt = is.available
val bytes = Array.ofDim[Byte](cnt)
is.read(bytes)
is.close()
val temp = MessageDigest.getInstance("MD5").digest(bytes).map("%02X".format(_)).mkString.toLowerCase
BiopetCommandLineFunctionTrait.executableMd5Cache += executable -> temp
}
addJobReportBinding("md5sum_exe", BiopetCommandLineFunctionTrait.executableMd5Cache(executable))
} catch {
case ioe: java.io.IOException => logger.warn("Could not use 'which', check on executable skipped: " + ioe)
}
val is = new FileInputStream(executable)
val cnt = is.available
val bytes = Array.ofDim[Byte](cnt)
is.read(bytes)
is.close()
val md5: String = MessageDigest.getInstance("MD5").digest(bytes).map("%02X".format(_)).mkString.toLowerCase
addJobReportBinding("md5sum_exe", md5)
} catch {
case ioe: java.io.IOException => logger.warn("Could not use 'which', check on executable skipped: " + ioe)
} else {
addJobReportBinding("md5sum_exe", BiopetCommandLineFunctionTrait.executableMd5Cache(executable))
}
}
......@@ -138,4 +152,6 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
/**
 * Process-wide caches shared by every command line function, so that
 * per-executable lookups are done once rather than once per job.
 */
object BiopetCommandLineFunctionTrait {
  import scala.collection.mutable

  // NOTE(review): presumably executable -> reported version string; its use is not visible here, confirm.
  private val versionCache: mutable.Map[String, String] = mutable.Map.empty

  // Resolved executable path -> lowercase hex MD5 digest of the executable file.
  private val executableMd5Cache: mutable.Map[String, String] = mutable.Map.empty

  // Executable as configured -> canonical path found via `which`;
  // the canonical path is also stored as a key mapping to itself.
  private val executableCache: mutable.Map[String, String] = mutable.Map.empty
}
\ No newline at end of file
package nl.lumc.sasc.biopet.core
import java.io.File
import java.util.Properties
import nl.lumc.sasc.biopet.core.config.Config
import org.apache.log4j.Logger
object BiopetExecutable extends Logging {
......@@ -82,6 +84,11 @@ object BiopetExecutable extends Logging {
return command.get
}
// Read config files: load every file passed with -config / --config_file into the
// global config before the arguments are dispatched by the `args match` that follows.
// The scan stops one element short of the end so that a trailing flag without a
// value cannot index past the array.
for (t <- 0 until args.size - 1) {
  if (args(t) == "-config" || args(t) == "--config_file") Config.global.loadConfigFile(new File(args(t + 1)))
}
args match {
case Array("version") => {
println("version: " + getVersion)
......
......@@ -2,7 +2,7 @@ package nl.lumc.sasc.biopet.core
import java.io.File
import java.io.PrintWriter
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.config.{ Config, Configurable }
import org.broadinstitute.gatk.utils.commandline.Argument
import org.broadinstitute.gatk.queue.QSettings
import org.broadinstitute.gatk.queue.function.QFunction
......@@ -25,7 +25,6 @@ trait BiopetQScript extends Configurable {
var functions: Seq[QFunction]
final def script() {
for (file <- configfiles) globalConfig.loadConfigFile(file)
if (!outputDir.endsWith("/")) outputDir += "/"
init
biopetScript
......@@ -33,7 +32,7 @@ trait BiopetQScript extends Configurable {
case f: BiopetCommandLineFunctionTrait => f.afterGraph
case _ =>
}
val configReport = globalConfig.getReport
val configReport = Config.global.getReport
val configReportFile = new File(outputDir + qSettings.runName + ".configreport.txt")
configReportFile.getParentFile.mkdir
val writer = new PrintWriter(configReportFile)
......
......@@ -7,9 +7,13 @@ import org.apache.log4j.WriterAppender
import org.apache.log4j.helpers.DateLayout
trait Logging {
protected val logger = Logger.getLogger(getClass.getSimpleName.split("\\$").last)
def logger = Logging.logger
}
object Logging {
val logger = Logger.getLogger("Logging")
private[core] val logLayout = new DateLayout() {
val logLayout = new DateLayout() {
val ignoresThrowable = false
def format(event: org.apache.log4j.spi.LoggingEvent): String = {
val calendar: Calendar = Calendar.getInstance
......@@ -18,10 +22,13 @@ trait Logging {
val formattedDate: String = formatter.format(calendar.getTime)
var logLevel = event.getLevel.toString
while (logLevel.size < 6) logLevel += " "
logLevel + " [" + formattedDate + "] [" + event.getLoggerName + "] " + event.getMessage + "\n"
val className = event.getLocationInformation.getClassName.split("\\.").last.split("\\$").head
logLevel + " [" + formattedDate + "] [" + className + "] " + event.getMessage + "\n"
}
}
private[core] val stderrAppender = new WriterAppender(logLayout, sys.process.stderr)
val stderrAppender = new WriterAppender(logLayout, sys.process.stderr)
logger.setLevel(org.apache.log4j.Level.INFO)
logger.addAppender(stderrAppender)
}
logger.addAppender(Logging.stderrAppender)
}
\ No newline at end of file
package nl.lumc.sasc.biopet.core
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.config.{ Config, Configurable }
trait MultiSampleQScript extends BiopetQScript {
type LibraryOutput <: AbstractLibraryOutput
......@@ -20,7 +20,7 @@ trait MultiSampleQScript extends BiopetQScript {
final def runSamplesJobs() {
samplesConfig = config("samples")
if (samplesConfig == null) samplesConfig = Map()
if (globalConfig.contains("samples")) for ((key, value) <- samplesConfig) {
if (Config.global.contains("samples")) for ((key, value) <- samplesConfig) {
var sample = Configurable.any2map(value)
if (!sample.contains("ID")) sample += ("ID" -> key)
if (sample("ID") == key) {
......
......@@ -9,10 +9,10 @@ trait ToolCommand extends MainCommand with Logging {
abstract class AbstractOptParser extends scopt.OptionParser[Args](commandName) {
opt[Unit]("log_nostderr") foreach { _ =>
logger.removeAppender(stderrAppender)
logger.removeAppender(Logging.stderrAppender)
} text ("No output to stderr")
opt[File]("log_file") foreach { x =>
logger.addAppender(new WriterAppender(logLayout, new java.io.PrintStream(x)))
logger.addAppender(new WriterAppender(Logging.logLayout, new java.io.PrintStream(x)))
} text ("Log file") valueName ("<file>")
opt[String]('l', "log_level") foreach { x =>
x.toLowerCase match {
......
package nl.lumc.sasc.biopet.core.config
import java.io.File
import org.broadinstitute.gatk.queue.util.Logging
import nl.lumc.sasc.biopet.core.Logging
import argonaut._, Argonaut._
import scalaz._, Scalaz._
......@@ -104,7 +104,9 @@ class Config(var map: Map[String, Any]) extends Logging {
override def toString(): String = map.toString
}
object Config {
object Config extends Logging {
val global = new Config
def valueToMap(input: Any): Map[String, Any] = {
input match {
case m: Map[_, _] => return m.asInstanceOf[Map[String, Any]]
......
......@@ -6,16 +6,16 @@ import scala.language.implicitConversions
trait Configurable extends Logging {
val root: Configurable
val globalConfig: Config = if (root != null) root.globalConfig else new Config()
//val globalConfig: Config = if (root != null) root.globalConfig else new Config()
def configPath: List[String] = if (root != null) root.configFullPath else List()
protected lazy val configName = getClass.getSimpleName.toLowerCase
protected lazy val configFullPath = configName :: configPath
var defaults: scala.collection.mutable.Map[String, Any] = if (root != null) scala.collection.mutable.Map(root.defaults.toArray: _*)
else scala.collection.mutable.Map()
val config = new ConfigFuntions
val config = new ConfigFunctions
protected class ConfigFuntions {
protected class ConfigFunctions {
def apply(key: String, default: Any = null, submodule: String = null, required: Boolean = false, freeVar: Boolean = true): ConfigValue = {
val m = if (submodule != null) submodule else configName
val p = if (submodule != null) configName :: configPath else configPath
......@@ -29,15 +29,15 @@ trait Configurable extends Logging {
throw new IllegalStateException("Value in config could not be found but it is required, key: " + key + " module: " + m + " path: " + p)
} else return null
}
if (d == null) return globalConfig(m, p, key, freeVar)
else return globalConfig(m, p, key, d, freeVar)
if (d == null) return Config.global(m, p, key, freeVar)
else return Config.global(m, p, key, d, freeVar)
}
def contains(key: String, submodule: String = null, freeVar: Boolean = true) = {
val m = if (submodule != null) submodule else configName
val p = if (submodule != null) configName :: configPath else configPath
globalConfig.contains(m, p, key, freeVar) || !(Config.getValueFromMap(defaults.toMap, ConfigValueIndex(m, p, key, freeVar)) == None)
Config.global.contains(m, p, key, freeVar) || !(Config.getValueFromMap(defaults.toMap, ConfigValueIndex(m, p, key, freeVar)) == None)
}
}
......
......@@ -26,7 +26,6 @@ class BamMetrics(val root: Configurable) extends QScript with BiopetQScript {
var wholeGenome = false
def init() {
for (file <- configfiles) globalConfig.loadConfigFile(file)
if (outputDir == null) throw new IllegalStateException("Missing Output directory on BamMetrics module")
else if (!outputDir.endsWith("/")) outputDir += "/"
if (config.contains("target_bed")) {
......
......@@ -18,10 +18,10 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
var input_R2: File = _
@Argument(doc = "Skip Trim fastq files", shortName = "skiptrim", required = false)
var skipTrim: Boolean = false
var skipTrim: Boolean = config("skiptrim", default = false)
@Argument(doc = "Skip Clip fastq files", shortName = "skipclip", required = false)
var skipClip: Boolean = false
var skipClip: Boolean = config("skipclip", default = false)
@Argument(doc = "Sample name", shortName = "sample", required = true)
var sampleName: String = _
......@@ -43,9 +43,6 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript {
val summary = new FlexiprepSummary(this)
def init() {
for (file <- configfiles) globalConfig.loadConfigFile(file)
if (!skipTrim) skipTrim = config("skiptrim", default = false)
if (!skipClip) skipClip = config("skipclip", default = false)
if (input_R1 == null) throw new IllegalStateException("Missing R1 on flexiprep module")
if (outputDir == null) throw new IllegalStateException("Missing Output directory on flexiprep module")
if (sampleName == null) throw new IllegalStateException("Missing Sample name on flexiprep module")
......
......@@ -18,18 +18,15 @@ class GatkBenchmarkGenotyping(val root: Configurable) extends QScript with Biope
@Input(doc = "Gvcf files", shortName = "I", required = false)
var gvcfFiles: List[File] = Nil
@Argument(doc = "Reference", shortName = "R", required = false)
var reference: File = _
var reference: File = config("reference")
@Argument(doc = "Dbsnp", shortName = "dbsnp", required = false)
var dbsnp: File = _
var dbsnp: File = config("dbsnp")
def init() {
if (config.contains("gvcffiles")) for (file <- config("gvcffiles").getList) {
gvcfFiles ::= file.toString
}
if (reference == null) reference = config("reference")
if (dbsnp == null) dbsnp = config("dbsnp")
if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
else if (!outputDir.endsWith("/")) outputDir += "/"
}
......
......@@ -13,10 +13,10 @@ class GatkGenotyping(val root: Configurable) extends QScript with BiopetQScript
var inputGvcfs: List[File] = Nil
@Argument(doc = "Reference", shortName = "R", required = false)
var reference: File = _
var reference: File = config("reference")
@Argument(doc = "Dbsnp", shortName = "dbsnp", required = false)
var dbsnp: File = _
var dbsnp: File = config("dbsnp")
@Argument(doc = "OutputName", required = false)
var outputName: String = "genotype"
......@@ -28,8 +28,6 @@ class GatkGenotyping(val root: Configurable) extends QScript with BiopetQScript
var samples: List[String] = Nil
def init() {
if (reference == null) reference = config("reference")
if (dbsnp == null) dbsnp = config("dbsnp")
if (outputFile == null) outputFile = outputDir + outputName + ".vcf.gz"
if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
else if (!outputDir.endsWith("/")) outputDir += "/"
......
......@@ -27,17 +27,17 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
var mergeGvcfs: Boolean = false
@Argument(doc = "Joint variantcalling", shortName = "jointVariantCalling", required = false)
var jointVariantcalling = false
var jointVariantcalling: Boolean = config("joint_variantcalling", default = false)
@Argument(doc = "Joint genotyping", shortName = "jointGenotyping", required = false)
var jointGenotyping = false
var jointGenotyping: Boolean = config("joint_genotyping", default = false)
var singleSampleCalling = true
var reference: File = _
var dbsnp: File = _
var singleSampleCalling = config("single_sample_calling", default = true)
var reference: File = config("reference", required = true)
var dbsnp: File = config("dbsnp")
var gvcfFiles: List[File] = Nil
var finalBamFiles: List[File] = Nil
var useAllelesOption: Boolean = _
var useAllelesOption: Boolean = config("use_alleles_option", default = false)
class LibraryOutput extends AbstractLibraryOutput {
var mappedBamFile: File = _
......@@ -49,15 +49,9 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
}
def init() {
useAllelesOption = config("use_alleles_option", default = false)
reference = config("reference", required = true)
dbsnp = config("dbsnp")
if (config.contains("target_bed")) {
defaults ++= Map("gatk" -> Map(("intervals" -> config("target_bed").getStringList)))
}
jointVariantcalling = config("joint_variantcalling", default = false)
jointGenotyping = config("joint_genotyping", default = false)
singleSampleCalling = config("single_sample_calling", default = true)
if (config.contains("gvcfFiles"))
for (file <- config("gvcfFiles").getList)
gvcfFiles :+= file.toString
......
......@@ -24,10 +24,10 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
var rawVcfInput: File = _
@Argument(doc = "Reference", shortName = "R", required = false)
var reference: File = _
var reference: File = config("reference", required = true)
@Argument(doc = "Dbsnp", shortName = "dbsnp", required = false)
var dbsnp: File = _
var dbsnp: File = config("dbsnp")
@Argument(doc = "OutputName", required = false)
var outputName: String = _
......@@ -35,21 +35,14 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
@Argument(doc = "Sample name", required = false)
var sampleID: String = _
var preProcesBams: Option[Boolean] = None
var preProcesBams: Option[Boolean] = config("pre_proces_bams", default = true)
var variantcalling: Boolean = true
var doublePreProces: Option[Boolean] = None
var useHaplotypecaller: Option[Boolean] = None
var useUnifiedGenotyper: Option[Boolean] = None
var useAllelesOption: Option[Boolean] = None
var doublePreProces: Option[Boolean] = config("double_pre_proces", default = true)
var useHaplotypecaller: Option[Boolean] = config("use_haplotypecaller", default = true)
var useUnifiedGenotyper: Option[Boolean] = config("use_unifiedgenotyper", default = false)
var useAllelesOption: Option[Boolean] = config("use_alleles_option", default = false)
def init() {
if (useAllelesOption == None) useAllelesOption = config("use_alleles_option", default = false)
if (preProcesBams == None) preProcesBams = config("pre_proces_bams", default = true)
if (doublePreProces == None) doublePreProces = config("double_pre_proces", default = true)
if (useHaplotypecaller == None) useHaplotypecaller = config("use_haplotypecaller", default = true)
if (useUnifiedGenotyper == None) useUnifiedGenotyper = config("use_unifiedgenotyper", default = false)
if (reference == null) reference = config("reference", required = true)
if (dbsnp == null) dbsnp = config("dbsnp")
if (outputName == null && sampleID != null) outputName = sampleID
else if (outputName == null) outputName = "noname"
if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
......
......@@ -16,7 +16,7 @@ class GatkVcfSampleCompare(val root: Configurable) extends QScript with BiopetQS
var vcfFiles: List[File] = _
@Argument(doc = "Reference", shortName = "R", required = false)
var reference: File = _
var reference: File = config("reference")
@Argument(doc = "Target bed", shortName = "targetBed", required = false)
var targetBed: List[File] = Nil
......@@ -29,7 +29,6 @@ class GatkVcfSampleCompare(val root: Configurable) extends QScript with BiopetQS
def generalSampleDir = outputDir + "samples/"
def init() {
if (reference == null) reference = config("reference")
if (config.contains("target_bed"))
for (bed <- config("target_bed").getList)
targetBed :+= bed.toString
......
......@@ -69,7 +69,7 @@ class Gentrap(val root: Configurable) extends QScript with BiopetQScript {
var cExonBase: Boolean = _
def init() {
for (file <- configfiles) globalConfig.loadConfigFile(file)
}
def biopetScript() {
......
......@@ -36,13 +36,13 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
var skipMetrics: Boolean = false
@Argument(doc = "Aligner", shortName = "ALN", required = false)
var aligner: String = _
var aligner: String = config("aligner", default = "bwa")
@Argument(doc = "Reference", shortName = "R", required = false)
var reference: File = _
var reference: File = config("reference")
@Argument(doc = "Chunking", shortName = "chunking", required = false)
var chunking: Boolean = false
var chunking: Boolean = config("chunking", false)
@ClassType(classOf[Int])
@Argument(doc = "Number of chunks, when not defined pipeline will automatic calculate number of chunks", shortName = "numberChunks", required = false)
......@@ -50,62 +50,48 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript {
// Readgroup items
@Argument(doc = "Readgroup ID", shortName = "RGID", required = false)
var RGID: String = _
var RGID: String = config("RGID")
@Argument(doc = "Readgroup Library", shortName = "RGLB", required = false)
var RGLB: String = _
var RGLB: String = config("RGLB")
@Argument(doc = "Readgroup Platform", shortName = "RGPL", required = false)
var RGPL: String = _
var RGPL: String = config("RGPL", default = "illumina")
@Argument(doc = "Readgroup platform unit", shortName = "RGPU", required = false)
var RGPU: String = _
var RGPU: String = config("RGPU", default = "na")
@Argument(doc = "Readgroup sample", shortName = "RGSM", required = false)
var RGSM: String = _
var RGSM: String = config("RGSM")
@Argument(doc = "Readgroup sequencing center", shortName = "RGCN", required = false)
var RGCN: String = _
var RGCN: String = config("RGCN")
@Argument(doc = "Readgroup description", shortName = "RGDS", required = false)
var RGDS: String = _
var RGDS: String = config("RGDS")
@Argument(doc = "Readgroup sequencing date", shortName = "RGDT", required = false)
var RGDT: Date = _
@Argument(doc = "Readgroup predicted insert size", shortName = "RGPI", required = false)
var RGPI: Int = _
var RGPI: Int = config("RGPI")
var paired: Boolean = false
var defaultAligner = "bwa"
val flexiprep = new Flexiprep(this)
def init() {
for (file <- configfiles) globalConfig.loadConfigFile(file)
if (aligner == null) aligner = config("aligner", default = defaultAligner)
if (reference == null) reference = config("reference")
if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
else if (!outputDir.endsWith("/")) outputDir += "/"
if (input_R1 == null) throw new IllegalStateException("Missing FastQ R1 on mapping module")
paired = (input_R2 != null)
if (RGLB == null && config.contains("RGLB")) RGLB = config("RGLB")
else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
if (RGSM == null && config.contains("RGSM")) RGSM = config("RGSM")
else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
if (RGID == null && config.contains("RGID")) RGID = config("RGID")
else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
// Library and sample are both mandatory: the read group ID is derived from them when it is not set.
if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module")
// Check the sample (RGSM) here; testing RGLB a second time could never fire.
if (RGSM == null) throw new IllegalStateException("Missing Readgroup sample on mapping module")
if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB
else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module")
if (RGPL == null) RGPL = config("RGPL", "illumina")
if (RGPU == null) RGPU = config("RGPU", "na")
if (RGCN == null && config.contains("RGCN")) RGCN = config("RGCN")
if (RGDS == null && config.contains("RGDS")) RGDS = config("RGDS")
if (outputName == null) outputName = RGID
if (!chunking && numberChunks.isDefined) chunking = true
if (!chunking) chunking = config("chunking", false)
if (chunking) {
if (numberChunks.isEmpty) {
if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None)
......@@ -311,10 +297,6 @@ object Mapping extends PipelineCommand {
val mapping = new Mapping(root)
logger.debug("Mapping runconfig: " + runConfig)
var inputType = ""
if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
else inputType = root.config("inputtype", "dna").getString
if (inputType == "rna") mapping.defaultAligner = "star-2pass"
if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString)
if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
mapping.paired = (mapping.input_R2 != null)
......
......@@ -20,15 +20,15 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
def this() = this(null)
@Input(doc = "countBed", required = false)
var countBed: File = _
var countBed: File = config("count_bed")
@Input(doc = "squishedCountBed, by suppling this file the auto squish job will be skipped", required = false)