Commit 1dc0cde8 authored by bow's avatar bow
Browse files

Merge branch 'feature-improve_config_report' into 'develop'

Feature improve config report

Fix for #99

Also some preparations for #55

See merge request !71
parents bd732f66 96557540
......@@ -11,13 +11,15 @@ import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK
trait GatkGeneral extends CommandLineGATK with BiopetJavaCommandLineFunction {
memoryLimit = Option(3)
if (config.contains("gatk_jar")) jarFile = config("gatk_jar")
override def subPath = "gatk" :: super.subPath
jarFile = config("gatk_jar", required = true)
override val defaultVmem = "7G"
if (config.contains("intervals", submodule = "gatk")) intervals = config("intervals", submodule = "gatk").asFileList
if (config.contains("exclude_intervals", submodule = "gatk")) excludeIntervals = config("exclude_intervals", submodule = "gatk").asFileList
reference_sequence = config("reference", submodule = "gatk")
gatk_key = config("gatk_key", submodule = "gatk")
if (config.contains("pedigree", submodule = "gatk")) pedigree = config("pedigree", submodule = "gatk").asFileList
if (config.contains("intervals")) intervals = config("intervals").asFileList
if (config.contains("exclude_intervals")) excludeIntervals = config("exclude_intervals").asFileList
reference_sequence = config("reference")
gatk_key = config("gatk_key")
if (config.contains("pedigree")) pedigree = config("pedigree").asFileList
}
......@@ -9,6 +9,7 @@ import nl.lumc.sasc.biopet.core.MultiSampleQScript
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping._
import scala.collection.JavaConversions._
import java.io.File
import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, CombineGVCFs }
......@@ -63,7 +64,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
val multisampleVariantcalling = new GatkVariantcalling(this) {
override def configName = "gatkvariantcalling"
override def configPath: List[String] = "multisample" :: super.configPath
override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
}
def biopetScript() {
......@@ -98,7 +99,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
val gatkVariantcalling = new GatkVariantcalling(this) {
override def configName = "gatkvariantcalling"
override def configPath: List[String] = "multisample" :: super.configPath
override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
}
if (gatkVariantcalling.useMpileup) {
......@@ -132,7 +133,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
val sampleOutput = new SampleOutput
var libraryBamfiles: List[File] = List()
val sampleID: String = sampleConfig("ID").toString
val sampleID: String = getCurrentSample
sampleOutput.libraries = runLibraryJobs(sampleConfig)
val sampleDir = globalSampleDir + sampleID
for ((libraryID, libraryOutput) <- sampleOutput.libraries) {
......@@ -162,25 +163,43 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
// Called for each run from a sample
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
val libraryOutput = new LibraryOutput
val runID: String = runConfig("ID").toString
val sampleID: String = sampleConfig("ID").toString
val runID: String = getCurrentLibrary
val sampleID: String = getCurrentSample
val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/"
var inputType = ""
if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
else inputType = config("inputtype", default = "dna").toString
if (runConfig.contains("R1")) {
val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir)
var inputType: String = config("inputtype", default = "dna")
/**
 * Creates a Mapping for the current library, filled from the config keys
 * "R1", "R2", "PL", "PU" and "CN", with runID as RGLB, sampleID as RGSM and
 * runDir as output directory.
 *
 * @param startJobs when true, `init` and `biopetScript` are called on the mapping before it is returned
 * @return the configured Mapping
 */
def loadFromLibraryConfig(startJobs: Boolean = true): Mapping = {
  val libraryMapping = new Mapping(this)
  libraryMapping.input_R1 = config("R1")
  libraryMapping.input_R2 = config("R2")
  libraryMapping.RGLB = runID
  libraryMapping.RGSM = sampleID
  libraryMapping.RGPL = config("PL")
  libraryMapping.RGPU = config("PU")
  libraryMapping.RGCN = config("CN")
  libraryMapping.outputDir = runDir
  if (startJobs) {
    libraryMapping.init
    libraryMapping.biopetScript
  }
  // Last expression is the result; no explicit return needed
  libraryMapping
}
if (config.contains("R1")) {
val mapping = loadFromLibraryConfig()
addAll(mapping.functions) // Add functions of mapping to curent function pool
libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
} else if (runConfig.contains("bam")) {
var bamFile = new File(runConfig("bam").toString)
} else if (config.contains("bam")) {
var bamFile: File = config("bam")
if (!bamFile.exists) throw new IllegalStateException("Bam in config does not exist, file: " + bamFile)
if (config("bam_to_fastq", default = false).asBoolean) {
val samToFastq = SamToFastq(this, bamFile, runDir + sampleID + "-" + runID + ".R1.fastq",
runDir + sampleID + "-" + runID + ".R2.fastq")
add(samToFastq, isIntermediate = true)
val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir, startJobs = false)
val mapping = loadFromLibraryConfig(startJobs = false)
mapping.input_R1 = samToFastq.fastqR1
mapping.input_R2 = samToFastq.fastqR2
mapping.init
......@@ -205,11 +224,9 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
aorrg.RGID = sampleID + "-" + runID
aorrg.RGLB = runID
aorrg.RGSM = sampleID
if (runConfig.contains("PL")) aorrg.RGPL = runConfig("PL").toString
else aorrg.RGPL = "illumina"
if (runConfig.contains("PU")) aorrg.RGPU = runConfig("PU").toString
else aorrg.RGPU = "na"
if (runConfig.contains("CN")) aorrg.RGCN = runConfig("CN").toString
aorrg.RGPL = config("PL", default = "illumina")
aorrg.RGPU = config("PU", default = "na")
aorrg.RGCN = config("CN")
add(aorrg, isIntermediate = true)
bamFile = aorrg.output
} else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile +
......@@ -219,7 +236,10 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
libraryOutput.mappedBamFile = bamFile
}
} else logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
} else {
logger.error("Sample: " + sampleID + ": No R1 found for run: " + runID)
return libraryOutput
}
val gatkVariantcalling = new GatkVariantcalling(this)
gatkVariantcalling.inputBams = List(libraryOutput.mappedBamFile)
......
......@@ -99,14 +99,13 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
val temp = MessageDigest.getInstance("MD5").digest(bytes).map("%02X".format(_)).mkString.toLowerCase
BiopetCommandLineFunctionTrait.executableMd5Cache += executable -> temp
}
addJobReportBinding("md5sum_exe", BiopetCommandLineFunctionTrait.executableMd5Cache(executable))
} catch {
case ioe: java.io.IOException => logger.warn("Could not use 'which', check on executable skipped: " + ioe)
}
} else {
addJobReportBinding("md5sum_exe", BiopetCommandLineFunctionTrait.executableMd5Cache(executable))
}
// The cache may hold no entry for this executable (it is only filled inside the try block above,
// and an IOException there is merely logged), so read it as an Option instead of calling apply
// and comparing with null. The previous null test was inverted: it reported null when the
// checksum was missing and "None" when it was present.
val md5 = BiopetCommandLineFunctionTrait.executableMd5Cache.get(executable).flatMap(Option(_))
// Report the checksum when one is known, otherwise the literal "None"
addJobReportBinding("md5sum_exe", md5.getOrElse("None"))
}
final protected def preCmdInternal {
......
......@@ -57,13 +57,8 @@ trait BiopetQScript extends Configurable with GatkLogging {
case f: BiopetCommandLineFunctionTrait => f.afterGraph
case _ =>
}
val configReport = Config.global.getReport
val configReportFile = new File(outputDir + qSettings.runName + ".configreport.txt")
configReportFile.getParentFile.mkdir
val writer = new PrintWriter(configReportFile)
writer.write(configReport)
writer.close()
for (line <- configReport.split("\n")) logger.debug(line)
Config.global.writeReport(qSettings.runName, outputDir + ".log/" + qSettings.runName)
}
/** Adds the given functions; declared here without a body. */
def add(functions: QFunction*) // Gets implemented at org.broadinstitute.sting.queue.QScript
......
......@@ -16,6 +16,7 @@
package nl.lumc.sasc.biopet.core
import nl.lumc.sasc.biopet.core.config.{ ConfigValue, Config, Configurable }
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.ConfigUtils._
trait MultiSampleQScript extends BiopetQScript {
......@@ -29,12 +30,12 @@ trait MultiSampleQScript extends BiopetQScript {
def getLibrary(key: String) = libraries(key)
}
if (!config.contains("samples")) logger.warn("No Samples found in config")
if (!Config.global.map.contains("samples")) logger.warn("No Samples found in config")
/**
* Returns a map with all sample configs
*/
val getSamplesConfig: Map[String, Any] = config("samples", default = Map())
val getSamplesConfig: Map[String, Any] = ConfigUtils.any2map(Config.global.map.getOrElse("samples", Map()))
/**
* Returns a list of all sampleIDs
......@@ -144,9 +145,7 @@ trait MultiSampleQScript extends BiopetQScript {
*/
def getCurrentLibrary = currentLibrary
/**
* Reset current library manually, only use this when not using runLibraryJobs method
*/
/** Reset current library manually, only use this when not using runLibraryJobs method */
def resetCurrentLibrary() {
logger.debug("Manual library reset")
currentLibrary = null
......@@ -158,23 +157,29 @@ trait MultiSampleQScript extends BiopetQScript {
super.configFullPath
}
protected class ConfigFunctions extends super.ConfigFunctions {
override val config = new ConfigFunctionsExt
protected class ConfigFunctionsExt extends super.ConfigFunctions {
override def apply(key: String,
default: Any = null,
submodule: String = null,
required: Boolean = false,
freeVar: Boolean = true,
sample: String = currentSample,
library: String = currentLibrary): ConfigValue = {
super.apply(key, default, submodule, required, freeVar, sample, library)
sample: String = null,
library: String = null): ConfigValue = {
val s = if (sample == null) currentSample else sample
val l = if (library == null) currentLibrary else library
super.apply(key, default, submodule, required, freeVar, s, l)
}
override def contains(key: String,
submodule: String = null,
freeVar: Boolean = true,
sample: String = currentSample,
library: String = currentLibrary) = {
super.contains(key, submodule, freeVar, sample, library)
sample: String = null,
library: String = null) = {
val s = if (sample == null) currentSample else sample
val l = if (library == null) currentLibrary else library
super.contains(key, submodule, freeVar, s, l)
}
}
}
......@@ -15,10 +15,13 @@
*/
package nl.lumc.sasc.biopet.core.config
import java.io.File
import java.io.{ PrintWriter, File }
import nl.lumc.sasc.biopet.core.Logging
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.ConfigUtils._
import scala.reflect.io.Directory
/**
* This class can store nested config values
* @param map Map with value for new config
......@@ -97,7 +100,7 @@ class Config(var map: Map[String, Any]) extends Logging {
else if (foundCache.contains(requestedIndex)) return true
else {
val value = Config.getValueFromMap(map, requestedIndex)
if (value.isDefined) {
if (value.isDefined && value.get.value != None) {
foundCache += (requestedIndex -> value.get)
return true
} else {
......@@ -137,36 +140,47 @@ class Config(var map: Map[String, Any]) extends Logging {
} else throw new IllegalStateException("Value in config could not be found but it seems required, index: " + requestedIndex)
}
//TODO: New version of report is needed
/**
* Makes report for all used values
* @return Config report
*/
def getReport: String = {
val output: StringBuilder = new StringBuilder
output.append("Config report, sorted on module:\n")
var modules: Map[String, StringBuilder] = Map()
for ((key, value) <- foundCache) {
val module = key.module
if (!modules.contains(module)) modules += (module -> new StringBuilder)
modules(module).append("Found: " + value.toString + "\n")
}
for ((key, value) <- defaultCache) {
val module = key.module
if (!modules.contains(module)) modules += (module -> new StringBuilder)
modules(module).append("Default used: " + value.toString + "\n")
def writeReport(id: String, directory: String): Unit = {
/**
 * Folds a list of (index, value) pairs into one nested map.
 *
 * Each pair becomes `key -> value` (when treated as a free variable) or
 * `module -> (key -> value)` (otherwise), wrapped in one map level per element
 * of the index path, and is then merged into the accumulated result with
 * `ConfigUtils.mergeMaps`.
 *
 * @param input        pairs of config index and value to convert
 * @param forceFreeVar when defined, overrides the `freeVar` flag of every index
 * @return the merged nested map
 */
def convertIndexValuesToMap(input: List[(ConfigValueIndex, Any)], forceFreeVar: Option[Boolean] = None): Map[String, Any] = {
  input.foldLeft(Map[String, Any]()) {
    case (merged, (index, value)) =>
      val leaf: Map[String, Any] =
        if (forceFreeVar.getOrElse(index.freeVar)) Map(index.key -> value)
        else Map(index.module -> Map(index.key -> value))
      // Wrap from the innermost path element outwards so the first path element ends up outermost
      val nested = index.path.foldRight(leaf)((segment, inner) => Map(segment -> inner))
      ConfigUtils.mergeMaps(merged, nested)
  }
}
for (value <- notFoundCache) {
val module = value.module
if (!modules.contains(module)) modules += (module -> new StringBuilder)
if (!defaultCache.contains(value)) modules(module).append("Not Found: " + value.toString + "\n")
}
for ((key, value) <- modules) {
output.append("Config options for module: " + key + "\n")
output.append(value.toString)
output.append("\n")
/**
 * Writes `map` as JSON text (`ConfigUtils.mapToJson(map).spaces2`) to
 * `<directory>/<id>.<name>.json`, creating missing parent directories first.
 *
 * @param map  content to serialise
 * @param name report name used as the middle part of the file name
 */
def writeMapToJsonFile(map: Map[String, Any], name: String): Unit = {
  val file = new File(directory + "/" + id + "." + name + ".json")
  file.getParentFile.mkdirs()
  val writer = new PrintWriter(file)
  // Close the writer even when serialising or writing throws, so the file handle is not leaked
  try writer.write(ConfigUtils.mapToJson(map).spaces2)
  finally writer.close()
}
return output.toString
// Positions where values are found
val found = convertIndexValuesToMap(foundCache.filter(!_._2.default).toList.map(x => (x._2.foundIndex, x._2.value)))
// Positions where to start searching
val effectiveFound = convertIndexValuesToMap(foundCache.filter(!_._2.default).toList.map(x => (x._2.requestIndex, x._2.value)), Some(false))
val effectiveDefaultFound = convertIndexValuesToMap(defaultCache.filter(_._2.default).toList.map(x => (x._2.requestIndex, x._2.value)), Some(false))
val notFound = convertIndexValuesToMap(notFoundCache.map((_, None)), Some(false))
// Merged maps
val fullEffective = ConfigUtils.mergeMaps(effectiveFound, effectiveDefaultFound)
val fullEffectiveWithNotFound = ConfigUtils.mergeMaps(fullEffective, notFound)
writeMapToJsonFile(Config.global.map, "input")
writeMapToJsonFile(found, "found")
writeMapToJsonFile(effectiveFound, "effective.found")
writeMapToJsonFile(effectiveDefaultFound, "effective.defaults")
writeMapToJsonFile(notFound, "not.found")
writeMapToJsonFile(fullEffective, "effective.full")
writeMapToJsonFile(fullEffectiveWithNotFound, "effective.full.notfound")
}
override def toString(): String = map.toString
......
......@@ -20,34 +20,26 @@ import nl.lumc.sasc.biopet.core.Logging
import nl.lumc.sasc.biopet.utils.ConfigUtils.ImplicitConversions
trait Configurable extends ImplicitConversions {
/**
* Should be object of parent object
*/
/** Should be object of parent object */
val root: Configurable
/**
* Get default path to search config values for current object
* @return
*/
def configPath: List[String] = if (root != null) root.configFullPath else List()
/** Suffix to the path */
def subPath: List[String] = Nil
/**
* Gets name of module for config
* @return
*/
/** Get default path to search config values for current object */
def configPath: List[String] = if (root != null) root.configFullPath ::: subPath else subPath
/** Gets name of module for config */
protected[core] def configName = getClass.getSimpleName.toLowerCase
/**
* Full path with module in there
* @return
*/
/** Full path with module in there */
protected[core] def configFullPath: List[String] = configPath ::: configName :: Nil
/**
* Map to store defaults for config
*/
var defaults: scala.collection.mutable.Map[String, Any] = if (root != null) scala.collection.mutable.Map(root.defaults.toArray: _*)
else scala.collection.mutable.Map()
/** Mutable map of config defaults; starts as a copy of the root's defaults when a root is set, empty otherwise */
var defaults: scala.collection.mutable.Map[String, Any] = Option(root) match {
  case Some(parent) => scala.collection.mutable.Map(parent.defaults.toArray: _*)
  case None => scala.collection.mutable.Map()
}
val config = new ConfigFunctions
......@@ -62,7 +54,7 @@ trait Configurable extends ImplicitConversions {
def path(sample: String = null, library: String = null, submodule: String = null) = {
(if (sample != null) "samples" :: sample :: Nil else Nil) :::
(if (library != null) "libraries" :: library :: Nil else Nil) :::
(if (submodule != null) configName :: configPath else configPath)
(if (submodule != null) configPath ::: configName :: Nil else configPath)
}
/**
......
......@@ -13,12 +13,13 @@
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.aligners
package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File
class Bowtie(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Fastq file R1", shortName = "R1")
......
......@@ -42,7 +42,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction {
var opt_front: Set[String] = Set()
if (config.contains("front")) for (adapter <- config("front").asList) opt_front += adapter.toString
var opt_discard: Boolean = config("discard")
var opt_discard: Boolean = config("discard", default = false)
var opt_minimum_length: String = config("minimum_length", 1)
var opt_maximum_length: String = config("maximum_length")
......
......@@ -39,9 +39,9 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "fastqc")
var java_exe: String = config("exe", default = "java", submodule = "java", freeVar = false)
var kmers: Option[Int] = config("kmers")
var quiet: Boolean = config("quiet")
var noextract: Boolean = config("noextract")
var nogroup: Boolean = config("nogroup")
var quiet: Boolean = config("quiet", default = false)
var noextract: Boolean = config("noextract", default = false)
var nogroup: Boolean = config("nogroup", default = false)
var extract: Boolean = config("extract", default = true)
override val versionRegex = """FastQC (.*)""".r
......
......@@ -42,10 +42,10 @@ class RunGubbins(val root: Configurable) extends BiopetCommandLineFunction {
var iterations: Option[Int] = config("iterations")
var minSnps: Option[Int] = config("min_snps")
var convergeMethod: String = config("converge_method")
var useTimeStamp: Boolean = config("use_time_stamp")
var useTimeStamp: Boolean = config("use_time_stamp", default = false)
var prefix: String = config("prefix")
var verbose: Boolean = config("verbose")
var noCleanup: Boolean = config("no_cleanup")
var verbose: Boolean = config("verbose", default = false)
var noCleanup: Boolean = config("no_cleanup", default = false)
override def afterGraph: Unit = {
super.afterGraph
......
......@@ -46,9 +46,9 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction {
var qualityType: String = config("qualitytype")
var qualityThreshold: Option[Int] = config("qualityThreshold")
var lengthThreshold: Option[Int] = config("lengthThreshold")
var noFiveprime: Boolean = config("noFiveprime")
var discardN: Boolean = config("discardN")
var quiet: Boolean = config("quiet")
var noFiveprime: Boolean = config("noFiveprime", default = false)
var discardN: Boolean = config("discardN", default = false)
var quiet: Boolean = config("quiet", default = false)
var defaultQualityType: String = config("defaultqualitytype", default = "sanger")
override val versionRegex = """sickle version (.*)""".r
override def versionCommand = executable + " --version"
......
......@@ -13,12 +13,13 @@
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.aligners
package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File
class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "FastQ file R1", shortName = "R1")
......
......@@ -13,12 +13,13 @@
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.aligners
package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
import java.io.File
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
class Star(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "The reference file for the bam files.", required = false)
......
......@@ -13,12 +13,13 @@
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.aligners
package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
import java.io.File
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
class TopHat(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "FastQ file R1", shortName = "R1")
......
......@@ -18,6 +18,7 @@ package nl.lumc.sasc.biopet.extensions.bedtools
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
abstract class Bedtools extends BiopetCommandLineFunction {
override def subPath = "bedtools" :: super.subPath
executable = config("exe", default = "bedtools", submodule = "bedtools")
override def versionCommand = executable + " --version"
override val versionRegex = """bedtools (.*)""".r
......
package nl.lumc.sasc.biopet.extensions.bwa
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
/**
* Created by pjvan_thof on 1/16/15.
*/
abstract class Bwa extends BiopetCommandLineFunction {