Commit 0c9b920b authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch 'develop' into feature-report

Conflicts:
	public/biopet-framework/pom.xml
parents 72d9f008 f7e873b0
......@@ -42,7 +42,7 @@ Usage: VcfFilter [options]
Filter when there are only ref calls
--filterNoCalls
Filter when there are only no calls
--minQualscore <value>
--minQualScore <value>
Min qual score
~~~
......
......@@ -115,5 +115,10 @@
<artifactId>scalate-core_2.10</artifactId>
<version>1.7.0</version>
</dependency>
<dependency>
<groupId>org.yaml</groupId>
<artifactId>snakeyaml</artifactId>
<version>1.15</version>
</dependency>
</dependencies>
</project>
......@@ -25,19 +25,19 @@ import org.broadinstitute.gatk.queue.function.scattergather.ScatterGatherableFun
import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging }
import scala.collection.mutable.ListBuffer
/**
* Base for biopet pipeline
*/
/** Base for biopet pipeline */
trait BiopetQScript extends Configurable with GatkLogging {
@Argument(doc = "JSON config file(s)", fullName = "config_file", shortName = "config", required = false)
@Argument(doc = "JSON / YAML config file(s)", fullName = "config_file", shortName = "config", required = false)
val configfiles: List[File] = Nil
@Argument(doc = "Config values, value should be formatted like 'key=value' or 'path:path:key=value'", fullName = "config_value", shortName = "cv", required = false)
val configValues: List[String] = Nil
/** Output directory of pipeline */
var outputDir: File = {
Config.getValueFromMap(globalConfig.map, ConfigValueIndex(this.configName, configPath, "output_dir")) match {
case Some(value) => new File(value.asString).getAbsoluteFile
case _ => new File(".")
}
if (config.contains("output_dir", path = Nil)) config("output_dir", path = Nil).asFile
else new File(".")
}
@Argument(doc = "Disable all scatters", shortName = "DSC", required = false)
......@@ -57,11 +57,10 @@ trait BiopetQScript extends Configurable with GatkLogging {
/** Pipeline itself */
def biopetScript
/**
* Script from queue itself, final to force some checks for each pipeline and write report
*/
/** Script from queue itself, final to force some checks for each pipeline and write report */
final def script() {
outputDir = config("output_dir").asFile.getAbsoluteFile
outputDir = config("output_dir")
outputDir = outputDir.getAbsoluteFile
init
biopetScript
......
......@@ -20,9 +20,7 @@ import java.io.File
import nl.lumc.sasc.biopet.core.config.Config
import nl.lumc.sasc.biopet.core.workaround.BiopetQCommandLine
/**
* Wrapper around executable from Queue
*/
/** Wrapper around executable from Queue */
trait PipelineCommand extends MainCommand with GatkLogging {
/**
......@@ -31,10 +29,7 @@ trait PipelineCommand extends MainCommand with GatkLogging {
*/
def pipeline = "/" + getClass.getName.stripSuffix("$").replaceAll("\\.", "/") + ".class"
/**
* Class can be used directly from java with -cp option
* @param args
*/
/** Class can be used directly from java with -cp option */
def main(args: Array[String]): Unit = {
val argsSize = args.size
for (t <- 0 until argsSize) {
......@@ -42,6 +37,17 @@ trait PipelineCommand extends MainCommand with GatkLogging {
if (t >= argsSize) throw new IllegalStateException("-config needs a value")
Config.global.loadConfigFile(new File(args(t + 1)))
}
if (args(t) == "-cv" || args(t) == "--config_value") {
val v = args(t + 1).split("=")
require(v.size == 2, "Value should be formatted like 'key=value' or 'path:path:key=value'")
val value = v(1)
val p = v(0).split(":")
val key = p.last
val path = p.dropRight(1).toList
Config.global.addValue(key, value, path)
}
if (args(t) == "--logging_level" || args(t) == "-l") {
args(t + 1).toLowerCase match {
case "debug" => Logging.logger.setLevel(org.apache.log4j.Level.DEBUG)
......
......@@ -20,19 +20,16 @@ import nl.lumc.sasc.biopet.core.Logging
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.ConfigUtils._
import scala.reflect.io.Directory
/**
* This class can store nested config values
* @param map Map with value for new config
* @constructor Load config with existing map
*/
class Config(var map: Map[String, Any]) extends Logging {
class Config(var map: Map[String, Any],
protected[core] var defaults: Map[String, Any] = Map()) extends Logging {
logger.debug("Init phase of config")
/**
* Default constructor
*/
/** Default constructor */
def this() = {
this(Map())
loadDefaultConfig()
......@@ -41,15 +38,16 @@ class Config(var map: Map[String, Any]) extends Logging {
/**
* Loading an environment variable as location of config files to merge into the config
* @param valueName Name of value
* @param default if true files are added to default instead of normal map
*/
def loadConfigEnv(valueName: String) {
def loadConfigEnv(valueName: String, default: Boolean) {
sys.env.get(valueName) match {
case Some(globalFiles) => {
for (globalFile <- globalFiles.split(":")) {
val file: File = new File(globalFile)
if (file.exists) {
logger.info("Loading config file: " + file)
loadConfigFile(file)
loadConfigFile(file, default)
} else logger.warn(valueName + " value found but file '" + file + "' does not exist, no global config is loaded")
}
}
......@@ -57,23 +55,39 @@ class Config(var map: Map[String, Any]) extends Logging {
}
}
/**
* Loading default value for biopet
*/
/** Loading default value for biopet */
def loadDefaultConfig() {
loadConfigEnv("BIOPET_CONFIG")
loadConfigEnv("BIOPET_CONFIG", true)
}
/**
* Merge a json file into the config
* @param configFile Location of file
*/
def loadConfigFile(configFile: File) {
def loadConfigFile(configFile: File, default: Boolean = false) {
val configMap = fileToConfigMap(configFile)
if (default) {
if (defaults.isEmpty) defaults = configMap
else defaults = mergeMaps(configMap, defaults)
logger.debug("New defaults: " + defaults)
} else {
if (map.isEmpty) map = configMap
else map = mergeMaps(configMap, map)
logger.debug("New config: " + map)
}
}
if (map.isEmpty) map = configMap
else map = mergeMaps(configMap, map)
logger.debug("New config: " + map)
/**
 * Add a single value to the config.
 *
 * The key/value pair is wrapped in one nested map per element of `path`
 * (first element outermost) and the result is merged into either the
 * regular config map or the defaults map.
 *
 * @param key key of value
 * @param value value itself
 * @param path Path to value, outermost element first; empty means top level
 * @param default if true value is put in default map instead of the normal map
 */
def addValue(key: String, value: Any, path: List[String] = Nil, default: Boolean = false): Unit = {
  // Innermost level: the plain key -> value pair
  val leaf: Map[String, Any] = Map(key -> value)
  // Walk the path from its last element to its first, wrapping one level per element
  val nested = path.foldRight(leaf) { (segment, inner) => Map(segment -> inner) }
  if (default) {
    defaults = mergeMaps(nested, defaults)
  } else {
    map = mergeMaps(nested, map)
  }
}
protected[config] var notFoundCache: List[ConfigValueIndex] = List()
......
......@@ -19,64 +19,34 @@ import java.io.File
import nl.lumc.sasc.biopet.utils.ConfigUtils._
class ConfigValue(val requestIndex: ConfigValueIndex, val foundIndex: ConfigValueIndex, val value: Any, val default: Boolean) {
/**
* Get value as String
* @return value as String
*/
/** Get value as String */
def asString = any2string(value)
/**
* Get value as File
* @return value as File
*/
/** Get value as File */
def asFile = new File(any2string(value))
/**
* Get value as Int
* @return value as Int
*/
/** Get value as Int */
def asInt = any2int(value)
/**
* Get value as Double
* @return value as Double
*/
/** Get value as Double */
def asDouble = any2double(value)
/**
* Get value as List[Any]
* @return value as List[Any]
*/
/** Get value as List[Any] */
def asList = any2list(value)
/**
* Get value as List[File]
* @return value as List[File]
*/
/** Get value as List[File] */
def asFileList: List[File] = for (file <- any2stringList(value)) yield new File(file)
/**
* Get value as List[String]
* @return value as List[String]
*/
/** Get value as List[String] */
def asStringList: List[String] = any2stringList(value)
/**
* Get value as Map
* @return value as Map
*/
/** Get value as Map */
def asMap = any2map(value)
/**
* Get value as Boolean
* @return value as Boolean
*/
/** Get value as Boolean */
def asBoolean = any2boolean(value)
/**
* Readable output of indexes and value, just for debug
* @return
*/
/** Readable output of indexes and value, just for debug */
override def toString: String = {
var output = "key = " + requestIndex.key
output += ", value = " + value
......
......@@ -15,9 +15,7 @@
*/
package nl.lumc.sasc.biopet.core.config
import nl.lumc.sasc.biopet.core.Logging
import nl.lumc.sasc.biopet.utils.ConfigUtils.ImplicitConversions
import scala.collection.JavaConversions._
trait Configurable extends ImplicitConversions {
/** Should be object of parent object */
......@@ -39,7 +37,7 @@ trait Configurable extends ImplicitConversions {
/** Map to store defaults for config */
def defaults: Map[String, Any] = {
if (root != null) root.defaults
else Map()
else globalConfig.defaults
}
val config = new ConfigFunctions
......@@ -52,15 +50,13 @@ trait Configurable extends ImplicitConversions {
* @param submodule
* @return
*/
def path(sample: String = null, library: String = null, submodule: String = null) = {
def getConfigPath(sample: String = null, library: String = null, submodule: String = null) = {
(if (sample != null) "samples" :: sample :: Nil else Nil) :::
(if (library != null) "libraries" :: library :: Nil else Nil) :::
(if (submodule != null) configPath ::: configName :: Nil else configPath)
}
/**
* Class is used for retrieval of config values
*/
/** Class is used for retrieval of config values */
protected class ConfigFunctions(val defaultSample: Option[String] = None, val defaultLibrary: Option[String] = None) {
def this(defaultSample: String, defaultLibrary: String) = {
this(defaultSample = Some(defaultSample), defaultLibrary = Some(defaultLibrary))
......@@ -91,11 +87,12 @@ trait Configurable extends ImplicitConversions {
submodule: String = null,
freeVar: Boolean = true,
sample: String = null,
library: String = null): ConfigValue = {
library: String = null,
path: List[String] = null): ConfigValue = {
val s = if (sample != null || defaultSample.isEmpty) sample else defaultSample.get
val l = if (library != null || defaultLibrary.isEmpty) library else defaultLibrary.get
val m = if (submodule != null) submodule else configName
val p = path(s, l, submodule)
val p = if (path == null) getConfigPath(s, l, submodule) else path
val d = {
val value = Config.getValueFromMap(defaults.toMap, ConfigValueIndex(m, p, key, freeVar))
if (value.isDefined) value.get.value else default
......@@ -117,11 +114,12 @@ trait Configurable extends ImplicitConversions {
submodule: String = null,
freeVar: Boolean = true,
sample: String = null,
library: String = null) = {
library: String = null,
path: List[String] = null) = {
val s = if (sample != null || defaultSample.isEmpty) sample else defaultSample.get
val l = if (library != null || defaultLibrary.isEmpty) library else defaultLibrary.get
val m = if (submodule != null) submodule else configName
val p = path(s, l, submodule)
val p = if (path == null) getConfigPath(s, l, submodule) else path
globalConfig.contains(m, p, key, freeVar) || !(Config.getValueFromMap(defaults.toMap, ConfigValueIndex(m, p, key, freeVar)) == None)
}
......
......@@ -15,17 +15,17 @@
*/
package nl.lumc.sasc.biopet.core.summary
import java.io.{ FileInputStream, PrintWriter, File }
import java.security.MessageDigest
import java.io.{ File, PrintWriter }
import scala.collection.mutable
import scala.io.Source
import nl.lumc.sasc.biopet.core.{ BiopetJavaCommandLineFunction, BiopetCommandLineFunction, BiopetCommandLineFunctionTrait, SampleLibraryTag }
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.broadinstitute.gatk.queue.function.{ QFunction, InProcessFunction }
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import scala.collection.mutable
import scala.io.Source
import nl.lumc.sasc.biopet.{ LastCommitHash, Version }
import nl.lumc.sasc.biopet.core.{ BiopetJavaCommandLineFunction, BiopetCommandLineFunction, BiopetCommandLineFunctionTrait, SampleLibraryTag }
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.utils.ConfigUtils
/**
* Created by pjvan_thof on 2/14/15.
......@@ -106,9 +106,13 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
(v1: Any, v2: Any, key: String) => summarizable.resolveSummaryConflict(v1, v2, key))
}).foldRight(pipelineMap)((a, b) => ConfigUtils.mergeMaps(a._1, b, a._2))
val combinedMap = (for (qscript <- qscript.summaryQScripts) yield {
ConfigUtils.fileToConfigMap(qscript.summaryFile)
}).foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b))
val combinedMap = Map("meta" -> Map(
"last_commit_hash" -> LastCommitHash,
"pipeline_version" -> Version
)) ++
(for (qscript <- qscript.summaryQScripts) yield {
ConfigUtils.fileToConfigMap(qscript.summaryFile)
}).foldRight(jobsMap)((a, b) => ConfigUtils.mergeMaps(a, b))
val writer = new PrintWriter(out)
writer.println(ConfigUtils.mapToJson(combinedMap).spaces4)
......
......@@ -57,6 +57,7 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction {
var maxbts: Option[Int] = config("maxbts")
var strata: Boolean = config("strata", default = false)
var maqerr: Option[Int] = config("maqerr")
var maxins: Option[Int] = config("maxins")
/** return commandline to execute */
def cmdLine = {
......@@ -72,6 +73,7 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction {
optional("-m", m) +
optional("--maxbts", maxbts) +
optional("--maqerr", maqerr) +
optional("--maxins", maxins) +
required(reference) +
(R2 match {
case Some(r2) => {
......
......@@ -30,7 +30,7 @@ class SamtoolsMpileup(val root: Configurable) extends Samtools {
@Input(doc = "Reference fasta")
var reference: File = config("reference")
@Input(doc = "Interval bed")
@Input(doc = "Interval bed", required = false)
var intervalBed: Option[File] = config("interval_bed")
var disableBaq: Boolean = config("disable_baq", default = false)
......
......@@ -56,10 +56,10 @@ object AnnotateVcfWithBed extends ToolCommand {
class OptParser extends AbstractOptParser {
opt[File]('I', "inputFile") required () unbounded () valueName ("<vcf file>") action { (x, c) =>
c.copy(inputFile = x)
} text ("out is a required file property")
} text ("Input is a required file property")
opt[File]('B', "bedFile") required () unbounded () valueName ("<bed file>") action { (x, c) =>
c.copy(bedFile = x)
} text ("out is a required file property")
} text ("Bedfile is a required file property")
opt[File]('o', "output") required () unbounded () valueName ("<vcf file>") action { (x, c) =>
c.copy(outputFile = x)
} text ("out is a required file property")
......@@ -106,7 +106,11 @@ object AnnotateVcfWithBed extends ToolCommand {
val reader = new VCFFileReader(commandArgs.inputFile, false)
val header = reader.getFileHeader
val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputFile).build)
val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().
setOutputFile(commandArgs.outputFile).
setReferenceDictionary(header.getSequenceDictionary).
build)
val fieldType = commandArgs.fieldType match {
case "Integer" => VCFHeaderLineType.Integer
case "Flag" => VCFHeaderLineType.Flag
......
......@@ -81,13 +81,14 @@ object CheckAllelesVcfInBam extends ToolCommand {
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
if (commandArgs.bamFiles.size != commandArgs.samples.size)
logger.warn("Number of samples is diffrent then number of bam files, left over will be removed")
logger.warn("Number of samples is different from number of bam files: additional samples or bam files will not be used")
val samReaderFactory = SamReaderFactory.makeDefault
val bamReaders: Map[String, SamReader] = Map(commandArgs.samples zip commandArgs.bamFiles.map(x => samReaderFactory.open(x)): _*)
val bamHeaders = bamReaders.map(x => (x._1, x._2.getFileHeader))
val reader = new VCFFileReader(commandArgs.inputFile, false)
val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputFile).build)
val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputFile).
setReferenceDictionary(reader.getFileHeader.getSequenceDictionary).build)
val header = reader.getFileHeader
for ((sample, _) <- bamReaders) {
......
......@@ -146,9 +146,8 @@ object ExtractAlignedFastq extends ToolCommand {
def extractReads(memFunc: FastqInput => Boolean,
inputFastq1: FastqReader, outputFastq1: BasicFastqWriter): Unit =
inputFastq1.iterator.asScala
.zip(Iterator.continually(None))
.filter(rec => memFunc(rec._1, rec._2))
.foreach(rec => outputFastq1.write(rec._1))
.filter(rec => memFunc((rec, None)))
.foreach(rec => outputFastq1.write(rec))
/**
* Extracts reads from the given input Fastq pairs and writes to new output Fastq pair files
......@@ -261,17 +260,25 @@ object ExtractAlignedFastq extends ToolCommand {
logger.info("Writing to output file(s) ...")
(commandArgs.inputFastq2, commandArgs.outputFastq2) match {
case (None, None) => extractReads(memFunc,
new FastqReader(commandArgs.inputFastq1),
new BasicFastqWriter(commandArgs.inputFastq1))
case (Some(i2), Some(o2)) => extractReads(memFunc,
new FastqReader(commandArgs.inputFastq1),
new BasicFastqWriter(commandArgs.outputFastq1),
new FastqReader(i2),
new BasicFastqWriter(o2))
case _ => // handled by the command line config check above
case (None, None) =>
val in = new FastqReader(commandArgs.inputFastq1)
val out = new BasicFastqWriter(commandArgs.outputFastq1)
extractReads(memFunc, in, out)
in.close()
out.close()
case (Some(i2), Some(o2)) =>
val in1 = new FastqReader(commandArgs.inputFastq1)
val in2 = new FastqReader(i2)
val out1 = new BasicFastqWriter(commandArgs.outputFastq1)
val out2 = new BasicFastqWriter(o2)
extractReads(memFunc, in1, out1, in2, out2)
in1.close()
in2.close()
out1.close()
out2.close()
case _ => ; // handled by the command line config check above
}
}
}
......@@ -93,7 +93,10 @@ object MergeAlleles extends ToolCommand {
val readers = commandArgs.inputFiles.map(new VCFFileReader(_, true))
val referenceFile = new FastaSequenceFile(commandArgs.reference, true)
val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputFile).build)
val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().
setReferenceDictionary(referenceFile.getSequenceDictionary).
setOutputFile(commandArgs.outputFile).
build)
val header = new VCFHeader
val referenceDict = referenceFile.getSequenceDictionary
header.setSequenceDictionary(referenceDict)
......
......@@ -53,7 +53,10 @@ object SamplesTsvToJson extends ToolCommand {
val values = tsvLine.split("\t")
val sample = values(sampleColumn)
val library = if (libraryColumn != -1) values(libraryColumn) else null
val valuesMap = (for (t <- 0 until values.size if t != sampleColumn if t != libraryColumn) yield (header(t) -> values(t))).toMap
val valuesMap = (for (
t <- 0 until values.size;
if !values(t).isEmpty && t != sampleColumn && t != libraryColumn
) yield (header(t) -> values(t))).toMap
val map: Map[String, Any] = if (library != null) {
Map("samples" -> Map(sample -> Map("libraries" -> Map(library -> valuesMap))))
} else {
......
......@@ -59,14 +59,14 @@ object VcfFilter extends ToolCommand {
case class Args(inputVcf: File = null,
outputVcf: File = null,
invertedOutputVcf: Option[File] = None,
minQualscore: Option[Double] = None,
minQualScore: Option[Double] = None,
minSampleDepth: Int = -1,
minTotalDepth: Int = -1,
minAlternateDepth: Int = -1,
minSamplesPass: Int = 0,
minBamAlternateDepth: Int = 0,
mustHaveVariant: List[String] = Nil,
denovoInSample: String = null,
deNovoInSample: String = null,
diffGenotype: List[(String, String)] = Nil,
filterHetVarToHomVar: List[(String, String)] = Nil,
filterRefCalls: Boolean = false,
......@@ -98,8 +98,8 @@ object VcfFilter extends ToolCommand {
opt[Int]("minBamAlternateDepth") unbounded () valueName ("<int>") action { (x, c) =>
c.copy(minBamAlternateDepth = x)
} // TODO: Convert this to more generic filter
opt[String]("denovoInSample") maxOccurs (1) unbounded () valueName ("<sample>") action { (x, c) =>
c.copy(denovoInSample = x)