Commit 2754750c authored by Peter van 't Hof
Browse files

Merge remote-tracking branch 'remotes/origin/develop' into fix-BIOPET-486

parents 6e79e1f6 bae8c53d
......@@ -18,9 +18,8 @@ import java.io.File
import htsjdk.samtools.reference.IndexedFastaSequenceFile
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.utils.{ LazyCheck, BamUtils, ConfigUtils, FastaUtils, Logging }
import nl.lumc.sasc.biopet.utils._
import nl.lumc.sasc.biopet.utils.config.{ Config, Configurable }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, FastaUtils, Logging }
import scala.collection.JavaConversions._
......@@ -177,4 +176,33 @@ object Reference {
if (!dict.exists()) Logging.addError("Reference is missing a dict file")
}
}
/**
 * Interactively asks which reference should be used.
 *
 * The species and reference names found under "references" in the defaults of the global config are
 * shown as a hint. A path to a fasta file is only requested when the chosen species / reference
 * combination has no "reference_fasta" entry there.
 *
 * @return map with "species" and "reference_name", plus "reference_fasta" when the user had to supply it
 */
def askReference: Map[String, Any] = {
  val warn = "If you use a non-standard reference, please make sure that you have generated all required indexes for this reference"

  // Hint shown with a question: the sorted known names below the header, or a notice that nothing is known
  def hint(header: String, known: Map[String, Any]): String =
    if (known.isEmpty) s"No references found in global config. $warn"
    else known.keys.toList.sorted.mkString(s"$header\n- ", "\n- ", s"\n$warn\n")

  // Nested lookup, an absent key counts as an empty map
  def subMap(parent: Map[String, Any], key: String): Map[String, Any] =
    parent.getOrElse(key, Map()).asInstanceOf[Map[String, Any]]

  val globalSpecies = Config.global.defaults.getOrElse("references", Map()).asInstanceOf[Map[String, Any]]
  val species = Question.string(
    "species",
    description = Some(hint("Species found in general config:", globalSpecies)))

  val globalReferences = subMap(globalSpecies, species)
  val referenceName = Question.string(
    "reference_name",
    description = Some(hint(s"Reference for $species found in general config:", globalReferences)))

  val reference = subMap(globalReferences, referenceName)
  val referenceFasta: Option[String] =
    if (reference.contains("reference_fasta")) None
    else Some(Question.string(
      "Reference Fasta",
      validation = List(TemplateTool.isAbsolutePath, TemplateTool.mustExist),
      description = Some(s"No fasta file found for $species -> $referenceName")))

  val answers = Map[String, Any]("species" -> species, "reference_name" -> referenceName)
  referenceFasta.fold(answers)(fasta => answers + ("reference_fasta" -> fasta))
}
}
package nl.lumc.sasc.biopet.core
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.utils.summary.Summary
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Question, ToolCommand }
import scala.io.Source
/**
 * Base for interactive template tools: asks the user questions on the console and writes the
 * answers to a config file, optionally together with a run script rendered from a template.
 *
 * Created by pjvanthof on 17/12/2016.
 */
trait TemplateTool extends ToolCommand {
  import TemplateTool._

  /**
   * Parsed command line arguments.
   *
   * @param outputConfig file the generated config is written to; the option is required, so the
   *                     null default is always replaced when parsing succeeds
   * @param runScript    optional file to write a run script to
   * @param expert       passed on to [[pipelineMap]] to enable expert questions
   * @param template     optional script template, see [[writeScript]] for the fallback
   */
  case class Args(outputConfig: File = null,
                  runScript: Option[File] = None,
                  expert: Boolean = false,
                  template: Option[File] = None) extends AbstractArgs

  /** Command line parser filling [[Args]] */
  class OptParser extends AbstractOptParser {
    opt[File]('o', "outputConfig") required () valueName "<file>" action { (x, c) =>
      c.copy(outputConfig = x)
    } text "Path to output config"
    opt[File]('s', "outputScript") valueName "<file>" action { (x, c) =>
      c.copy(runScript = Some(x))
    } text "Path to output script"
    opt[File]('t', "template") valueName "<file>" action { (x, c) =>
      c.copy(template = Some(x))
    } text "Path to template. By default it will try to fetch this from the ENV value 'BIOPET_SCRIPT_TEMPLATE'"
    opt[Unit]("expert") action { (x, c) =>
      c.copy(expert = true)
    } text "This enables expert options / questions"
  }

  /**
   * Writes the run script (when requested), asks the questions of this template and writes the
   * resulting config as yaml to the output config file.
   *
   * @param args the command line arguments
   * @throws IllegalArgumentException when the arguments can not be parsed
   */
  def main(args: Array[String]): Unit = {
    val argsParser = new OptParser
    val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)

    cmdArgs.runScript.foreach(writeScript(_, cmdArgs.outputConfig, sampleConfigs, cmdArgs.template))

    val standard: Map[String, Any] = Map("output_dir" -> Question.string("Output directory",
      validation = List(isAbsolutePath, parentIsWritable)))
    val config = pipelineMap(standard, cmdArgs.expert)

    // Render first, so a failure does not leave a truncated config file behind
    val yaml = ConfigUtils.mapToYaml(config)
    val configWriter = new PrintWriter(cmdArgs.outputConfig)
    try configWriter.println(yaml)
    finally configWriter.close()
  }

  /**
   * Renders the script template and writes it to `outputFile`, which is made executable for everyone.
   *
   * The template is used as a format string with two arguments: the pipeline name and the
   * "-config <file>" arguments for the output config followed by all sample configs.
   *
   * @param outputFile file to write the script to
   * @param config     config file that is passed to the pipeline first
   * @param samples    sample config files passed to the pipeline after `config`
   * @param t          template to use, when empty the ENV value 'BIOPET_SCRIPT_TEMPLATE' is used
   * @throws IllegalArgumentException when no template is given and the ENV value is not set
   */
  def writeScript(outputFile: File, config: File, samples: List[File], t: Option[File]): Unit = {
    val template = t
      .orElse(sys.env.get("BIOPET_SCRIPT_TEMPLATE").map(new File(_)))
      .getOrElse(throw new IllegalArgumentException("No template found on argument or 'BIOPET_SCRIPT_TEMPLATE'"))

    val templateReader = Source.fromFile(template)
    val templateText =
      try templateReader.getLines().mkString("\n")
      finally templateReader.close()

    val biopetArgs: String = (config :: samples).map(_.getAbsolutePath).mkString("-config ", " \\\n-config ", "")

    // Format before the output file is opened, a broken template then leaves no empty script behind
    val script = templateText.format(pipelineName, biopetArgs)
    val scriptWriter = new PrintWriter(outputFile)
    try scriptWriter.print(script)
    finally scriptWriter.close()

    outputFile.setExecutable(true, false)
  }

  /** Name of the pipeline, used as first argument when formatting the script template */
  def pipelineName: String

  /**
   * Asks the pipeline specific questions.
   *
   * @param map    values that are already answered ("output_dir" when called from [[main]])
   * @param expert true when expert questions are enabled
   * @return the config map that is written to the output config
   */
  def pipelineMap(map: Map[String, Any], expert: Boolean): Map[String, Any]

  /** Sample config files that are added to the run script, none by default */
  def sampleConfigs: List[File] = Nil
}
/** Validation functions and shared questions for template tools */
object TemplateTool {

  /** Validation: `value` must be an absolute path, a message is printed when it is not */
  def isAbsolutePath(value: String): Boolean = {
    val absolute = new File(value).isAbsolute
    if (!absolute) println(s"'$value' must be an absolute path")
    absolute
  }

  /** Validation: `value` must point to something that exists, a message is printed when it does not */
  def mustExist(value: String): Boolean = {
    val exists = new File(value).exists()
    if (!exists) println(s"'$value' does not exist")
    exists
  }

  /**
   * Validation: the parent of `value` must exist and be readable and writable.
   * The first problem found is printed.
   */
  def parentIsWritable(value: String): Boolean = Option(new File(value).getParentFile) match {
    case None =>
      // getParentFile returns null when the path names no parent, e.g. "/" or a bare file name
      println(s"'$value' does not have a parent directory")
      false
    case Some(parent) if !parent.exists() =>
      println(s"$parent does not exist")
      false
    case Some(parent) if !parent.canRead =>
      println(s"No permission to read $parent")
      false
    case Some(parent) if !parent.canWrite =>
      println(s"No permission to write $parent")
      false
    case _ => true
  }

  /**
   * Asks for sample config files until the user does not want to add more.
   *
   * Every file is summarized and must be confirmed. When more than one file was given the merged
   * config is summarized as well, with warnings for samples and libraries that occur more than
   * once; rejecting the merged config starts over with an empty list.
   *
   * @param currentList files that were already accepted
   * @return all accepted files, the most recently added file first
   */
  def askSampleConfigs(currentList: List[File] = Nil): List[File] = {
    val configFile = new File(Question.string("Sample config file", validation = List(mustExist, isAbsolutePath)))
    val configMap = new Summary(configFile)
    println(s"${configMap.samples.size} samples found in config " +
      s"with in total ${configMap.libraries.map(_._2.size).sum} libraries for '$configFile'")

    if (!Question.boolean("Is this correct?")) askSampleConfigs(currentList) // rejected, ask for this file again
    else if (Question.boolean("Add more sample configs?")) askSampleConfigs(configFile :: currentList)
    else {
      val files = configFile :: currentList
      if (files.size <= 1 || mergedConfigAccepted(files)) files
      else {
        println("Resetting sample configs")
        askSampleConfigs()
      }
    }
  }

  /** Summarizes the merge of all files, warns about overlap and asks the user to confirm the result */
  private def mergedConfigAccepted(files: List[File]): Boolean = {
    val configs = files.map(f => new Summary(ConfigUtils.fileToConfigMap(f)))
    val samples = configs.flatMap(_.samples.toList)
    // toList first: a flatMap directly on a Map would build a Map again and keep only one
    // library per sample, which makes the overlap check below fire without any overlap
    val libs = configs.flatMap(_.libraries.toList.flatMap {
      case (sample, sampleLibs) => sampleLibs.toList.map(lib => (sample, lib))
    })

    val mergedConfig = new Summary(configs.foldLeft(Map[String, Any]())((a, b) => ConfigUtils.mergeMaps(a, b.map)))
    val mergesSamples = mergedConfig.samples.size
    val mergesLibraries = mergedConfig.libraries.map(_._2.size).sum

    if (mergesSamples != samples.size) {
      println("WARNING: Overlapping samples detected:")
      duplicates(samples).foreach(s => println(s" - $s"))
    }
    if (mergesLibraries != libs.size) {
      println("WARNING: Overlapping libraries detected")
      duplicates(libs).foreach(l => println(s" - ${l._1} -> ${l._2}"))
    }
    println(s"$mergesSamples samples found in merged config with in total $mergesLibraries libraries")
    Question.boolean("Is this correct?")
  }

  /** Values that occur more than once, each reported once in order of first appearance */
  private def duplicates[T](values: List[T]): List[T] = {
    val counts = values.groupBy(identity).map { case (value, hits) => value -> hits.size }
    values.distinct.filter(counts(_) > 1)
  }
}
\ No newline at end of file
......@@ -43,4 +43,10 @@ object BiopetExecutableMain extends BiopetExecutable {
)
/** Tools of this executable: PipelineStatus followed by every tool of BiopetToolsExecutable */
def tools: List[MainCommand] = PipelineStatus +: BiopetToolsExecutable.tools
/** Template tools of this executable */
def templates: List[MainCommand] =
  nl.lumc.sasc.biopet.pipelines.mapping.template.MultiSampleMapping ::
    nl.lumc.sasc.biopet.pipelines.shiva.template.Shiva ::
    nl.lumc.sasc.biopet.pipelines.gentrap.template.Gentrap ::
    Nil
}
......@@ -53,4 +53,6 @@ object BiopetToolsExecutable extends BiopetExecutable {
nl.lumc.sasc.biopet.tools.VepNormalizer,
nl.lumc.sasc.biopet.tools.WipeReads,
nl.lumc.sasc.biopet.tools.DownloadNcbiAssembly)
/** This executable has no template tools */
def templates: List[MainCommand] = Nil
}
......@@ -30,9 +30,12 @@ trait BiopetExecutable extends Logging {
def tools: List[MainCommand]
def templates: List[MainCommand]
val modules: Map[String, List[MainCommand]] = Map(
"pipeline" -> pipelines,
"tool" -> tools)
"tool" -> tools,
"template" -> templates)
/**
* @param args the command line arguments
......
......@@ -65,22 +65,20 @@ object ConfigUtils extends Logging {
*/
def mergeMaps(map1: Map[String, Any], map2: Map[String, Any],
resolveConflict: (Any, Any, String) => Any = (m1, m2, key) => m1): Map[String, Any] = {
var newMap: Map[String, Any] = Map()
for (key <- map1.keySet.++(map2.keySet)) {
if (!map2.contains(key)) newMap += (key -> map1(key))
else if (!map1.contains(key)) newMap += (key -> map2(key))
(for (key <- map1.keySet.++(map2.keySet)) yield {
if (!map2.contains(key)) (key -> map1(key))
else if (!map1.contains(key)) (key -> map2(key))
else {
map1(key) match {
case m1: Map[_, _] =>
map2(key) match {
case m2: Map[_, _] => newMap += (key -> mergeMaps(any2map(m1), any2map(m2), resolveConflict))
case _ => newMap += (key -> map1(key))
case m2: Map[_, _] => (key -> mergeMaps(any2map(m1), any2map(m2), resolveConflict))
case _ => (key -> map1(key))
}
case _ => newMap += (key -> resolveConflict(map1(key), map2(key), key))
case _ => (key -> resolveConflict(map1(key), map2(key), key))
}
}
}
newMap
}).toMap
}
/**
......
package nl.lumc.sasc.biopet.utils
/**
 * Questions that are asked on the console. Every question prints its description, the possible
 * values and the default (when given), and is repeated until an acceptable answer is entered.
 *
 * Created by pjvanthof on 16/12/2016.
 */
object Question {

  /**
   * Reads one line of input.
   *
   * @throws IllegalStateException when the input has ended; Console.readLine returns null in that
   *                               case, which used to surface as a NullPointerException
   */
  private def readAnswer(): String = Option(Console.readLine) match {
    case Some(line) => line
    case None       => throw new IllegalStateException("Input ended while waiting for an answer")
  }

  /**
   * Asks for a single string value.
   *
   * @param name          text of the prompt
   * @param default       value returned when the answer is empty; without a default an answer is required
   * @param description   text printed before the prompt
   * @param posibleValues when not empty the answer must be one of these values
   * @param validation    checks that must all accept the answer; a check is expected to print why it failed
   * @return the trimmed answer, or the default when the answer was empty
   */
  def string(name: String,
             default: Option[String] = None,
             description: Option[String] = None,
             posibleValues: List[String] = Nil,
             validation: List[(String) => Boolean] = Nil): String = {
    description.foreach(println)
    if (posibleValues.nonEmpty) println(s"possible values: ${posibleValues.mkString(", ")}")
    default.foreach(x => println(s"Default value: $x"))
    print(s"$name > ")
    (readAnswer().trim, default) match {
      case ("", Some(d)) => d
      case ("", None) =>
        println("ERROR: Value is required")
        string(name, default, description, posibleValues, validation)
      case (a, _) =>
        if (!validation.forall(_(a))) {
          println("ERROR: Validation failed")
          string(name, default, description, posibleValues, validation)
        } else if (posibleValues.nonEmpty && !posibleValues.contains(a)) {
          println("ERROR: Value not allowed")
          string(name, default, description, posibleValues, validation)
        } else a
    }
  }

  /**
   * Asks a yes/no question. Accepted answers (case insensitive) are "y", "yes", "true", "n", "no" and "false".
   *
   * @param name        text of the prompt
   * @param default     value returned when the answer is empty; without a default an answer is required
   * @param description text printed before the prompt
   */
  def boolean(name: String,
              default: Option[Boolean] = None,
              description: Option[String] = None): Boolean = {
    description.foreach(println)
    default.foreach(x => println(s"Default value: $x"))
    print(s"$name (y/n) > ")
    readAnswer().trim.toLowerCase match {
      case "" => default match {
        case Some(d) => d
        case _ =>
          println("ERROR: Value is required")
          boolean(name, default, description)
      }
      case "y" | "yes" | "true" => true
      case "n" | "no" | "false" => false
      case _ =>
        println("ERROR: Value is a boolean value, please select 'y' or 'n'")
        boolean(name, default, description)
    }
  }

  /**
   * Asks for a comma separated list of values.
   *
   * Entries are trimmed and empty entries are dropped. An answer without any entry (for example
   * "" or ",") counts as no answer.
   *
   * @param name          text of the prompt
   * @param default       value returned when no entry is given; without a default an answer is required
   * @param description   text printed before the prompt
   * @param posibleValues when not empty every entry must be one of these values
   * @param validation    check that must accept every entry
   */
  def list(name: String,
           default: Option[List[String]] = None,
           description: Option[String] = None,
           posibleValues: List[String] = Nil,
           validation: (String) => Boolean = _ => true): List[String] = {
    description.foreach(println)
    if (posibleValues.nonEmpty) println(s"possible values: ${posibleValues.mkString(", ")}")
    default.foreach(x => println(s"Default value: $x"))
    print(s"$name > ")
    val entries = readAnswer().split(",").toList.map(_.trim).filter(_.nonEmpty)
    (entries, default) match {
      case (Nil, Some(d)) => d
      case (Nil, None) =>
        println("ERROR: Value is required")
        list(name, default, description, posibleValues, validation)
      case (a, _) =>
        if (!a.forall(validation)) {
          println("ERROR: Validation failed")
          list(name, default, description, posibleValues, validation)
        } else if (posibleValues.nonEmpty && !a.forall(posibleValues.contains)) {
          println("ERROR: Value not allowed")
          list(name, default, description, posibleValues, validation)
        } else a
    }
  }
}
......@@ -47,11 +47,11 @@ class Config(protected var _map: Map[String, Any],
for (globalFile <- globalFiles.split(":")) {
val file: File = new File(globalFile)
if (file.exists) {
logger.info("Loading config file: " + file)
logger.debug("Loading config file: " + file)
loadConfigFile(file, default)
} else logger.warn(valueName + " value found but file '" + file + "' does not exist, no global config is loaded")
}
case _ => logger.info(valueName + " value not found, no global config is loaded")
case _ => logger.debug(valueName + " value not found, no global config is loaded")
}
}
......
......@@ -101,6 +101,30 @@ package object utils {
}
}
/** Converts an underscore separated string into camel-case, e.g. "fragments_per_gene" becomes "FragmentsPerGene" */
def camelize(ustring: String): String = {
  val words = for (part <- ustring.split("_")) yield part.toLowerCase.capitalize
  words.mkString
}
/**
 * Splits a camel-case string into words. A new word starts at every character after the first
 * one that is not lower case.
 *
 * @param string  text that still has to be split
 * @param current words found so far, the words of `string` are appended to it
 */
def camelizeToWords(string: String, current: List[String] = Nil): List[String] = {
  if (string.isEmpty) current
  else string.indexWhere(!_.isLower, 1) match {
    case -1       => current :+ string
    case boundary => camelizeToWords(string.drop(boundary), current :+ string.take(boundary))
  }
}
/** Converts a camel-case string into its lower-cased words joined by underscores */
def unCamelize(string: String): String = {
  val lowerCased = for (word <- camelizeToWords(string)) yield word.toLowerCase
  lowerCased.mkString("_")
}
/** Function to sort Any values */
def sortAnyAny(a: Any, b: Any): Boolean = {
a match {
......
......@@ -23,8 +23,9 @@ import nl.lumc.sasc.biopet.utils.ConfigUtils
*
* Created by pjvan_thof on 3/26/15.
*/
class Summary(file: File) {
val map = ConfigUtils.fileToConfigMap(file)
class Summary(val map: Map[String, Any]) {
def this(file: File) = this(ConfigUtils.fileToConfigMap(file))
/** List of all samples in the summary */
lazy val samples: Set[String] = {
......
......@@ -30,4 +30,6 @@ object ExecutableExample extends BiopetExecutable {
/** The (biopet)tools that are usable from the executable, none in this example */
def tools: List[MainCommand] = List.empty
/** The template tools that are usable from the executable, none in this example */
def templates: List[MainCommand] = Nil
}
......@@ -204,7 +204,7 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
fqSync.outputFastq2 = new File(outDir, fastqR2Qc.get.getName)
fqSync.outputStats = new File(outDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.sync.stats")
val pipe = new BiopetFifoPipe(this, fqSync :: Nil) with Summarizable {
val pipe = new BiopetFifoPipe(this, fqSync :: qcCmdR1.jobs ::: qcCmdR2.jobs) with Summarizable {
override def configNamespace = "qc_cmd"
override def beforeGraph(): Unit = {
......@@ -233,6 +233,8 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
pipe.deps ::= fastqcR1.output
pipe.deps ::= fastqcR2.output
pipe.deps ::= R1_in
pipe.deps ::= R2_in.get
pipe.isIntermediate = !keepQcFastqFiles
addSummarizable(pipe, "qc_cmd")
add(pipe)
......
......@@ -24,6 +24,7 @@ import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling
import nl.lumc.sasc.biopet.utils.{ LazyCheck, Logging }
import nl.lumc.sasc.biopet.utils.config._
import nl.lumc.sasc.biopet.utils.camelize
import org.broadinstitute.gatk.queue.QScript
import picard.analysis.directed.RnaSeqMetricsCollector.StrandSpecificity
import java.io.File
......@@ -54,7 +55,7 @@ class Gentrap(val root: Configurable) extends QScript
// see the enumeration below for valid modes
lazy val expMeasures = new LazyCheck({
config("expression_measures", default = Nil).asStringList.map(value =>
ExpMeasures.values.find(_.toString == Gentrap.camelize(value)) match {
ExpMeasures.values.find(_.toString == camelize(value)) match {
case Some(v) => v
case _ => throw new IllegalArgumentException(s"'$value' is not a valid Expression measurement")
}
......@@ -64,7 +65,7 @@ class Gentrap(val root: Configurable) extends QScript
/** Strandedness modes */
lazy val strandProtocol = new LazyCheck({
val value: String = config("strand_protocol")
StrandProtocol.values.find(_.toString == Gentrap.camelize(value)) match {
StrandProtocol.values.find(_.toString == camelize(value)) match {
case Some(v) => v
case other =>
Logging.addError(s"'$other' is no strand_protocol or strand_protocol is not given")
......@@ -240,7 +241,7 @@ object Gentrap extends PipelineCommand {
/** Enumeration of available expression measures */
object ExpMeasures extends Enumeration {
val FragmentsPerGene, FragmentsPerExon, BaseCounts, CufflinksStrict, CufflinksGuided, CufflinksBlind = Value
val FragmentsPerGene, BaseCounts, CufflinksStrict, CufflinksGuided, CufflinksBlind = Value
}
/** Enumeration of available strandedness */
......@@ -248,10 +249,4 @@ object Gentrap extends PipelineCommand {
// for now, only non-strand specific and dUTP stranded protocol is supported
val NonSpecific, Dutp = Value
}
/**
 * Converts string with underscores into camel-case strings
 *
 * The input is split on "_"; every part is lower-cased, its first character is upper-cased and
 * the parts are concatenated without a separator.
 *
 * @param ustring underscore separated string
 * @return the camel-case form of `ustring`
 */
private[gentrap] def camelize(ustring: String): String = ustring
.split("_")
.map(_.toLowerCase.capitalize)
.mkString("")
}
package nl.lumc.sasc.biopet.pipelines.gentrap.template
import java.io.File
import nl.lumc.sasc.biopet.core.TemplateTool
import nl.lumc.sasc.biopet.pipelines.gentrap.Gentrap.{ ExpMeasures, StrandProtocol }
import nl.lumc.sasc.biopet.pipelines.mapping.template.MultiSampleMapping
import nl.lumc.sasc.biopet.pipelines.shiva.template.Shiva
import nl.lumc.sasc.biopet.utils._
/**
 * Template tool that asks the questions needed to write a Gentrap config.
 *
 * Created by pjvanthof on 17/12/2016.
 */
object Gentrap extends TemplateTool {

  def pipelineName: String = "Gentrap"

  /** Sample configs are asked interactively */
  override def sampleConfigs: List[File] = TemplateTool.askSampleConfigs()

  /**
   * Asks the Gentrap questions on top of the multi sample mapping questions.
   *
   * @param map    values that are already answered
   * @param expert passed on to the mapping questions
   * @return the mapping config extended with the Gentrap settings
   */
  def pipelineMap(map: Map[String, Any], expert: Boolean): Map[String, Any] = {
    val aligner = Question.string("Aligner", posibleValues = MultiSampleMapping.possibleAligners,
      default = Some("gsnap"))
    val mappingConfig = MultiSampleMapping.pipelineMap(map ++ Map("aligner" -> aligner), expert)

    val expressionMeasures = Question.list("ExpressionMeasures",
      posibleValues = ExpMeasures.values.map(x => unCamelize(x.toString)).toList)

    // The Gentrap pipeline reads "strand_protocol" as one String, so exactly one value is asked
    // here; a list in the config would not match what the pipeline expects
    val strandProtocol = Question.string("StrandProtocol",
      posibleValues = StrandProtocol.values.map(x => unCamelize(x.toString)).toList)

    val annotationRefFlat = Question.string("annotationRefFlat",
      validation = List(TemplateTool.isAbsolutePath, TemplateTool.mustExist))

    // A gtf file is only asked for fragments per gene and the cufflinks measures
    val gtfRequired = expressionMeasures.contains(unCamelize(ExpMeasures.FragmentsPerGene.toString)) ||
      expressionMeasures.exists(_.startsWith("cufflinks"))
    val annotationGtf =
      if (gtfRequired) Some(Question.string("annotationGtf",
        validation = List(TemplateTool.isAbsolutePath, TemplateTool.mustExist)))
      else None

    val variantConfig: Map[String, Any] =
      if (Question.boolean("Call variants")) {
        val variantCallers = Question.list("Variantcallers", posibleValues = Shiva.possibleVariantcallers,
          default = Some(List("varscan_cns_singlesample")))
        Map("call_variants" -> true, "variantcallers" -> variantCallers)
      } else Map("call_variants" -> false)

    mappingConfig ++ annotationGtf.map("annotation_gtf" -> _) ++ Map(
      "expression_measures" -> expressionMeasures,
      "strand_protocol" -> strandProtocol,
      "annotation_refflat" -> annotationRefFlat
    ) ++ variantConfig
  }
}
......@@ -24,6 +24,7 @@ import nl.lumc.sasc.biopet.extensions.hisat.Hisat2
import nl.lumc.sasc.biopet.extensions.tools.{ BaseCounter, WipeReads }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
import nl.lumc.sasc.biopet.utils.config.Config
import nl.lumc.sasc.biopet.utils.camelize
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
......@@ -97,10 +98,10 @@ abstract class GentrapTestAbstract(val expressionMeasures: List[String]) extends
gentrap.shivaVariantcalling.isDefined shouldBe callVariants.getOrElse(false)
gentrap.summarySettings.getOrElse("expression_measures", List()).asInstanceOf[List[String]].sorted shouldBe
expressionMeasures.map(Gentrap.camelize(_)).sorted
expressionMeasures.map(camelize(_)).sorted
gentrap.summarySettings.get("call_variants") shouldBe Some(callVariants.getOrElse(false))
gentrap.summarySettings.get("remove_ribosomal_reads") shouldBe Some(removeRiboReads.getOrElse(false))
gentrap.summarySettings.get("strand_protocol") shouldBe Some(Gentrap.camelize(strandProtocol))
gentrap.summarySettings.get("strand_protocol") shouldBe Some(camelize(strandProtocol))
if (expressionMeasures.contains("fragments_per_gene"))
assert(gentrap.functions.exists(_.isInstanceOf[HtseqCount]))
......
......@@ -19,11 +19,11 @@ import java.util.Date
import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.bowtie.{ Bowtie, Bowtie2 }
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaAln, BwaMem, BwaSampe, BwaSamse }
import nl.lumc.sasc.biopet.extensions.bowtie.{Bowtie, Bowtie2}
import nl.lumc.sasc.biopet.extensions.bwa.{BwaAln, BwaMem, BwaSampe, BwaSamse}
import nl.lumc.sasc.biopet.extensions.gmap.Gsnap