diff --git a/.idea/libraries/Maven__org_scala_lang_scala_library_2_11_2.xml b/.idea/libraries/Maven__org_scala_lang_scala_library_2_11_2.xml index 4974624050ae58e9934ee891dcf7d7c92dd06127..c2e5403fca7594c3ebd3a04af4eb51aacde2b881 100644 --- a/.idea/libraries/Maven__org_scala_lang_scala_library_2_11_2.xml +++ b/.idea/libraries/Maven__org_scala_lang_scala_library_2_11_2.xml @@ -1,5 +1,12 @@ <component name="libraryTable"> - <library name="Maven: org.scala-lang:scala-library:2.11.2"> + <library name="Maven: org.scala-lang:scala-library:2.11.2" type="Scala"> + <properties> + <compiler-classpath> + <root url="file://$MAVEN_REPOSITORY$/org/scala-lang/scala-compiler/2.11.2/scala-compiler-2.11.2.jar" /> + <root url="file://$MAVEN_REPOSITORY$/org/scala-lang/scala-library/2.11.2/scala-library-2.11.2.jar" /> + <root url="file://$MAVEN_REPOSITORY$/org/scala-lang/scala-reflect/2.11.2/scala-reflect-2.11.2.jar" /> + </compiler-classpath> + </properties> <CLASSES> <root url="jar://$MAVEN_REPOSITORY$/org/scala-lang/scala-library/2.11.2/scala-library-2.11.2.jar!/" /> </CLASSES> diff --git a/.idea/libraries/Maven__org_scalaz_scalaz_core_2_11_7_1_0.xml b/.idea/libraries/Maven__org_scalaz_scalaz_core_2_11_7_1_0.xml index 3c580f0594f1d9639ee664960bcd99689b6cfb36..8cd45ed1bd94557efd9d1bb7c6bbc4c9ffd3835d 100644 --- a/.idea/libraries/Maven__org_scalaz_scalaz_core_2_11_7_1_0.xml +++ b/.idea/libraries/Maven__org_scalaz_scalaz_core_2_11_7_1_0.xml @@ -5,6 +5,7 @@ </CLASSES> <JAVADOC> <root url="jar://$MAVEN_REPOSITORY$/org/scalaz/scalaz-core_2.11/7.1.0/scalaz-core_2.11-7.1.0-javadoc.jar!/" /> + <root url="http://docs.typelevel.org/api/scalaz/stable/7.1.0-M3/doc/#package" /> </JAVADOC> <SOURCES> <root url="jar://$MAVEN_REPOSITORY$/org/scalaz/scalaz-core_2.11/7.1.0/scalaz-core_2.11-7.1.0-sources.jar!/" /> diff --git a/.idea/scala_compiler.xml b/.idea/scala_compiler.xml new file mode 100644 index 0000000000000000000000000000000000000000..d79c187ea191fa6da573822d1baaf01c03ce5590 --- /dev/null +++ 
b/.idea/scala_compiler.xml @@ -0,0 +1,11 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="ScalaCompilerConfiguration"> + <option name="deprecationWarnings" value="true" /> + <parameters> + <parameter value="-dependencyfile" /> + <parameter value="$MODULE_DIR$/target/.scala_dependencies" /> + <parameter value="-feature" /> + </parameters> + </component> +</project> \ No newline at end of file diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala index 928f14329ef0747da3f1d7652c35beb560d9751d..e04af3d5b469330e9f0b72f0f265dd0d50004086 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala @@ -54,29 +54,43 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab } protected def checkExecutable { - try if (executable != null) { - val buffer = new StringBuffer() - val cmd = Seq("which", executable) - val process = Process(cmd).run(ProcessLogger(buffer.append(_))) - if (process.exitValue == 0) { - executable = buffer.toString - val file = new File(executable) - executable = file.getCanonicalPath - } else { - logger.error("executable: '" + executable + "' not found, please check config") - throw new QException("executable: '" + executable + "' not found, please check config") + if (!BiopetCommandLineFunctionTrait.executableMd5Cache.contains(executable)) { + try if (executable != null) { + if (!BiopetCommandLineFunctionTrait.executableCache.contains(executable)) { + val oldExecutable = executable + val buffer = new StringBuffer() + val cmd = Seq("which", executable) + val process = Process(cmd).run(ProcessLogger(buffer.append(_))) + if (process.exitValue == 0) { + executable = buffer.toString + val file = new 
File(executable) + executable = file.getCanonicalPath + } else { + logger.error("executable: '" + executable + "' not found, please check config") + throw new QException("executable: '" + executable + "' not found, please check config") + } + BiopetCommandLineFunctionTrait.executableCache += oldExecutable -> executable + BiopetCommandLineFunctionTrait.executableCache += executable -> executable + } else { + executable = BiopetCommandLineFunctionTrait.executableCache(executable) + } + + if (!BiopetCommandLineFunctionTrait.executableMd5Cache.contains(executable)) { + val is = new FileInputStream(executable) + val md = MessageDigest.getInstance("MD5") + val chunk = Array.ofDim[Byte](8192) /* hash in chunks: available() is only an estimate and one read() may return fewer bytes than requested */ + try Iterator.continually(is.read(chunk)).takeWhile(_ != -1).foreach(md.update(chunk, 0, _)) + finally is.close() /* always release the file handle, even when reading fails */ + val temp = md.digest().map("%02X".format(_)).mkString.toLowerCase + BiopetCommandLineFunctionTrait.executableMd5Cache += executable -> temp + } + + addJobReportBinding("md5sum_exe", BiopetCommandLineFunctionTrait.executableMd5Cache(executable)) + } catch { + case ioe: java.io.IOException => logger.warn("Could not use 'which', check on executable skipped: " + ioe) } - - val is = new FileInputStream(executable) - val cnt = is.available - val bytes = Array.ofDim[Byte](cnt) - is.read(bytes) - is.close() - val md5: String = MessageDigest.getInstance("MD5").digest(bytes).map("%02X".format(_)).mkString.toLowerCase - - addJobReportBinding("md5sum_exe", md5) - } catch { - case ioe: java.io.IOException => logger.warn("Could not use 'which', check on executable skipped: " + ioe) + } else { + addJobReportBinding("md5sum_exe", BiopetCommandLineFunctionTrait.executableMd5Cache(executable)) } } @@ -138,4 +152,6 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab object BiopetCommandLineFunctionTrait { import scala.collection.mutable.Map private val versionCache: Map[String, String] = Map() + private val executableMd5Cache: Map[String, String] = Map() + private val executableCache: Map[String, String] = Map() } \ No 
newline at end of file diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala index 235b7af2d2b2152c8f23eaabd3bfbc03327e319f..7769b1b85808a9e145cf388077ffb7ed1a7066d4 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala @@ -1,6 +1,8 @@ package nl.lumc.sasc.biopet.core +import java.io.File import java.util.Properties +import nl.lumc.sasc.biopet.core.config.Config import org.apache.log4j.Logger object BiopetExecutable extends Logging { @@ -35,7 +37,9 @@ object BiopetExecutable extends Logging { nl.lumc.sasc.biopet.tools.SageCountFastq, nl.lumc.sasc.biopet.tools.SageCreateLibrary, nl.lumc.sasc.biopet.tools.SageCreateTagCounts, - nl.lumc.sasc.biopet.tools.BastyGenerateFasta) + nl.lumc.sasc.biopet.tools.BastyGenerateFasta, + nl.lumc.sasc.biopet.tools.MergeAlleles, + nl.lumc.sasc.biopet.tools.SamplesTsvToJson) ) /** @@ -88,6 +92,14 @@ object BiopetExecutable extends Logging { println("version: " + getVersion) } case Array(module, name, passArgs @ _*) => { + // Load every config file given via -config / --config_file into Config.global before the command runs; both flags are matched against passArgs, and the value index (t + 1) is bounds-checked + val argsSize = passArgs.size + for (t <- 0 until argsSize) { + if (passArgs(t) == "-config" || passArgs(t) == "--config_file") { + if (t + 1 >= argsSize) throw new IllegalStateException("-config needs a value") + Config.global.loadConfigFile(new File(passArgs(t + 1))) + } + } getCommand(module, name).main(passArgs.toArray) } case Array(module) => { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala index 1cbddafe6b999ded0579608de75c35e136f4a053..127f2991608612f105fcfe81e3cea2048262f1b4 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala +++ 
b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala @@ -5,6 +5,10 @@ import org.broadinstitute.gatk.queue.function.JavaCommandLineFunction trait BiopetJavaCommandLineFunction extends JavaCommandLineFunction with BiopetCommandLineFunctionTrait { executable = "java" + javaGCThreads = config("java_gc_threads") + javaGCHeapFreeLimit = config("java_gc_heap_freelimit") + javaGCTimeLimit = config("java_gc_timelimit") + override def afterGraph { memoryLimit = config("memory_limit") } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala index 015c48dd1525dde3e91d49099a7e48f89c709e69..cf4ee286b0a7c40aa128f3d1fc807e8badce9515 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala @@ -2,7 +2,7 @@ package nl.lumc.sasc.biopet.core import java.io.File import java.io.PrintWriter -import nl.lumc.sasc.biopet.core.config.Configurable +import nl.lumc.sasc.biopet.core.config.{ Config, Configurable } import org.broadinstitute.gatk.utils.commandline.Argument import org.broadinstitute.gatk.queue.QSettings import org.broadinstitute.gatk.queue.function.QFunction @@ -25,7 +25,6 @@ trait BiopetQScript extends Configurable { var functions: Seq[QFunction] final def script() { - for (file <- configfiles) globalConfig.loadConfigFile(file) if (!outputDir.endsWith("/")) outputDir += "/" init biopetScript @@ -33,7 +32,7 @@ trait BiopetQScript extends Configurable { case f: BiopetCommandLineFunctionTrait => f.afterGraph case _ => } - val configReport = globalConfig.getReport + val configReport = Config.global.getReport val configReportFile = new File(outputDir + qSettings.runName + ".configreport.txt") configReportFile.getParentFile.mkdir val writer = new PrintWriter(configReportFile) diff --git 
a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/Logging.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/Logging.scala index 9b8197de1acdc1ca492760b931180e09176068bb..32977c6f615aa3cfda69539afee31c330d8e418a 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/Logging.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/Logging.scala @@ -7,9 +7,13 @@ import org.apache.log4j.WriterAppender import org.apache.log4j.helpers.DateLayout trait Logging { - protected val logger = Logger.getLogger(getClass.getSimpleName.split("\\$").last) + def logger = Logging.logger +} + +object Logging { + val logger = Logger.getLogger("Logging") - private[core] val logLayout = new DateLayout() { + val logLayout = new DateLayout() { val ignoresThrowable = false def format(event: org.apache.log4j.spi.LoggingEvent): String = { val calendar: Calendar = Calendar.getInstance @@ -18,10 +22,13 @@ trait Logging { val formattedDate: String = formatter.format(calendar.getTime) var logLevel = event.getLevel.toString while (logLevel.size < 6) logLevel += " " - logLevel + " [" + formattedDate + "] [" + event.getLoggerName + "] " + event.getMessage + "\n" + val className = event.getLocationInformation.getClassName.split("\\.").last.split("\\$").head + logLevel + " [" + formattedDate + "] [" + className + "] " + event.getMessage + "\n" } } - private[core] val stderrAppender = new WriterAppender(logLayout, sys.process.stderr) + + val stderrAppender = new WriterAppender(logLayout, sys.process.stderr) + logger.setLevel(org.apache.log4j.Level.INFO) - logger.addAppender(stderrAppender) -} + logger.addAppender(Logging.stderrAppender) +} \ No newline at end of file diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala index 97bc2e5e6b1d244ec8ee1b0fc3d96ad028421192..81f81f26aafed8ba2126217acec768232077084d 100644 --- 
a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala @@ -1,6 +1,6 @@ package nl.lumc.sasc.biopet.core -import nl.lumc.sasc.biopet.core.config.Configurable +import nl.lumc.sasc.biopet.core.config.{ Config, Configurable } trait MultiSampleQScript extends BiopetQScript { type LibraryOutput <: AbstractLibraryOutput @@ -20,7 +20,7 @@ trait MultiSampleQScript extends BiopetQScript { final def runSamplesJobs() { samplesConfig = config("samples") if (samplesConfig == null) samplesConfig = Map() - if (globalConfig.contains("samples")) for ((key, value) <- samplesConfig) { + if (Config.global.contains("samples")) for ((key, value) <- samplesConfig) { var sample = Configurable.any2map(value) if (!sample.contains("ID")) sample += ("ID" -> key) if (sample("ID") == key) { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala index 6a245cdfd121fc45b2be6d153397d7f88bb06072..b1ffde56f2d371788f6cfb1d309a63c94b5d1b3d 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/PipelineCommand.scala @@ -1,6 +1,7 @@ package nl.lumc.sasc.biopet.core import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging } +import nl.lumc.sasc.biopet.core.workaround.BiopetQCommandLine trait PipelineCommand extends MainCommand with GatkLogging { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommand.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommand.scala index 0a5dc8428cec541fd6cf1b967893e7266bcb1575..6cbef5b674f11d7bdffd7e5e9f766214932a8ba2 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommand.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommand.scala @@ 
-9,10 +9,10 @@ trait ToolCommand extends MainCommand with Logging { abstract class AbstractOptParser extends scopt.OptionParser[Args](commandName) { opt[Unit]("log_nostderr") foreach { _ => - logger.removeAppender(stderrAppender) + logger.removeAppender(Logging.stderrAppender) } text ("No output to stderr") opt[File]("log_file") foreach { x => - logger.addAppender(new WriterAppender(logLayout, new java.io.PrintStream(x))) + logger.addAppender(new WriterAppender(Logging.logLayout, new java.io.PrintStream(x))) } text ("Log file") valueName ("<file>") opt[String]('l', "log_level") foreach { x => x.toLowerCase match { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala index c35a1fdd71501b3ecddd721185ee99a271de9434..f6f8e0cf7ada60b11d50e8671b804350b54c5ad5 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala @@ -1,7 +1,7 @@ package nl.lumc.sasc.biopet.core.config import java.io.File -import org.broadinstitute.gatk.queue.util.Logging +import nl.lumc.sasc.biopet.core.Logging import argonaut._, Argonaut._ import scalaz._, Scalaz._ @@ -104,7 +104,9 @@ class Config(var map: Map[String, Any]) extends Logging { override def toString(): String = map.toString } -object Config { +object Config extends Logging { + val global = new Config + def valueToMap(input: Any): Map[String, Any] = { input match { case m: Map[_, _] => return m.asInstanceOf[Map[String, Any]] @@ -197,4 +199,28 @@ object Config { return None } } + + def mapToJson(map: Map[String, Any]): Json = { + map.foldLeft(jEmptyObject)((acc, kv) => (kv._1 := { + kv._2 match { + case m: Map[_, _] => mapToJson(m.map(m => m._1.toString -> anyToJson(m._2))) + case _ => anyToJson(kv._2) + } + }) ->: acc) + } + + def anyToJson(any: Any): Json = { + any match { + case j: Json => j + case m: Map[_, 
_] => mapToJson(m.map(m => m._1.toString -> anyToJson(m._2))) + case l: List[_] => Json.array(l.map(anyToJson(_)): _*) + case n: Int => Json.jNumberOrString(n) + case n: Double => Json.jNumberOrString(n) + case n: Long => Json.jNumberOrString(n) + case n: Short => Json.jNumberOrString(n) + case n: Float => Json.jNumberOrString(n) + case n: Byte => Json.jNumberOrString(n) + case _ => jString(any.toString) + } + } } \ No newline at end of file diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala index 62cd27d6bc12f12283a43e065441306c6eecc178..7f6f6cb8e3e958a3ad121411dbe133603f45fdb4 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala @@ -6,16 +6,16 @@ import scala.language.implicitConversions trait Configurable extends Logging { val root: Configurable - val globalConfig: Config = if (root != null) root.globalConfig else new Config() + //val globalConfig: Config = if (root != null) root.globalConfig else new Config() def configPath: List[String] = if (root != null) root.configFullPath else List() protected lazy val configName = getClass.getSimpleName.toLowerCase protected lazy val configFullPath = configName :: configPath var defaults: scala.collection.mutable.Map[String, Any] = if (root != null) scala.collection.mutable.Map(root.defaults.toArray: _*) else scala.collection.mutable.Map() - val config = new ConfigFuntions + val config = new ConfigFunctions - protected class ConfigFuntions { + protected class ConfigFunctions { def apply(key: String, default: Any = null, submodule: String = null, required: Boolean = false, freeVar: Boolean = true): ConfigValue = { val m = if (submodule != null) submodule else configName val p = if (submodule != null) configName :: configPath else configPath @@ -29,15 +29,15 @@ trait 
Configurable extends Logging { throw new IllegalStateException("Value in config could not be found but it is required, key: " + key + " module: " + m + " path: " + p) } else return null } - if (d == null) return globalConfig(m, p, key, freeVar) - else return globalConfig(m, p, key, d, freeVar) + if (d == null) return Config.global(m, p, key, freeVar) + else return Config.global(m, p, key, d, freeVar) } def contains(key: String, submodule: String = null, freeVar: Boolean = true) = { val m = if (submodule != null) submodule else configName val p = if (submodule != null) configName :: configPath else configPath - globalConfig.contains(m, p, key, freeVar) || !(Config.getValueFromMap(defaults.toMap, ConfigValueIndex(m, p, key, freeVar)) == None) + Config.global.contains(m, p, key, freeVar) || !(Config.getValueFromMap(defaults.toMap, ConfigValueIndex(m, p, key, freeVar)) == None) } } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQCommandLine.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala similarity index 98% rename from biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQCommandLine.scala rename to biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala index 02c17e2f8038f811423d34d6ddc0e864ba09c3f1..0df441caadb1a2906bcc215ac3e90aa61da963aa 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQCommandLine.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/workaround/BiopetQCommandLine.scala @@ -24,7 +24,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -package nl.lumc.sasc.biopet.core +package nl.lumc.sasc.biopet.core.workaround import java.io.File import org.broadinstitute.gatk.utils.commandline._ @@ -32,6 +32,7 @@ import org.broadinstitute.gatk.queue.util._ import org.broadinstitute.gatk.queue.QCommandPlugin import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScriptManager +import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging } import org.broadinstitute.gatk.queue.engine.{ QStatusMessenger, QGraphSettings, QGraph } import collection.JavaConversions._ import org.broadinstitute.gatk.utils.classloader.PluginManager @@ -43,11 +44,12 @@ import java.net.URL import java.util.{ ResourceBundle, Arrays } import org.broadinstitute.gatk.utils.text.TextFormattingUtils import org.apache.commons.io.FilenameUtils +import nl.lumc.sasc.biopet.core.BiopetExecutable /** * Entry point of Queue. Compiles and runs QScripts passed in to the command line. */ -object BiopetQCommandLine extends Logging { +object BiopetQCommandLine extends GatkLogging { /** * Main. * @param argv Arguments. 
diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala index f9d1dd1467630060f9cb162511eddf9044fec8a3..00904552adbaa21985d96c5090e5c2f24eb629d2 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala @@ -11,7 +11,7 @@ class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.qu if (config.contains("scattercount")) scatterCount = config("scattercount") if (config.contains("dbsnp")) this.dbsnp = config("dbsnp") this.sample_ploidy = config("ploidy") - nct = config("threads", default = 3) + nct = config("threads", default = 1) bamOutput = config("bamOutput") memoryLimit = Option(nct.getOrElse(1) * 2) if (config.contains("allSitePLs")) this.allSitePLs = config("allSitePLs") diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala index 96cc081492fd65b350d8f28b2e75e7d776779811..75105f1b200592926c1461b9722ad0a5a35b6a7c 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala @@ -10,7 +10,7 @@ class UnifiedGenotyper(val root: Configurable) extends org.broadinstitute.gatk.q if (config.contains("scattercount")) scatterCount = config("scattercount") if (config.contains("dbsnp")) this.dbsnp = config("dbsnp") this.sample_ploidy = config("ploidy") - nct = config("threads", default = 3) + nct = config("threads", default = 1) memoryLimit = Option(nct.getOrElse(1) * 2) if (config.contains("allSitePLs")) this.allSitePLs = config("allSitePLs") if (config.contains("output_mode")) { diff --git 
a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala index fdb2493a6b48e05f3bca9d7f79760887016a81b6..f30960a2b0932e59dc6cf3ba3881cf5291fda62d 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala @@ -26,7 +26,6 @@ class BamMetrics(val root: Configurable) extends QScript with BiopetQScript { var wholeGenome = false def init() { - for (file <- configfiles) globalConfig.loadConfigFile(file) if (outputDir == null) throw new IllegalStateException("Missing Output directory on BamMetrics module") else if (!outputDir.endsWith("/")) outputDir += "/" if (config.contains("target_bed")) { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala index b0366e5f5bcf0036ed5e64c8ee5c7f3dcb87c8ef..a120a1374f82b2b748571c15e9b40750730192a9 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -18,10 +18,10 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { var input_R2: File = _ @Argument(doc = "Skip Trim fastq files", shortName = "skiptrim", required = false) - var skipTrim: Boolean = false + var skipTrim: Boolean = config("skiptrim", default = false) @Argument(doc = "Skip Clip fastq files", shortName = "skipclip", required = false) - var skipClip: Boolean = false + var skipClip: Boolean = config("skipclip", default = false) @Argument(doc = "Sample name", shortName = "sample", required = true) var sampleName: String = _ @@ -43,9 +43,6 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript 
{ val summary = new FlexiprepSummary(this) def init() { - for (file <- configfiles) globalConfig.loadConfigFile(file) - if (!skipTrim) skipTrim = config("skiptrim", default = false) - if (!skipClip) skipClip = config("skipclip", default = false) if (input_R1 == null) throw new IllegalStateException("Missing R1 on flexiprep module") if (outputDir == null) throw new IllegalStateException("Missing Output directory on flexiprep module") if (sampleName == null) throw new IllegalStateException("Missing Sample name on flexiprep module") diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala index d26f27e9c07fbc2fb5c6df5117fb89c6f7c8b470..23740daf48fd2f7c4e04e17d7aafe01b24950781 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala @@ -18,18 +18,15 @@ class GatkBenchmarkGenotyping(val root: Configurable) extends QScript with Biope @Input(doc = "Gvcf files", shortName = "I", required = false) var gvcfFiles: List[File] = Nil - @Argument(doc = "Reference", shortName = "R", required = false) - var reference: File = _ + var reference: File = config("reference") @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false) - var dbsnp: File = _ + var dbsnp: File = config("dbsnp") def init() { if (config.contains("gvcffiles")) for (file <- config("gvcffiles").getList) { gvcfFiles ::= file.toString } - if (reference == null) reference = config("reference") - if (dbsnp == null) dbsnp = config("dbsnp") if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") else if (!outputDir.endsWith("/")) outputDir += "/" } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala 
b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala index 37adabdc507c686256fbd732d39e72558e079d0b..5ed4f062982c6b64516d651b50c9008953f4b7a2 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala @@ -13,10 +13,10 @@ class GatkGenotyping(val root: Configurable) extends QScript with BiopetQScript var inputGvcfs: List[File] = Nil @Argument(doc = "Reference", shortName = "R", required = false) - var reference: File = _ + var reference: File = config("reference") @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false) - var dbsnp: File = _ + var dbsnp: File = config("dbsnp") @Argument(doc = "OutputName", required = false) var outputName: String = "genotype" @@ -28,8 +28,6 @@ class GatkGenotyping(val root: Configurable) extends QScript with BiopetQScript var samples: List[String] = Nil def init() { - if (reference == null) reference = config("reference") - if (dbsnp == null) dbsnp = config("dbsnp") if (outputFile == null) outputFile = outputDir + outputName + ".vcf.gz" if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") else if (!outputDir.endsWith("/")) outputDir += "/" diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala index 29964c6b76501aa76440a41bd2756faf6bb6d1c2..be951c2f15af843a0cc0860c51e01d425e8366e7 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala @@ -27,17 +27,17 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri var mergeGvcfs: Boolean = false @Argument(doc = "Joint variantcalling", shortName = "jointVariantCalling", required = 
false) - var jointVariantcalling = false + var jointVariantcalling: Boolean = config("joint_variantcalling", default = false) @Argument(doc = "Joint genotyping", shortName = "jointGenotyping", required = false) - var jointGenotyping = false + var jointGenotyping: Boolean = config("joint_genotyping", default = false) - var singleSampleCalling = true - var reference: File = _ - var dbsnp: File = _ + var singleSampleCalling = config("single_sample_calling", default = true) + var reference: File = config("reference", required = true) + var dbsnp: File = config("dbsnp") var gvcfFiles: List[File] = Nil var finalBamFiles: List[File] = Nil - var useAllelesOption: Boolean = _ + var useAllelesOption: Boolean = config("use_alleles_option", default = false) class LibraryOutput extends AbstractLibraryOutput { var mappedBamFile: File = _ @@ -49,15 +49,9 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri } def init() { - useAllelesOption = config("use_alleles_option", default = false) - reference = config("reference", required = true) - dbsnp = config("dbsnp") if (config.contains("target_bed")) { defaults ++= Map("gatk" -> Map(("intervals" -> config("target_bed").getStringList))) } - jointVariantcalling = config("joint_variantcalling", default = false) - jointGenotyping = config("joint_genotyping", default = false) - singleSampleCalling = config("single_sample_calling", default = true) if (config.contains("gvcfFiles")) for (file <- config("gvcfFiles").getList) gvcfFiles :+= file.toString diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala index e4899798f13141998c9f8f81fe4988c31f4f9660..b016e17dc071f4a7704334bc5f81882263e2ceba 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala +++ 
b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala @@ -2,7 +2,7 @@ package nl.lumc.sasc.biopet.pipelines.gatk import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } import java.io.File -import nl.lumc.sasc.biopet.tools.{ MpileupToVcf, VcfFilter } +import nl.lumc.sasc.biopet.tools.{ MpileupToVcf, VcfFilter, MergeAlleles } import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.extensions.gatk.{ AnalyzeCovariates, BaseRecalibrator, GenotypeGVCFs, HaplotypeCaller, IndelRealigner, PrintReads, RealignerTargetCreator, SelectVariants, CombineVariants, UnifiedGenotyper } import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates @@ -24,10 +24,10 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr var rawVcfInput: File = _ @Argument(doc = "Reference", shortName = "R", required = false) - var reference: File = _ + var reference: File = config("reference", required = true) @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false) - var dbsnp: File = _ + var dbsnp: File = config("dbsnp") @Argument(doc = "OutputName", required = false) var outputName: String = _ @@ -35,21 +35,14 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr @Argument(doc = "Sample name", required = false) var sampleID: String = _ - var preProcesBams: Option[Boolean] = None + var preProcesBams: Option[Boolean] = config("pre_proces_bams", default = true) var variantcalling: Boolean = true - var doublePreProces: Option[Boolean] = None - var useHaplotypecaller: Option[Boolean] = None - var useUnifiedGenotyper: Option[Boolean] = None - var useAllelesOption: Option[Boolean] = None + var doublePreProces: Option[Boolean] = config("double_pre_proces", default = true) + var useHaplotypecaller: Option[Boolean] = config("use_haplotypecaller", default = true) + var useUnifiedGenotyper: Option[Boolean] = config("use_unifiedgenotyper", default = false) + var 
useAllelesOption: Option[Boolean] = config("use_alleles_option", default = false) def init() { - if (useAllelesOption == None) useAllelesOption = config("use_alleles_option", default = false) - if (preProcesBams == None) preProcesBams = config("pre_proces_bams", default = true) - if (doublePreProces == None) doublePreProces = config("double_pre_proces", default = true) - if (useHaplotypecaller == None) useHaplotypecaller = config("use_haplotypecaller", default = true) - if (useUnifiedGenotyper == None) useUnifiedGenotyper = config("use_unifiedgenotyper", default = false) - if (reference == null) reference = config("reference", required = true) - if (dbsnp == null) dbsnp = config("dbsnp") if (outputName == null && sampleID != null) outputName = sampleID else if (outputName == null) outputName = "noname" if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") @@ -142,26 +135,14 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr mergBuffer += ("9.raw" -> scriptOutput.rawFilterVcfFile) if (useAllelesOption.get) { - val tempFile = if (mergeList.toList.size > 1) { - val allelesTemp = CombineVariants(this, mergeList.toList, outputDir + outputName + ".alleles_temp.vcf.gz") - allelesTemp.genotypemergeoption = org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils.GenotypeMergeType.UNSORTED - add(allelesTemp, isIntermediate = true) - allelesTemp.out - } else mergeList.toList.head - - val alleleOnly = new CommandLineFunction { - @Input val input: File = tempFile - @Output val output: File = outputDir + "raw.allele_only.vcf.gz" - @Output val outputindex: File = outputDir + "raw.allele__temp_only.vcf.gz.tbi" - def commandLine = "zcat " + input + " | cut -f1,2,3,4,5,6,7,8 | bgzip -c > " + output + " && tabix -pvcf " + output - } - add(alleleOnly, isIntermediate = true) + val mergeAlleles = MergeAlleles(this, mergeList.toList, outputDir + "raw.allele__temp_only.vcf.gz") + add(mergeAlleles, 
isIntermediate = true) if (useHaplotypecaller.get) { val hcAlleles = new HaplotypeCaller(this) hcAlleles.input_file = scriptOutput.bamFiles hcAlleles.out = outputDir + outputName + ".hc.allele.vcf.gz" - hcAlleles.alleles = alleleOnly.output + hcAlleles.alleles = mergeAlleles.output hcAlleles.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES add(hcAlleles) scriptOutput.hcAlleleVcf = hcAlleles.out @@ -172,7 +153,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr val ugAlleles = new UnifiedGenotyper(this) ugAlleles.input_file = scriptOutput.bamFiles ugAlleles.out = outputDir + outputName + ".ug.allele.vcf.gz" - ugAlleles.alleles = alleleOnly.output + ugAlleles.alleles = mergeAlleles.output ugAlleles.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES add(ugAlleles) scriptOutput.ugAlleleVcf = ugAlleles.out diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala index 067012e443ee81958910e07fa0eb8c8361293bd8..2669dc2b38ce5751f14208e41ca64714f1489759 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala @@ -16,7 +16,7 @@ class GatkVcfSampleCompare(val root: Configurable) extends QScript with BiopetQS var vcfFiles: List[File] = _ @Argument(doc = "Reference", shortName = "R", required = false) - var reference: File = _ + var reference: File = config("reference") @Argument(doc = "Target bed", shortName = "targetBed", required = false) var targetBed: List[File] = Nil @@ -29,7 +29,6 @@ class GatkVcfSampleCompare(val root: Configurable) extends QScript with BiopetQS def generalSampleDir = outputDir + "samples/" def init() { - if 
(reference == null) reference = config("reference") if (config.contains("target_bed")) for (bed <- config("target_bed").getList) targetBed :+= bed.toString diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala index fb159e9b8b05b83e5f08fa0664a6ae9ecdd41b7a..d254b51d7baabafbbb7cc0302f8abb5c25a660b2 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/Gentrap.scala @@ -69,7 +69,7 @@ class Gentrap(val root: Configurable) extends QScript with BiopetQScript { var cExonBase: Boolean = _ def init() { - for (file <- configfiles) globalConfig.loadConfigFile(file) + } def biopetScript() { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index ad1406f566dab1c061e7cb52f7feae8e9c260df9..68194858396b0e34b35ed85cc04d31c369d39e03 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -36,13 +36,13 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { var skipMetrics: Boolean = false @Argument(doc = "Aligner", shortName = "ALN", required = false) - var aligner: String = _ + var aligner: String = config("aligner", default = "bwa") @Argument(doc = "Reference", shortName = "R", required = false) - var reference: File = _ + var reference: File = config("reference") @Argument(doc = "Chunking", shortName = "chunking", required = false) - var chunking: Boolean = false + var chunking: Boolean = config("chunking", false) @ClassType(classOf[Int]) @Argument(doc = "Number of chunks, when not defined pipeline will automatic calculate number of chunks", shortName 
= "numberChunks", required = false) @@ -50,62 +50,48 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { // Readgroup items @Argument(doc = "Readgroup ID", shortName = "RGID", required = false) - var RGID: String = _ + var RGID: String = config("RGID") @Argument(doc = "Readgroup Library", shortName = "RGLB", required = false) - var RGLB: String = _ + var RGLB: String = config("RGLB") @Argument(doc = "Readgroup Platform", shortName = "RGPL", required = false) - var RGPL: String = _ + var RGPL: String = config("RGPL", default = "illumina") @Argument(doc = "Readgroup platform unit", shortName = "RGPU", required = false) - var RGPU: String = _ + var RGPU: String = config("RGPU", default = "na") @Argument(doc = "Readgroup sample", shortName = "RGSM", required = false) - var RGSM: String = _ + var RGSM: String = config("RGSM") @Argument(doc = "Readgroup sequencing center", shortName = "RGCN", required = false) - var RGCN: String = _ + var RGCN: String = config("RGCN") @Argument(doc = "Readgroup description", shortName = "RGDS", required = false) - var RGDS: String = _ + var RGDS: String = config("RGDS") @Argument(doc = "Readgroup sequencing date", shortName = "RGDT", required = false) var RGDT: Date = _ @Argument(doc = "Readgroup predicted insert size", shortName = "RGPI", required = false) - var RGPI: Int = _ + var RGPI: Int = config("RGPI") var paired: Boolean = false - var defaultAligner = "bwa" val flexiprep = new Flexiprep(this) def init() { - for (file <- configfiles) globalConfig.loadConfigFile(file) - if (aligner == null) aligner = config("aligner", default = defaultAligner) - if (reference == null) reference = config("reference") if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module") else if (!outputDir.endsWith("/")) outputDir += "/" if (input_R1 == null) throw new IllegalStateException("Missing FastQ R1 on mapping module") paired = (input_R2 != null) - if (RGLB == null && 
config.contains("RGLB")) RGLB = config("RGLB") - else if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module") - if (RGSM == null && config.contains("RGSM")) RGSM = config("RGSM") - else if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module") - if (RGID == null && config.contains("RGID")) RGID = config("RGID") - else if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB + if (RGLB == null) throw new IllegalStateException("Missing Readgroup library on mapping module") + if (RGLB == null) throw new IllegalStateException("Missing Readgroup sample on mapping module") + if (RGID == null && RGSM != null && RGLB != null) RGID = RGSM + "-" + RGLB else if (RGID == null) throw new IllegalStateException("Missing Readgroup ID on mapping module") - if (RGPL == null) RGPL = config("RGPL", "illumina") - if (RGPU == null) RGPU = config("RGPU", "na") - if (RGCN == null && config.contains("RGCN")) RGCN = config("RGCN") - if (RGDS == null && config.contains("RGDS")) RGDS = config("RGDS") - if (outputName == null) outputName = RGID - if (!chunking && numberChunks.isDefined) chunking = true - if (!chunking) chunking = config("chunking", false) if (chunking) { if (numberChunks.isEmpty) { if (config.contains("numberchunks")) numberChunks = config("numberchunks", default = None) @@ -311,10 +297,6 @@ object Mapping extends PipelineCommand { val mapping = new Mapping(root) logger.debug("Mapping runconfig: " + runConfig) - var inputType = "" - if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString - else inputType = root.config("inputtype", "dna").getString - if (inputType == "rna") mapping.defaultAligner = "star-2pass" if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString) if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString) mapping.paired = (mapping.input_R2 != null) diff --git 
a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala index 937b84261a20935025aeb79c4f555a7fdfb6827b..7a826e9bb6412d053ff240b1c7e5bc08e9258388 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala @@ -20,15 +20,15 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { def this() = this(null) @Input(doc = "countBed", required = false) - var countBed: File = _ + var countBed: File = config("count_bed") @Input(doc = "squishedCountBed, by suppling this file the auto squish job will be skipped", required = false) - var squishedCountBed: File = _ + var squishedCountBed: File = config("squished_count_bed") @Input(doc = "Transcriptome, used for generation of tag library", required = false) - var transcriptome: File = _ + var transcriptome: File = config("transcriptome") - var tagsLibrary: File = _ + var tagsLibrary: File = config("tags_library") defaults ++= Map("bowtie" -> Map( "m" -> 1, @@ -50,10 +50,6 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { def init() { if (!outputDir.endsWith("/")) outputDir += "/" - if (countBed == null) countBed = config("count_bed") - if (squishedCountBed == null) squishedCountBed = config("squished_count_bed") - if (tagsLibrary == null) tagsLibrary = config("tags_library") - if (transcriptome == null) transcriptome = config("transcriptome") if (transcriptome == null && tagsLibrary == null) throw new IllegalStateException("No transcriptome or taglib found") if (countBed == null && squishedCountBed == null) @@ -140,7 +136,7 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { val mapping = new Mapping(this) mapping.skipFlexiprep = true mapping.skipMarkduplicates = true - mapping.defaultAligner = "bowtie" + mapping.aligner = 
config("aligner", default = "bowtie") mapping.input_R1 = prefixFastq.output mapping.RGLB = runConfig("ID").toString mapping.RGSM = sampleConfig("ID").toString diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala index 10aef464b610c43571398d6285d3ddd7bc9c5b3c..a428eb693211f1787c72990a7f68ef8df225cc95 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala @@ -21,7 +21,7 @@ import org.broadinstitute.gatk.queue.engine.JobRunInfo class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript { def this() = this(null) - var reference: File = _ + var reference: File = config("reference", required = true) var finalBamFiles: List[File] = Nil class LibraryOutput extends AbstractLibraryOutput { @@ -34,8 +34,6 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript { } override def init() { - for (file <- configfiles) globalConfig.loadConfigFile(file) - reference = config("reference", required = true) if (outputDir == null) throw new IllegalStateException("Output directory is not specified in the config / argument") else if (!outputDir.endsWith("/")) @@ -124,7 +122,7 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript { if (runConfig.contains("R1")) { val mapping = new Mapping(this) - mapping.defaultAligner = "stampy" + mapping.aligner = config("aligner", default = "stampy") mapping.skipFlexiprep = false mapping.skipMarkduplicates = true // we do the dedup marking using Sambamba diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala new file mode 100644 index 0000000000000000000000000000000000000000..454f9a41fb27ece87ae193ed64185acd5ea02b69 --- 
/dev/null +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MergeAlleles.scala @@ -0,0 +1,134 @@ +package nl.lumc.sasc.biopet.tools + +import htsjdk.samtools.reference.FastaSequenceFile +import htsjdk.variant.variantcontext.Allele +import htsjdk.variant.variantcontext.VariantContext +import htsjdk.variant.variantcontext.VariantContextBuilder +import htsjdk.variant.variantcontext.writer.AsyncVariantContextWriter +import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder +import htsjdk.variant.vcf.VCFFileReader +import htsjdk.variant.vcf.VCFHeader +import java.io.File +import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction +import nl.lumc.sasc.biopet.core.ToolCommand +import scala.collection.SortedMap +import scala.collection.mutable.{ Map, Set } +import nl.lumc.sasc.biopet.core.config.Configurable +import scala.collection.JavaConversions._ +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +class MergeAlleles(val root: Configurable) extends BiopetJavaCommandLineFunction { + javaMainClass = getClass.getName + + @Input(doc = "Input vcf files", shortName = "input", required = true) + var input: List[File] = Nil + + @Output(doc = "Output vcf file", shortName = "output", required = true) + var output: File = _ + + @Output(doc = "Output vcf file index", shortName = "output", required = true) + private var outputIndex: File = _ + + var reference: File = config("reference") + + override val defaultVmem = "8G" + memoryLimit = Option(4) + + override def afterGraph { + super.afterGraph + if (output.getName.endsWith(".gz")) outputIndex = new File(output.getAbsolutePath + ".tbi") + if (output.getName.endsWith(".vcf")) outputIndex = new File(output.getAbsolutePath + ".idx") + } + + override def commandLine = super.commandLine + + repeat("-I", input) + + required("-o", output) + + required("-R", reference) +} + +object MergeAlleles extends ToolCommand { + def apply(root: Configurable, input: List[File], output: File): 
MergeAlleles = { + val mergeAlleles = new MergeAlleles(root) + mergeAlleles.input = input + mergeAlleles.output = output + return mergeAlleles + } + + case class Args(inputFiles: List[File] = Nil, outputFile: File = null, reference: File = null) extends AbstractArgs + + class OptParser extends AbstractOptParser { + opt[File]('I', "inputVcf") minOccurs (2) required () unbounded () valueName ("<file>") action { (x, c) => + c.copy(inputFiles = x :: c.inputFiles) + } + opt[File]('o', "outputVcf") required () unbounded () maxOccurs (1) valueName ("<file>") action { (x, c) => + c.copy(outputFile = x) + } + opt[File]('R', "reference") required () unbounded () maxOccurs (1) valueName ("<file>") action { (x, c) => + c.copy(reference = x) + } + } + + private val chunkSize = 50000 + + /** + * @param args the command line arguments + */ + def main(args: Array[String]): Unit = { + val argsParser = new OptParser + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + + val readers = commandArgs.inputFiles.map(new VCFFileReader(_, true)) + val referenceFile = new FastaSequenceFile(commandArgs.reference, true) + val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputFile).build) + val header = new VCFHeader + val referenceDict = referenceFile.getSequenceDictionary + header.setSequenceDictionary(referenceDict) + writer.writeHeader(header) + + for (chr <- referenceDict.getSequences; chunk <- (0 to (chr.getSequenceLength / chunkSize))) { + val output: Map[Int, List[VariantContext]] = Map() + + val chrName = chr.getSequenceName + val begin = chunk * chunkSize + 1 + val end = { + val e = (chunk + 1) * chunkSize + if (e > chr.getSequenceLength) chr.getSequenceLength else e + } + + for (reader <- readers; variant <- reader.query(chrName, begin, end)) { + val start = variant.getStart + if (output.contains(start)) output += variant.getStart -> (variant :: output(start)) + else output += variant.getStart -> 
List(variant) + } + + for ((k, v) <- SortedMap(output.toSeq: _*)) { + writer.add(mergeAlleles(v)) + } + } + writer.close + readers.foreach(_.close) + } + + def mergeAlleles(records: List[VariantContext]): VariantContext = { + val longestRef = { + var l: Array[Byte] = Array() + for (a <- records.map(_.getReference.getBases) if (a.length > l.size)) l = a + Allele.create(l, true) + } + val alleles: Set[Allele] = Set() + val builder = new VariantContextBuilder + builder.chr(records.head.getChr) + builder.start(records.head.getStart) + + for (record <- records) { + if (record.getReference == longestRef) alleles ++= record.getAlternateAlleles + else { + val suffix = longestRef.getBaseString.stripPrefix(record.getReference.getBaseString) + for (r <- record.getAlternateAlleles) alleles += Allele.create(r.getBaseString + suffix) + } + } + builder.alleles(longestRef :: alleles.toList) + builder.computeEndFromAlleles(longestRef :: alleles.toList, records.head.getStart) + builder.make + } +} diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala index 9c364cf62da705beaa94cb682b59a33c7e3329ef..3d11e5452d63b6c87a472ec22ee05454a66d7496 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala @@ -74,16 +74,16 @@ object MpileupToVcf extends ToolCommand { opt[String]('s', "sample") required () action { (x, c) => c.copy(sample = x) } - opt[Int]("minDP") required () action { (x, c) => + opt[Int]("minDP") action { (x, c) => c.copy(minDP = x) } - opt[Int]("minAP") required () action { (x, c) => + opt[Int]("minAP") action { (x, c) => c.copy(minAP = x) } - opt[Double]("homoFraction") required () action { (x, c) => + opt[Double]("homoFraction") action { (x, c) => c.copy(homoFraction = x) } - opt[Int]("ploidy") required () action { (x, c) => + opt[Int]("ploidy") 
action { (x, c) => c.copy(ploidy = x) } } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala new file mode 100644 index 0000000000000000000000000000000000000000..da69c171a3e788b747c002917936fa2053a58c62 --- /dev/null +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SamplesTsvToJson.scala @@ -0,0 +1,46 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.File +import nl.lumc.sasc.biopet.core.ToolCommand +import scala.io.Source +import nl.lumc.sasc.biopet.core.config.Config + +object SamplesTsvToJson extends ToolCommand { + case class Args(inputFiles: List[File] = Nil) extends AbstractArgs + + class OptParser extends AbstractOptParser { + opt[File]('i', "inputFiles") required () unbounded () valueName ("<file>") action { (x, c) => + c.copy(inputFiles = x :: c.inputFiles) + } text ("Input must be a tsv file, first line is seen as header and must at least have a 'sample' column, 'library' column is optional, multiple files allowed") + } + def main(args: Array[String]): Unit = { + val argsParser = new OptParser + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + + val fileMaps = for (inputFile <- commandArgs.inputFiles) yield { + val reader = Source.fromFile(inputFile) + val lines = reader.getLines.toList + val header = lines.head.split("\t") + val sampleColumn = header.indexOf("sample") + val libraryColumn = header.indexOf("library") + if (sampleColumn == -1) throw new IllegalStateException("sample column does not exist in: " + inputFile) + + val librariesValues: List[Map[String, Any]] = for (tsvLine <- lines.tail) yield { + val values = tsvLine.split("\t") + val sample = values(sampleColumn) + val library = if (libraryColumn != -1) values(libraryColumn) else null + val valuesMap = (for (t <- 0 until values.size if t != sampleColumn if t != libraryColumn) yield (header(t) -> values(t))).toMap + val 
map: Map[String, Any] = if (library != null) { + Map("samples" -> Map(sample -> Map("libraries" -> Map(library -> valuesMap)))) + } else { + Map("samples" -> Map(sample -> valuesMap)) + } + map + } + librariesValues.foldLeft(Map[String, Any]())((acc, kv) => Config.mergeMaps(acc, kv)) + } + val map = fileMaps.foldLeft(Map[String, Any]())((acc, kv) => Config.mergeMaps(acc, kv)) + val json = Config.mapToJson(map) + println(json.spaces2) + } +} diff --git a/biopet-framework/src/main/scripts/nl/lumc/sasc/biopet/scripts/bedtools_cov_stats.py b/biopet-framework/src/main/scripts/nl/lumc/sasc/biopet/scripts/bedtools_cov_stats.py index 2939abea3bf28e1b9e7b2d43863b5cda9a0e1b56..dd56e5edc246c26265c6d72e87a665539f602753 100755 --- a/biopet-framework/src/main/scripts/nl/lumc/sasc/biopet/scripts/bedtools_cov_stats.py +++ b/biopet-framework/src/main/scripts/nl/lumc/sasc/biopet/scripts/bedtools_cov_stats.py @@ -208,9 +208,9 @@ class Coverage(object): for x in itertools.chain(bp['boxes'], bp['medians'], bp['whiskers'], bp['caps']): x.set(color=BLUE, linewidth=1.6) - bp['fliers'][0].set(color=RED, alpha=0.5) - bp['fliers'][1].set(color=GREEN, alpha=0.5) - + for flier in bp['fliers']: + plt.setp(flier, color='GREEN', alpha=0.5) + upper_limit = np.percentile(self.cov_counts, percentile_show) if x_data: space = (upper_limit - min(x_data)) / 40 @@ -312,4 +312,4 @@ if __name__ == '__main__': files = {} json.dump({'stats': stats, 'files': files}, sys.stdout, sort_keys=True, indent=4, - separators=(',', ': ')) + separators=(',', ': ')) \ No newline at end of file diff --git a/extras/git.pre-commit b/extras/git.pre-commit deleted file mode 100755 index 5a627198950f191e4c2c64fe44d23cf170cfcf50..0000000000000000000000000000000000000000 --- a/extras/git.pre-commit +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/env python - -# Adapted from: http://tech.yipit.com/2011/11/16/183772396/ -# Changes by Wibowo Arindrarto -# Changes: -# - Allow code modification by linters to be comitted -# - Updated 
CHECKS -# - Python 3 calls + code style updates -# -# Usage: save this file into your .git/hooks directory as `pre-commit` -# and set it to executable - -import os -import re -import subprocess -import sys - -modified = re.compile(r"^[MA]\s+(?P<name>.*)$") - -CHECKS = [ - { - "exe": "scalariform", - "output": "Formatting code with scalariform ...", - # Remove lines without filenames - "command": "scalariform -s=2.11.1 -p=scalariformStyle.properties --quiet %s", - "match_files": [".*scala$"], - "print_filename": False, - "commit_changes": True, - }, -] - - -def matches_file(file_name, match_files): - return any(re.compile(match_file).match(file_name) for match_file - in match_files) - - -def check_files(files, check): - result = 0 - print(check["output"]) - for file_name in files: - - if not "match_files" in check or \ - matches_file(file_name, check["match_files"]): - - if not "ignore_files" in check or \ - not matches_file(file_name, check["ignore_files"]): - - process = subprocess.Popen(check["command"] % file_name, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - shell=True) - - out, err = process.communicate() - if out or err: - if check["print_filename"]: - prefix = "\t%s:" % file_name - else: - prefix = "\t" - output_lines = ["%s%s" % (prefix, line) for - line in out.splitlines()] - print("\n".join(output_lines)) - if err: - print(err) - result = 1 - elif check["commit_changes"]: - p = subprocess.Popen(["git", "add", file_name], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - p.communicate() - return result - - -def main(all_files): - # Check that the required linters and code checkers are all present - for check in CHECKS: - p = subprocess.Popen(["which", check["exe"]], stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - out, err = p.communicate() - if not out: - print("Required commit hook executable '%s' not found." 
% check["exe"]) - sys.exit(1) - - # Stash any changes to the working tree that are not going to be committed - subprocess.call(["git", "stash", "-u", "--keep-index"], stdout=subprocess.PIPE) - - files = [] - if all_files: - for root, dirs, file_names in os.walk("."): - for file_name in file_names: - files.append(os.path.join(root, file_name)) - else: - p = subprocess.Popen(["git", "status", "--porcelain"], - stdout=subprocess.PIPE) - out, err = p.communicate() - for line in out.splitlines(): - match = modified.match(line) - if match: - files.append(match.group("name")) - - result = 0 - for check in CHECKS: - result = check_files(files, check) or result - - # Strategy: - # - Check if the linters made any changes - # - If there are no changes, pop the stash and commit - # - Otherwise: - # - Stash the change - # - Pop stash@{1} - # - Checkout stash@{0} - # - Drop stash@{0} (cannot pop directly since stash may conflict) - # - Commit - # This is because the initial stash will conflict with any possible - # changes made by the linters - p = subprocess.Popen(["git", "status", "--porcelain"], - stdout=subprocess.PIPE) - out, err = p.communicate() - if not out.strip(): - subprocess.call(["git", "stash", "pop"], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - else: - subprocess.call(["git", "stash"], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - subprocess.call(["git", "stash", "pop", "--quiet", "--index", "stash@{1}"], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - subprocess.call(["git", "checkout", "stash", "--", "."], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - subprocess.call(["git", "stash", "drop"], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - sys.exit(result) - - -if __name__ == "__main__": - - all_files = False - - if len(sys.argv) > 1 and sys.argv[1] == "--all-files": - all_files = True - - main(all_files) diff --git a/scalariformStyle.properties b/scalariformStyle.properties deleted file mode 100644 index 
20a1fc52db1272de9a3aae302b4a351eb1efa293..0000000000000000000000000000000000000000 --- a/scalariformStyle.properties +++ /dev/null @@ -1,19 +0,0 @@ -#Scalariform formatter preferences -alignParameters=true -alignSingleLineCaseStatements.maxArrowIndent=40 -alignSingleLineCaseStatements=true -compactStringConcatenation=false -compactControlReadability=false -doubleIndentClassDeclaration=false -formatXml=true -indentLocalDefs=false -indentPackageBlocks=true -indentSpaces=2 -placeScaladocAsterisksBeneathSecondAsterisk=false -preserveDanglingCloseParenthesis=true -preserveSpaceBeforeArguments=false -rewriteArrowSymbols=false -spaceBeforeColon=false -spaceInsideBrackets=false -spaceInsideParentheses=false -spacesWithinPatternBinders=true