diff --git a/biopet-framework/nb-configuration.xml b/biopet-framework/nb-configuration.xml index 60b959e673d5805e869ae4f91a006ba5d81184a4..c2f0518e2a30c8af3fb854bd8e16e70f23679e5f 100644 --- a/biopet-framework/nb-configuration.xml +++ b/biopet-framework/nb-configuration.xml @@ -23,7 +23,6 @@ Any value defined here will override the pom.xml file value but is only applicab <com-junichi11-netbeans-changelf.use-project>true</com-junichi11-netbeans-changelf.use-project> <com-junichi11-netbeans-changelf.lf-kind>LF</com-junichi11-netbeans-changelf.lf-kind> <com-junichi11-netbeans-changelf.use-global>false</com-junichi11-netbeans-changelf.use-global> - <netbeans.checkstyle.format>true</netbeans.checkstyle.format> <com-junichi11-netbeans-changelf.show-dialog>true</com-junichi11-netbeans-changelf.show-dialog> <org-netbeans-modules-javascript2-requirejs.enabled>true</org-netbeans-modules-javascript2-requirejs.enabled> <netbeans.hint.jdkPlatform>JDK_1.8</netbeans.hint.jdkPlatform> diff --git a/biopet-framework/pom.xml b/biopet-framework/pom.xml index f116688f95fc697f13afafa47dd191f347679c45..69c3c3ff4f7ac147c87ade7ec0c9a0c790090a33 100644 --- a/biopet-framework/pom.xml +++ b/biopet-framework/pom.xml @@ -216,6 +216,40 @@ <showDeprecation>true</showDeprecation> </configuration> </plugin> + <plugin> + <groupId>org.scalariform</groupId> + <artifactId>scalariform-maven-plugin</artifactId> + <version>0.1.4</version> + <executions> + <execution> + <phase>process-sources</phase> + <goals> + <goal>format</goal> + </goals> + <configuration> + <rewriteArrowSymbols>false</rewriteArrowSymbols> + <alignParameters>true</alignParameters> + <alignSingleLineCaseStatements_maxArrowIndent>40</alignSingleLineCaseStatements_maxArrowIndent> + <alignSingleLineCaseStatements>true</alignSingleLineCaseStatements> + <compactStringConcatenation>false</compactStringConcatenation> + <compactControlReadability>false</compactControlReadability> + <doubleIndentClassDeclaration>false</doubleIndentClassDeclaration> + <formatXml>true</formatXml> + <indentLocalDefs>false</indentLocalDefs> + <indentPackageBlocks>true</indentPackageBlocks> + <indentSpaces>2</indentSpaces> + <placeScaladocAsterisksBeneathSecondAsterisk>false</placeScaladocAsterisksBeneathSecondAsterisk> + <preserveDanglingCloseParenthesis>true</preserveDanglingCloseParenthesis> + <preserveSpaceBeforeArguments>false</preserveSpaceBeforeArguments> + <rewriteArrowSymbols>false</rewriteArrowSymbols> + <spaceBeforeColon>false</spaceBeforeColon> + <spaceInsideBrackets>false</spaceInsideBrackets> + <spaceInsideParentheses>false</spaceInsideParentheses> + <spacesWithinPatternBinders>true</spacesWithinPatternBinders> + </configuration> + </execution> + </executions> + </plugin> </plugins> </build> </project> diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala index e9243dc391454c77e22da875151039253220974e..921f07cfac1c3af94dd100ecc3793e96e1afa9ce 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunctionTrait.scala @@ -66,13 +66,13 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab logger.error("executable: '" + executable + "' not found, please check config") throw new QException("executable: '" + executable + "' not found, please check config") } - + val is = new FileInputStream(executable) val cnt = is.available val bytes = Array.ofDim[Byte](cnt) is.read(bytes) is.close() - val md5: String = MessageDigest.getInstance("MD5").digest(bytes).map("%02X".format(_)).mkString.toLowerCase + val md5: String = MessageDigest.getInstance("MD5").digest(bytes).map("%02X".format(_)).mkString.toLowerCase addJobReportBinding("md5sum_exe", md5) } catch { @@ -107,7 +107,7 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab for (line <- stdout.toString.split("\n") ++ stderr.toString.split("\n")) { line match { case versionRegex(m) => return m - case _ => + case _ => } } logger.warn("getVersion give a exit code " + process.exitValue + " but no version was found, executable correct? \n" + outputLog) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala index 80308d9f6393c38a5fb278c2fba39fb67fc2f6fe..ea0098b9f6a8dd4b79371a2317cf9c6f9ccf61fd 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala @@ -35,7 +35,7 @@ object BiopetExecutable { nl.lumc.sasc.biopet.tools.SageCreateLibrary, nl.lumc.sasc.biopet.tools.SageCreateTagCounts) ) - + /** * @param args the command line arguments */ @@ -83,7 +83,7 @@ object BiopetExecutable { args match { case Array("version") => { - println("version: " + getVersion) + println("version: " + getVersion) } case Array(module, name, passArgs @ _*) => { getCommand(module, name).main(passArgs.toArray) @@ -98,11 +98,11 @@ object BiopetExecutable { } } } - + def getVersion = { getClass.getPackage.getImplementationVersion + " (" + getCommitHash + ")" } - + def getCommitHash = { val prop = new Properties() prop.load(getClass.getClassLoader.getResourceAsStream("git.properties")) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala index a02c679e90ddf26c03c27fb3a143f8a98b599e27..1cbddafe6b999ded0579608de75c35e136f4a053 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetJavaCommandLineFunction.scala @@ -8,7 +8,7 @@ trait BiopetJavaCommandLineFunction extends JavaCommandLineFunction with BiopetC override def afterGraph { memoryLimit = config("memory_limit") } - + override def commandLine: String = { preCmdInternal val cmd = super.commandLine diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQCommandLine.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQCommandLine.scala index be66dabeb24369c32b50743f7e655c231b64aec6..02c17e2f8038f811423d34d6ddc0e864ba09c3f1 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQCommandLine.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQCommandLine.scala @@ -274,16 +274,16 @@ class BiopetQCommandLine extends CommandLineProgram with Logging { } private def createQueueHeader(): Seq[String] = { - Seq("Biopet version: " + BiopetExecutable.getVersion,"", - "Based on GATK Queue", -// String.format("Queue v%s, Compiled %s", getQueueVersion, getBuildTimestamp), - "Copyright (c) 2012 The Broad Institute", - "For support and documentation go to http://www.broadinstitute.org/gatk") + Seq("Biopet version: " + BiopetExecutable.getVersion, "", + "Based on GATK Queue", + // String.format("Queue v%s, Compiled %s", getQueueVersion, getBuildTimestamp), + "Copyright (c) 2012 The Broad Institute", + "For support and documentation go to http://www.broadinstitute.org/gatk") } private def getQueueVersion: String = { val stingResources: ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText") - + if (stingResources.containsKey("org.broadinstitute.sting.queue.QueueVersion.version")) { stingResources.getString("org.broadinstitute.sting.queue.QueueVersion.version") } else { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala index 0ace5448d997012fd10548c2483a9d2fa1ff9a3a..015c48dd1525dde3e91d49099a7e48f89c709e69 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala @@ -23,15 +23,15 @@ trait BiopetQScript extends Configurable { def biopetScript var functions: Seq[QFunction] - + final def script() { for (file <- configfiles) globalConfig.loadConfigFile(file) if (!outputDir.endsWith("/")) outputDir += "/" init biopetScript for (function <- functions) function match { - case f:BiopetCommandLineFunctionTrait => f.afterGraph - case _ => + case f: BiopetCommandLineFunctionTrait => f.afterGraph + case _ => } val configReport = globalConfig.getReport val configReportFile = new File(outputDir + qSettings.runName + ".configreport.txt") diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala index e41764b46a59dfbc432303306637b3033deb4f47..97bc2e5e6b1d244ec8ee1b0fc3d96ad028421192 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala @@ -5,18 +5,18 @@ import nl.lumc.sasc.biopet.core.config.Configurable trait MultiSampleQScript extends BiopetQScript { type LibraryOutput <: AbstractLibraryOutput type SampleOutput <: AbstractSampleOutput - + abstract class AbstractLibraryOutput abstract class AbstractSampleOutput { var libraries: Map[String, LibraryOutput] = Map() def getAllLibraries = libraries - def getLibrary(key:String) = libraries(key) + def getLibrary(key: String) = libraries(key) } - + var samplesConfig: Map[String, Any] = Map() var samplesOutput: Map[String, SampleOutput] = Map() def globalSampleDir: String = outputDir + "samples/" - + final def runSamplesJobs() { samplesConfig = config("samples") if (samplesConfig == null) samplesConfig = Map() @@ -25,8 +25,7 @@ trait MultiSampleQScript extends BiopetQScript { if (!sample.contains("ID")) sample += ("ID" -> key) if (sample("ID") == key) { samplesOutput += key -> runSingleSampleJobs(sample) - } - else logger.warn("Key is not the same as ID on value for sample") + } else logger.warn("Key is not the same as ID on value for sample") } else logger.warn("No Samples found in config") } @@ -46,8 +45,7 @@ trait MultiSampleQScript extends BiopetQScript { if (!library.contains("ID")) library += ("ID" -> key) if (library("ID") == key) { output += key -> runSingleLibraryJobs(library, sampleConfig) - } - else logger.warn("Key is not the same as ID on value for run of sample: " + sampleID) + } else logger.warn("Key is not the same as ID on value for run of sample: " + sampleID) } } else logger.warn("No runs found in config for sample: " + sampleID) return output diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommand.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommand.scala index 98e59c41372c1dfcb573faf5293300bbe6574701..067194d3175c91563c041fd3cec06e2fa2bd7678 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommand.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/ToolCommand.scala @@ -11,51 +11,55 @@ import java.io.File trait ToolCommand extends MainCommand { abstract class AbstractArgs { } - + abstract class AbstractOptParser extends scopt.OptionParser[Args](commandName) { opt[Unit]("log_nostderr") foreach { _ => - logger.removeAppender(stderrAppender) } text("No output to stderr") + logger.removeAppender(stderrAppender) + } text ("No output to stderr") opt[File]("log_file") foreach { x => - logger.addAppender(new WriterAppender(logLayout, new java.io.PrintStream(x))) } text("Log file") valueName("<file>") + logger.addAppender(new WriterAppender(logLayout, new java.io.PrintStream(x))) + } text ("Log file") valueName ("<file>") opt[String]('l', "log_level") foreach { x => - x.toLowerCase match { - case "debug" => logger.setLevel(org.apache.log4j.Level.DEBUG) - case "info" => logger.setLevel(org.apache.log4j.Level.INFO) - case "warn" => logger.setLevel(org.apache.log4j.Level.WARN) - case "error" => logger.setLevel(org.apache.log4j.Level.ERROR) - case _ => - } } text("Log level") validate { x => x match { - case "debug" | "info" | "warn" | "error" => success - case _ => failure("Log level must be <debug/info/warn/error>") - } - } + x.toLowerCase match { + case "debug" => logger.setLevel(org.apache.log4j.Level.DEBUG) + case "info" => logger.setLevel(org.apache.log4j.Level.INFO) + case "warn" => logger.setLevel(org.apache.log4j.Level.WARN) + case "error" => logger.setLevel(org.apache.log4j.Level.ERROR) + case _ => + } + } text ("Log level") validate { x => + x match { + case "debug" | "info" | "warn" | "error" => success + case _ => failure("Log level must be <debug/info/warn/error>") + } + } opt[Unit]('h', "help") foreach { _ => System.err.println(this.usage) sys.exit(1) - } text("Print usage") + } text ("Print usage") opt[Unit]('v', "version") foreach { _ => System.err.println("Version: " + BiopetExecutable.getVersion) sys.exit(1) - } text("Print version") + } text ("Print version") } - + type Args <: AbstractArgs type OptParser <: AbstractOptParser - + protected val logger = Logger.getLogger(commandName) - + private val logLayout = new DateLayout() { - val ignoresThrowable = false - def format(event:org.apache.log4j.spi.LoggingEvent): String = { - val calendar: Calendar = Calendar.getInstance - calendar.setTimeInMillis(event.getTimeStamp) - val formatter: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - val formattedDate: String = formatter.format(calendar.getTime) - var logLevel = event.getLevel.toString - while (logLevel.size < 6) logLevel += " " - logLevel + " [" + formattedDate + "] [" + event.getLoggerName + "] " + event.getMessage + "\n" - } + val ignoresThrowable = false + def format(event: org.apache.log4j.spi.LoggingEvent): String = { + val calendar: Calendar = Calendar.getInstance + calendar.setTimeInMillis(event.getTimeStamp) + val formatter: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + val formattedDate: String = formatter.format(calendar.getTime) + var logLevel = event.getLevel.toString + while (logLevel.size < 6) logLevel += " " + logLevel + " [" + formattedDate + "] [" + event.getLoggerName + "] " + event.getMessage + "\n" } + } private val stderrAppender = new WriterAppender(logLayout, sys.process.stderr) logger.setLevel(org.apache.log4j.Level.INFO) logger.addAppender(stderrAppender) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala index fc7550ea9f52ebbd0712932ccac469da512ac908..c35a1fdd71501b3ecddd721185ee99a271de9434 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Config.scala @@ -44,9 +44,9 @@ class Config(var map: Map[String, Any]) extends Logging { protected[config] var defaultCache: Map[ConfigValueIndex, ConfigValue] = Map() def contains(s: String): Boolean = map.contains(s) - def contains(requestedIndex: ConfigValueIndex, freeVar:Boolean): Boolean = contains(requestedIndex.module, requestedIndex.path, requestedIndex.key, freeVar) + def contains(requestedIndex: ConfigValueIndex, freeVar: Boolean): Boolean = contains(requestedIndex.module, requestedIndex.path, requestedIndex.key, freeVar) def contains(requestedIndex: ConfigValueIndex): Boolean = contains(requestedIndex.module, requestedIndex.path, requestedIndex.key, true) - def contains(module: String, path: List[String], key: String, freeVar:Boolean = true): Boolean = { + def contains(module: String, path: List[String], key: String, freeVar: Boolean = true): Boolean = { val requestedIndex = ConfigValueIndex(module, path, key, freeVar) if (notFoundCache.contains(requestedIndex)) return false else if (foundCache.contains(requestedIndex)) return true @@ -62,7 +62,7 @@ class Config(var map: Map[String, Any]) extends Logging { } } - protected[config] def apply(module: String, path: List[String], key: String, default: Any = null, freeVar:Boolean = true): ConfigValue = { + protected[config] def apply(module: String, path: List[String], key: String, default: Any = null, freeVar: Boolean = true): ConfigValue = { val requestedIndex = ConfigValueIndex(module, path, key) if (contains(requestedIndex, freeVar)) return foundCache(requestedIndex) else if (default != null) { @@ -133,7 +133,7 @@ object Config { } def mergeConfigs(config1: Config, config2: Config): Config = new Config(mergeMaps(config1.map, config2.map)) - + private def jsonToMap(json: Json): Map[String, Any] = { var output: Map[String, Any] = Map() if (json.isObject) { @@ -159,8 +159,8 @@ object Config { else return num.toLong } else throw new IllegalStateException("Config value type not supported, value: " + json) } - - private def getMapFromPath(map:Map[String,Any], path: List[String]): Map[String, Any] = { + + private def getMapFromPath(map: Map[String, Any], path: List[String]): Map[String, Any] = { var returnMap: Map[String, Any] = map for (m <- path) { if (!returnMap.contains(m)) return Map() @@ -168,8 +168,8 @@ object Config { } return returnMap } - - def getValueFromMap(map:Map[String,Any], index:ConfigValueIndex): Option[ConfigValue] = { + + def getValueFromMap(map: Map[String, Any], index: ConfigValueIndex): Option[ConfigValue] = { var submodules = index.path.reverse while (!submodules.isEmpty) { var submodules2 = submodules diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValueIndex.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValueIndex.scala index 4d515b347d9522d62facfa9c53dea7d43e27e5a0..348780c0dd01e48ff005d67134935d694d8bcc0d 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValueIndex.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/ConfigValueIndex.scala @@ -1,13 +1,13 @@ package nl.lumc.sasc.biopet.core.config -class ConfigValueIndex(val module: String, val path: List[String], val key: String, val freeVar:Boolean = true) { +class ConfigValueIndex(val module: String, val path: List[String], val key: String, val freeVar: Boolean = true) { override def toString = "Module = " + module + ", path = " + path + ", key = " + key + ", freeVar = " + freeVar } object ConfigValueIndex { private var cache: Map[(String, List[String], String), ConfigValueIndex] = Map() - def apply(module: String, path: List[String], key: String, freeVar:Boolean = true): ConfigValueIndex = { + def apply(module: String, path: List[String], key: String, freeVar: Boolean = true): ConfigValueIndex = { if (!cache.contains(module, path, key)) cache += ((module, path, key) -> new ConfigValueIndex(module, path, key, freeVar)) return cache(module, path, key) } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala index 9122a5946a276a794da52185163f2ace85b68b45..b9eb07879c75039fb347672c797661a145ad3b9a 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/config/Configurable.scala @@ -9,13 +9,13 @@ trait Configurable extends Logging { def configPath: List[String] = if (root != null) root.configFullPath else List() protected lazy val configName = getClass.getSimpleName.toLowerCase protected lazy val configFullPath = configName :: configPath - var defaults: scala.collection.mutable.Map[String,Any] = if (root != null) scala.collection.mutable.Map(root.defaults.toArray:_*) - else scala.collection.mutable.Map() - + var defaults: scala.collection.mutable.Map[String, Any] = if (root != null) scala.collection.mutable.Map(root.defaults.toArray: _*) + else scala.collection.mutable.Map() + val config = new ConfigFuntions - + protected class ConfigFuntions { - def apply(key: String, default: Any = null, submodule: String = null, required: Boolean = false, freeVar:Boolean = true): ConfigValue = { + def apply(key: String, default: Any = null, submodule: String = null, required: Boolean = false, freeVar: Boolean = true): ConfigValue = { val m = if (submodule != null) submodule else configName val p = if (submodule != null) configName :: configPath else configPath val d = { @@ -31,15 +31,15 @@ trait Configurable extends Logging { if (d == null) return globalConfig(m, p, key, freeVar) else return globalConfig(m, p, key, d, freeVar) } - - def contains(key: String, submodule: String = null, freeVar:Boolean = true) = { + + def contains(key: String, submodule: String = null, freeVar: Boolean = true) = { val m = if (submodule != null) submodule else configName val p = if (submodule != null) configName :: configPath else configPath globalConfig.contains(m, p, key, freeVar) || !(Config.getValueFromMap(defaults.toMap, ConfigValueIndex(m, p, key, freeVar)) == None) } } - + implicit def configValue2file(value: ConfigValue): File = if (value != null) new File(Configurable.any2string(value.value)) else null implicit def configValue2string(value: ConfigValue): String = if (value != null) Configurable.any2string(value.value) else null implicit def configValue2long(value: ConfigValue): Long = if (value != null) Configurable.any2long(value.value) else 0 @@ -104,7 +104,7 @@ object Configurable extends Logging { case _ => throw new IllegalStateException("Value '" + any + "' is not an int") } } - + def any2float(any: Any): Float = { any match { case f: Double => return f.toFloat diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cufflinks.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cufflinks.scala index dcf482ac502628e257c7f83bd98bc194a7be7d64..b0f0ba321b4934a7a3a0b3093058a9682ce60a73 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cufflinks.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Cufflinks.scala @@ -16,7 +16,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable * Written based on cufflinks version v2.2.1. */ class Cufflinks(val root: Configurable) extends BiopetCommandLineFunction { - + /** default executable */ executable = config("exe", default = "cufflinks") @@ -177,7 +177,7 @@ class Cufflinks(val root: Configurable) extends BiopetCommandLineFunction { override def versionCommand = executable def cmdLine = { - required(executable) + + required(executable) + required("--output-dir", output_dir) + optional("--num-threads", num_threads) + optional("--seed", seed) + diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala index b8ca496fc1ccb835fb04608ca7c35f26949a22f7..0f6b664a0d6f6c5aef9db2a90e7cc9d645a19fce 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Fastqc.scala @@ -11,7 +11,7 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction { @Input(doc = "Contaminants", required = false) var contaminants: File = _ - + @Input(doc = "Adapters", required = false) var adapters: File = _ @@ -39,11 +39,11 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction { val fastqcDir = executable.substring(0, executable.lastIndexOf("/")) val defaultContams = getVersion match { case "v0.11.2" => new File(fastqcDir + "/Configuration/contaminant_list.txt") - case _ => new File(fastqcDir + "/Contaminants/contaminant_list.txt") + case _ => new File(fastqcDir + "/Contaminants/contaminant_list.txt") } val defaultAdapters = getVersion match { case "v0.11.2" => new File(fastqcDir + "/Configuration/adapter_list.txt") - case _ => null + case _ => null } contaminants = config("contaminants", default = defaultContams) } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala index 0d8324338f012c30740aa0660188711ebad35b05..253cab16b78908eee3e236bc323e0ed437449690 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Ln.scala @@ -83,12 +83,12 @@ class Ln(val root: Configurable) extends InProcessFunction with Configurable { } } - object Ln { - def apply(root: Configurable, input:File, output:File, relative:Boolean = true): Ln = { - val ln = new Ln(root) - ln.in = input - ln.out = output - ln.relative = relative - return ln - } - } +object Ln { + def apply(root: Configurable, input: File, output: File, relative: Boolean = true): Ln = { + val ln = new Ln(root) + ln.in = input + ln.out = output + ln.relative = relative + return ln + } +} diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala index 1197448c42db71c23e3154987abbe9c88d3a490d..9a25a11cc30f85ae9c7d2f3ddf34e52116eb6627 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Seqstat.scala @@ -27,7 +27,7 @@ class Seqstat(val root: Configurable) extends BiopetCommandLineFunction { executable = config("exe", default = "fastq-seqstat") def cmdLine = required(executable) + required(input) + " > " + required(output) - + def getSummary: Json = { val json = Parse.parseOption(Source.fromFile(output).mkString) if (json.isEmpty) return jNull @@ -42,7 +42,7 @@ object Seqstat { seqstat.output = output return seqstat } - + def apply(root: Configurable, fastqfile: File, outDir: String): Seqstat = { val seqstat = new Seqstat(root) val ext = fastqfile.getName.substring(fastqfile.getName.lastIndexOf(".")) @@ -52,7 +52,7 @@ object Seqstat { } def mergeSummaries(jsons: List[Json]): Json = { - def addJson(json:Json, total:Map[String, Long]) { + def addJson(json: Json, total: Map[String, Long]) { for (key <- json.objectFieldsOrEmpty) { if (json.field(key).get.isObject) addJson(json.field(key).get, total) else if (json.field(key).get.isNumber) { @@ -63,53 +63,52 @@ object Seqstat { } else if (key == "len_max") { if (total(key) < number) total(key) = number } else total(key) += number - } - else total += (key -> number) + } else total += (key -> number) } } } - + var basesTotal: Map[String, Long] = Map() var readsTotal: Map[String, Long] = Map() var encoding: Set[Json] = Set() for (json <- jsons) { encoding += json.fieldOrEmptyString("qual_encoding") - + val bases = json.fieldOrEmptyObject("bases") addJson(bases, basesTotal) - + val reads = json.fieldOrEmptyObject("reads") addJson(reads, readsTotal) } return ("bases" := ( - ("num_n" := basesTotal("num_n")) ->: - ("num_total" := basesTotal("num_total")) ->: - ("num_qual_gte" := ( - ("1" := basesTotal("1")) ->: - ("10" := basesTotal("10")) ->: - ("20" := basesTotal("20")) ->: - ("30" := basesTotal("30")) ->: - ("40" := basesTotal("40")) ->: - ("50" := basesTotal("50")) ->: - ("60" := basesTotal("60")) ->: - jEmptyObject - ) ) ->: jEmptyObject)) ->: - ("reads" := ( - ("num_with_n" := readsTotal("num_with_n")) ->: - ("num_total" := readsTotal("num_total")) ->: - ("len_min" := readsTotal("len_min")) ->: - ("len_max" := readsTotal("len_max")) ->: - ("num_mean_qual_gte" := ( - ("1" := readsTotal("1")) ->: - ("10" := readsTotal("10")) ->: - ("20" := readsTotal("20")) ->: - ("30" := readsTotal("30")) ->: - ("40" := readsTotal("40")) ->: - ("50" := readsTotal("50")) ->: - ("60" := readsTotal("60")) ->: - jEmptyObject - ) ) ->: jEmptyObject)) ->: - ("qual_encoding" := encoding.head) ->: - jEmptyObject + ("num_n" := basesTotal("num_n")) ->: + ("num_total" := basesTotal("num_total")) ->: + ("num_qual_gte" := ( + ("1" := basesTotal("1")) ->: + ("10" := basesTotal("10")) ->: + ("20" := basesTotal("20")) ->: + ("30" := basesTotal("30")) ->: + ("40" := basesTotal("40")) ->: + ("50" := basesTotal("50")) ->: + ("60" := basesTotal("60")) ->: + jEmptyObject + )) ->: jEmptyObject)) ->: + ("reads" := ( + ("num_with_n" := readsTotal("num_with_n")) ->: + ("num_total" := readsTotal("num_total")) ->: + ("len_min" := readsTotal("len_min")) ->: + ("len_max" := readsTotal("len_max")) ->: + ("num_mean_qual_gte" := ( + ("1" := readsTotal("1")) ->: + ("10" := readsTotal("10")) ->: + ("20" := readsTotal("20")) ->: + ("30" := readsTotal("30")) ->: + ("40" := readsTotal("40")) ->: + ("50" := readsTotal("50")) ->: + ("60" := readsTotal("60")) ->: + jEmptyObject + )) ->: jEmptyObject)) ->: + ("qual_encoding" := encoding.head) ->: + jEmptyObject } } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/aligners/Bowtie.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/aligners/Bowtie.scala index 285e960f5b79a2c4ece37b0a259b4f7395c1caa8..dc80575b9d274ff659aa35b75ce6cdc54a3df690 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/aligners/Bowtie.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/aligners/Bowtie.scala @@ -22,7 +22,7 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction { override val versionRegex = """.*[Vv]ersion:? (.*)""".r override val versionExitcode = List(0, 1) override def versionCommand = executable + " --version" - + override val defaultVmem = "6G" override val defaultThreads = 8 @@ -36,7 +36,7 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction { var maxbts: Option[Int] = config("maxbts") var strata: Boolean = config("strata") var maqerr: Option[Int] = config("maqerr") - + def cmdLine = { required(executable) + optional("--threads", nCoresRequest) + diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/aligners/Star.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/aligners/Star.scala index 82bb495298b71f45e988be30eae3960ad4e775da..01cab5aaff9e69224addd9b79f2370cd0c152e82 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/aligners/Star.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/aligners/Star.scala @@ -81,7 +81,7 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction { } object Star { - def apply(configurable: Configurable, R1: File, R2: File, outputDir: String, isIntermediate: Boolean = false, deps:List[File] = Nil): Star = { + def apply(configurable: Configurable, R1: File, R2: File, outputDir: String, isIntermediate: Boolean = false, deps: List[File] = Nil): Star = { val star = new Star(configurable) star.R1 = R1 if (R2 != null) star.R2 = R2 @@ -92,7 +92,7 @@ object Star { return star } - def _2pass(configurable: Configurable, R1: File, R2: File, outputDir: String, isIntermediate: Boolean = false, deps:List[File] = Nil): (File, List[Star]) = { + def _2pass(configurable: Configurable, R1: File, R2: File, outputDir: String, isIntermediate: Boolean = false, deps: List[File] = Nil): (File, List[Star]) = { val outDir = if (outputDir.endsWith("/")) outputDir else outputDir + "/" val starCommand_pass1 = Star(configurable, R1, if (R2 != null) R2 else null, outDir + "aln-pass1/") starCommand_pass1.isIntermediate = isIntermediate diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsCoverage.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsCoverage.scala index 916ead166b617f78667dc6c40563214bfa8f24e5..aebf9d3637261abdd2f08fc69f8141e0e7ad2e5f 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsCoverage.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsCoverage.scala @@ -16,10 +16,10 @@ class BedtoolsCoverage(val root: Configurable) extends Bedtools { @Argument(doc = "dept", required = false) var depth: Boolean = false - + @Argument(doc = "sameStrand", required = false) var sameStrand: Boolean = false - + @Argument(doc = "diffStrand", required = false) var diffStrand: Boolean = false diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala index a3b8fcf942408dff7c9166e3301f66d706df7071..525c357a9b7ef31e72f43adf5a50b98304bf858f 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala @@ -5,9 +5,9 @@ import nl.lumc.sasc.biopet.core.config.Configurable class AnalyzeCovariates(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.AnalyzeCovariates with GatkGeneral { } - + object AnalyzeCovariates { - def apply(root: Configurable, before:File, after:File, plots:File): AnalyzeCovariates = { + def apply(root: Configurable, before: File, after: File, plots: File): AnalyzeCovariates = { val ac = new AnalyzeCovariates(root) ac.before = before ac.after = after diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala index 2e1d5de232ee1c9e2412e1605ddb0dab6e36f021..b0e3a71fcdeab46f3add6e93c3943a38a3303ceb 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala @@ -6,21 +6,22 @@ import nl.lumc.sasc.biopet.core.config.Configurable class ApplyRecalibration(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.ApplyRecalibration with GatkGeneral { override def afterGraph { super.afterGraph - + if (config.contains("scattercount")) scatterCount = config("scattercount") - + nt = Option(getThreads(3)) memoryLimit = Option(nt.getOrElse(1) * 2) ts_filter_level = config("ts_filter_level") } } - + object ApplyRecalibration { - def apply(root: Configurable, input:File, output:File, recal_file:File, tranches_file:File, indel: Boolean = false): ApplyRecalibration = { + def apply(root: Configurable, input: File, output: File, recal_file: File, tranches_file: File, indel: Boolean = false): ApplyRecalibration = { val ar = if (indel) new ApplyRecalibration(root) { mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL defaults ++= Map("ts_filter_level" -> 99.0) - } else new ApplyRecalibration(root) { + } + else new ApplyRecalibration(root) { mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP defaults ++= Map("ts_filter_level" -> 99.5) } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala index 0be0c04bfc63db068c4d286e8d24bc3b3edda898..8c736f955f7fd155ad3bce8f625503078b542869 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala @@ -6,18 +6,18 @@ import nl.lumc.sasc.biopet.core.config.Configurable class BaseRecalibrator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.BaseRecalibrator with GatkGeneral { memoryLimit = Option(4) override val defaultVmem = "8G" - + override def afterGraph { super.afterGraph - + if (config.contains("scattercount")) scatterCount = config("scattercount") if (config.contains("dbsnp")) knownSites :+= new File(config("dbsnp").getString) if (config.contains("known_sites")) knownSites :+= new File(config("known_sites").getString) } } - + object BaseRecalibrator { - def apply(root: Configurable, input:File, output:File): BaseRecalibrator = { + def apply(root: Configurable, input: File, output: File): BaseRecalibrator = { val br = new BaseRecalibrator(root) br.input_file :+= input br.out = output diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala index 21986b24731d76521fd22c538c5f6bd45463ae7f..5b3e4df4b2fd661d249a881255001d342078e12c 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala @@ -6,7 +6,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable class CombineGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.CombineGVCFs with GatkGeneral { if (config.contains("scattercount")) scatterCount = config("scattercount") } - + object CombineGVCFs { def apply(root: Configurable, input: List[File], output: File): CombineGVCFs = { val cg = new CombineGVCFs(root) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala index ab97ad82d09b95d8bbc16eaeb0a1467febb39472..1ad7e42e705c2b3064604d06c3c1dc047330df27 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala @@ -6,9 +6,9 @@ import nl.lumc.sasc.biopet.core.config.Configurable class CombineVariants(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.CombineVariants with GatkGeneral { if (config.contains("scattercount")) scatterCount = config("scattercount") } - + object CombineVariants { - def apply(root: Configurable, input:List[File], output:File): CombineVariants = { + def apply(root: Configurable, input: List[File], output: File): CombineVariants = { val cv = new CombineVariants(root) cv.variant = input cv.out = output diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkGeneral.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkGeneral.scala index 28e4e4953f4c8d64ec4d583191885c03ef730e9a..44e5e62fed7ab5319ee24c92fe313082157621fd 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkGeneral.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkGeneral.scala @@ -5,9 +5,9 @@ import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK trait GatkGeneral extends CommandLineGATK with BiopetJavaCommandLineFunction { memoryLimit = Option(3) - + override val defaultVmem = "7G" - + if (config.contains("intervals", submodule = "gatk")) intervals = config("intervals", submodule = "gatk").getFileList if (config.contains("exclude_intervals", submodule = "gatk")) excludeIntervals = config("exclude_intervals", submodule = "gatk").getFileList reference_sequence = config("reference", submodule = "gatk") diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala index f44043208df1ea086f4a560fae296a8a566c8d68..d1ee959743d22ba09296db9ebbdada53adeaba9e 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala @@ -5,10 +5,10 @@ import nl.lumc.sasc.biopet.core.config.Configurable class GenotypeGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.GenotypeGVCFs with GatkGeneral { annotation ++= config("annotation", default = Seq("FisherStrand", "QualByDepth", "ChromosomeCounts")).getStringList - + if (config.contains("dbsnp")) dbsnp = config("dbsnp") if (config.contains("scattercount", "genotypegvcfs")) scatterCount = config("scattercount") - + if (config("inputtype", default = "dna").getString == "rna") { stand_call_conf = config("stand_call_conf", default = 20) stand_emit_conf = config("stand_emit_conf", default = 0) @@ -17,9 +17,9 @@ class GenotypeGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queu stand_emit_conf = config("stand_emit_conf", default = 0) } } - + object GenotypeGVCFs { - def apply(root: Configurable, gvcfFiles:List[File], output:File): GenotypeGVCFs = { + def apply(root: Configurable, gvcfFiles: List[File], output: File): GenotypeGVCFs = { val gg = new GenotypeGVCFs(root) gg.variant = gvcfFiles gg.out = output diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala index a83ee965e86207e1a0e73d48cdeb8ba86426ce8b..f9d1dd1467630060f9cb162511eddf9044fec8a3 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala @@ -6,7 +6,7 @@ import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.HaplotypeCaller with GatkGeneral { override def afterGraph { super.afterGraph - + min_mapping_quality_score = config("minMappingQualityScore", default = 20) if (config.contains("scattercount")) scatterCount = config("scattercount") if (config.contains("dbsnp")) this.dbsnp = config("dbsnp") @@ -15,13 +15,13 @@ class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.qu bamOutput = config("bamOutput") memoryLimit = Option(nct.getOrElse(1) * 2) if (config.contains("allSitePLs")) this.allSitePLs = config("allSitePLs") - if (config.contains("output_mode")){ + if (config.contains("output_mode")) { import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode._ config("output_mode").getString match { case "EMIT_ALL_CONFIDENT_SITES" => output_mode = EMIT_ALL_CONFIDENT_SITES - case "EMIT_ALL_SITES" => output_mode = EMIT_ALL_SITES - case "EMIT_VARIANTS_ONLY" => output_mode = EMIT_VARIANTS_ONLY - case e => logger.warn("output mode '" + e + "' does not exist") + case "EMIT_ALL_SITES" => output_mode = EMIT_ALL_SITES + case "EMIT_VARIANTS_ONLY" => output_mode = EMIT_VARIANTS_ONLY + case e => logger.warn("output mode '" + e + "' does not exist") } } @@ -39,7 +39,7 @@ class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.qu logger.warn("BamOutput is on, nct/threads is forced to set on 1, this option is only for debug") } } - + def useGvcf() { emitRefConfidence = org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode.GVCF variant_index_type = GATKVCFIndexType.LINEAR diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala index 4ead517edcaa6313ebaedea39ddc10fb7c5dca6f..f9f06daacb9c97f8e73058838b392f318d192205 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala @@ -6,9 +6,9 @@ import nl.lumc.sasc.biopet.core.config.Configurable class IndelRealigner(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.IndelRealigner with GatkGeneral { if (config.contains("scattercount")) scatterCount = config("scattercount") } - + object IndelRealigner { - def apply(root: Configurable, input:File, targetIntervals:File, outputDir:String): IndelRealigner = { + def apply(root: Configurable, input: File, targetIntervals: File, outputDir: String): IndelRealigner = { val ir = new IndelRealigner(root) ir.input_file :+= input ir.targetIntervals = targetIntervals diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala index b9650eb7feff1889e3e9c425406ff7f656f904ed..6f039932b001ea2f6dde5c4559b1fe952bcadef1 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala @@ -5,14 +5,14 @@ import nl.lumc.sasc.biopet.core.config.Configurable class PrintReads(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.PrintReads with GatkGeneral { memoryLimit = Option(4) - + override val defaultVmem = "8G" - + if (config.contains("scattercount")) scatterCount = config("scattercount") } - + object PrintReads { - def apply(root: Configurable, input:File, output:File): PrintReads = { + def apply(root: Configurable, input: File, output: File): PrintReads = { val br = new PrintReads(root) br.input_file :+= input br.out = output diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala index 4339760ad6e542f373e83ac712e42b4901d96fa4..da1fe1197194a85010db483759e5ef645e575bb6 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala @@ -6,14 +6,14 @@ import nl.lumc.sasc.biopet.core.config.Configurable class RealignerTargetCreator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.RealignerTargetCreator with GatkGeneral { override val defaultVmem = "6G" memoryLimit = Some(2.5) - + if (config.contains("scattercount")) scatterCount = config("scattercount") if (config.contains("known")) known ++= config("known").getFileList } - + object RealignerTargetCreator { - def apply(root: Configurable, input:File, outputDir:String): RealignerTargetCreator = { + def apply(root: Configurable, input: File, outputDir: String): RealignerTargetCreator = { val re = new RealignerTargetCreator(root) re.input_file :+= input re.out = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.intervals") diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala index e32f9807ef5d21bd5a9fbed1f7b544d4a40d05d6..cd056c4293df7cc16b0442ff217fd931b3bc30e4 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala @@ -6,9 +6,9 @@ import nl.lumc.sasc.biopet.core.config.Configurable class SelectVariants(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.SelectVariants with GatkGeneral { if (config.contains("scattercount")) scatterCount = config("scattercount") } - + object SelectVariants { - def apply(root: Configurable, input:File, output:File): SelectVariants = { + def apply(root: Configurable, input: File, output: File): SelectVariants = { val sv = new SelectVariants(root) sv.variant = input sv.out = output diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala index 0a70e4465af57e7c8ecd082c806027a57cff2db4..96cc081492fd65b350d8f28b2e75e7d776779811 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala @@ -5,7 +5,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable class UnifiedGenotyper(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.UnifiedGenotyper with GatkGeneral { override def afterGraph { super.afterGraph - + genotype_likelihoods_model = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.BOTH if (config.contains("scattercount")) scatterCount = config("scattercount") if (config.contains("dbsnp")) this.dbsnp = config("dbsnp") @@ -13,13 +13,13 @@ class UnifiedGenotyper(val root: Configurable) extends org.broadinstitute.gatk.q nct = config("threads", default = 3) memoryLimit = Option(nct.getOrElse(1) * 2) if (config.contains("allSitePLs")) this.allSitePLs = config("allSitePLs") - if (config.contains("output_mode")){ + if (config.contains("output_mode")) { import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode._ config("output_mode").getString match { case "EMIT_ALL_CONFIDENT_SITES" => output_mode = EMIT_ALL_CONFIDENT_SITES - case "EMIT_ALL_SITES" => output_mode = EMIT_ALL_SITES - case "EMIT_VARIANTS_ONLY" => output_mode = EMIT_VARIANTS_ONLY - case e => logger.warn("output mode '" + e + "' does not exist") + case "EMIT_ALL_SITES" => output_mode = EMIT_ALL_SITES + case "EMIT_VARIANTS_ONLY" => output_mode = EMIT_VARIANTS_ONLY + case e => logger.warn("output mode '" + e + "' does not exist") } } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala index d5f0474f1dd24bc64a5f340c528c93cd0e1acf6d..3a829421cc9122ba14b8ba639d0d7b457ea91705 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala @@ -7,9 +7,9 @@ class VariantAnnotator(val root: Configurable) extends org.broadinstitute.gatk.q if (config.contains("scattercount")) scatterCount = config("scattercount") dbsnp = config("dbsnp") } - + object VariantAnnotator { - def apply(root: Configurable, input:File, bamFiles:List[File], output:File): VariantAnnotator = { + def apply(root: Configurable, input: File, bamFiles: List[File], output: File): VariantAnnotator = { val va = new VariantAnnotator(root) va.variant = input va.input_file = bamFiles diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala index 2045caba3c7c6bad0f2b3fd0f805db7eddc3e86a..25df9f92eb962d021b215d4f8588ca356cee9cab 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala @@ -7,24 +7,24 @@ import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile class VariantRecalibrator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.VariantRecalibrator with GatkGeneral { nt = Option(getThreads(4)) memoryLimit = Option(nt.getOrElse(1) * 2) - + if (config.contains("dbsnp")) resource :+= new TaggedFile(config("dbsnp").getString, "known=true,training=false,truth=false,prior=2.0") - + an = config("annotation", default = List("QD", "DP", "FS", "ReadPosRankSum", "MQRankSum")).getStringList minNumBadVariants = config("minnumbadvariants") maxGaussians = config("maxgaussians") } - + object VariantRecalibrator { - def apply(root: Configurable, input:File, recal_file:File, tranches_file:File, indel: Boolean = false): VariantRecalibrator = { + def apply(root: Configurable, input: File, recal_file: File, tranches_file: File, indel: Boolean = false): VariantRecalibrator = { val vr = new VariantRecalibrator(root) { override lazy val configName = "variantrecalibrator" - override def configPath: List[String] = (if (indel) "indel" else "snp") :: super.configPath + override def configPath: List[String] = (if (indel) "indel" else "snp") :: super.configPath if (indel) { mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL defaults ++= Map("ts_filter_level" -> 99.0) if (config.contains("mills")) resource :+= new TaggedFile(config("mills").getString, "known=false,training=true,truth=true,prior=12.0") - } else { + } else { mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP defaults ++= Map("ts_filter_level" -> 99.5) if (config.contains("hapmap")) resource +:= new TaggedFile(config("hapmap").getString, "known=false,training=true,truth=true,prior=15.0") diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/AddOrReplaceReadGroups.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/AddOrReplaceReadGroups.scala index 2cd6b073a81e63c444c8e5bad6f8d8525ded12dc..ef76520f8fec26724900aacd39b350e398a368d8 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/AddOrReplaceReadGroups.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/AddOrReplaceReadGroups.scala @@ -12,37 +12,37 @@ class AddOrReplaceReadGroups(val root: Configurable) extends Picard { @Output(doc = "The output file to bam file to", required = true) var output: File = _ - + @Output(doc = "The output file to bam file to", required = true) lazy val outputIndex: File = new File(output.getAbsolutePath.stripSuffix(".bam") + ".bai") @Argument(doc = "Sort order of output file Required. Possible values: {unsorted, queryname, coordinate} ", required = true) var sortOrder: String = _ - + @Argument(doc = "RGID", required = true) var RGID: String = _ - + @Argument(doc = "RGLB", required = true) var RGLB: String = _ - + @Argument(doc = "RGPL", required = true) var RGPL: String = _ - + @Argument(doc = "RGPU", required = true) var RGPU: String = _ - + @Argument(doc = "RGSM", required = true) var RGSM: String = _ - + @Argument(doc = "RGCN", required = false) var RGCN: String = _ - + @Argument(doc = "RGDS", required = false) var RGDS: String = _ - + @Argument(doc = "RGDT", required = false) var RGDT: String = _ - + @Argument(doc = "RGPI", required = false) var RGPI: Option[Int] = _ diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala index 8d31cfc7b4663730142d403a45929ac3094031c2..d9daa4e25badebfcf53167bf5fb76412b7254308 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MergeSamFiles.scala @@ -18,16 +18,16 @@ class MergeSamFiles(val root: Configurable) extends Picard { @Argument(doc = "ASSUME_SORTED", required = false) var assumeSorted: Boolean = config("assumesorted", default = false) - + @Argument(doc = "MERGE_SEQUENCE_DICTIONARIES", required = false) var mergeSequenceDictionaries: Boolean = config("merge_sequence_dictionaries", default = false) - + @Argument(doc = "USE_THREADING", required = false) var useThreading: Boolean = config("use_threading", default = false) - + @Argument(doc = "COMMENT", required = false) var comment: String = config("comment") - + override def commandLine = super.commandLine + repeat("INPUT=", input, spaceSeparated = false) + required("OUTPUT=", output, spaceSeparated = false) + diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala index 135ca3b5b56d38931104983452e832022afdb9e6..caf1a3e9c0b52d89f666c32d44c02215613b03ca 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala @@ -25,9 +25,9 @@ trait Picard extends BiopetJavaCommandLineFunction { @Argument(doc = "CREATE_MD5_FILE", required = false) var createMd5: Boolean = config("createmd5", default = false, submodule = "picard") -// override def versionCommand = executable + " " + javaOpts + " " + javaExecutable + " -h" -// override val versionRegex = """Version: (.*)""".r -// override val versionExitcode = List(0, 1) + // override def versionCommand = executable + " " + javaOpts + " " + javaExecutable + " -h" + // override val versionRegex = """Version: (.*)""".r + // override val versionExitcode = List(0, 1) override val defaultVmem = "8G" memoryLimit = Option(3.0) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SamToFastq.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SamToFastq.scala index 5733c8b7331742164e03af896c7045e14948226c..398393f98605cffea9df5804f90d06932080324f 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SamToFastq.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/SamToFastq.scala @@ -12,46 +12,46 @@ class SamToFastq(val root: Configurable) extends Picard { @Output(doc = "R1", required = true) var fastqR1: File = _ - + @Output(doc = "R1", required = false) var fastqR2: File = _ - + @Output(doc = "Unpaired", required = false) var fastqUnpaired: File = _ - + @Argument(doc = "Output per readgroup", required = false) var outputPerRg: Boolean = config("outputPerRg", default = false) - + @Argument(doc = "Output dir", required = false) var outputDir: String = config("outputDir") - + @Argument(doc = "re reverse", required = false) var reReverse: Boolean = config("reReverse", default = false) - + @Argument(doc = "The output file to bam file to", required = false) var interleave: Boolean = config("interleave", default = false) - + @Argument(doc = "includeNonPjReads", required = false) var includeNonPjReads: Boolean = config("includeNonPjReads", default = false) - + @Argument(doc = "clippingAtribute", required = false) var clippingAtribute: String = config("clippingAtribute") - + @Argument(doc = "clippingAction", required = false) var clippingAction: String = config("clippingAction") - + @Argument(doc = "read1Trim", required = false) var read1Trim: Option[Int] = config("read1Trim") - + @Argument(doc = "read1MaxBasesToWrite", required = false) var read1MaxBasesToWrite: Option[Int] = config("read1MaxBasesToWrite") - + @Argument(doc = "read2Trim", required = false) var read2Trim: Option[Int] = config("read2Trim") - + @Argument(doc = "read2MaxBasesToWrite", required = false) var read2MaxBasesToWrite: Option[Int] = config("read2MaxBasesToWrite") - + @Argument(doc = "includeNonPrimaryAlignments", required = false) var includeNonPrimaryAlignments: Boolean = config("includeNonPrimaryAlignments", default = false) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaFlagstat.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaFlagstat.scala index 0a250a4893f86d5d1a2b7ee31bb44a9473d32ecf..177d1a6d1c92e2f9d294d6df75b04a005a6f51a7 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaFlagstat.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaFlagstat.scala @@ -13,12 +13,12 @@ class SambambaFlagstat(val root: Configurable) extends Sambamba { @Output(doc = "output File") var output: File = _ - def cmdLine = required(executable) + - required("flagstat") + - optional("-t", nCoresRequest) + - required(input) + - " > " + - required(output) + def cmdLine = required(executable) + + required("flagstat") + + optional("-t", nCoresRequest) + + required(input) + + " > " + + required(output) } object SambambaFlagstat { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaIndex.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaIndex.scala index a0988a8f5d09c8ce5ef5aca7ed50fa4883cdf081..a90ca7a29ccf34c57b6ccf427ad1c52a40c6ed95 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaIndex.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaIndex.scala @@ -13,11 +13,11 @@ class SambambaIndex(val root: Configurable) extends Sambamba { @Output(doc = "Output .bai file to") var output: File = _ - def cmdLine = required(executable) + - required("index") + - optional("-t", nCoresRequest) + - required(input) + - required(output) + def cmdLine = required(executable) + + required("index") + + optional("-t", nCoresRequest) + + required(input) + + required(output) } object SambambaIndex { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala index 3ae6939c5184f3f4ee537ef89403bdf856c1c118..5db6f67aa799564522ce907f6ca1c2b980c8c393 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMarkdup.scala @@ -13,26 +13,24 @@ class SambambaMarkdup(val root: Configurable) extends Sambamba { @Output(doc = "Markdup output bam") var output: File = _ - var remove_duplicates: Boolean = config("remove_duplicates", default = false) - + // @doc: compression_level 6 is average, 0 = no compression, 9 = best - val compression_level: Option[Int] = config("compression_level", default=6) - val hash_table_size: Option[Int] = config("hash-table-size", default=262144) - val overflow_list_size: Option[Int] = config("overflow-list-size", default=200000) - val io_buffer_size: Option[Int] = config("io-buffer-size", default=128) - - - def cmdLine = required(executable) + - required("markdup") + - conditional(remove_duplicates, "--remove-duplicates") + - optional("-t", nCoresRequest) + - optional("-l", compression_level) + - optional("--hash-table-size=", hash_table_size, spaceSeparated=false ) + - optional("--overflow-list-size=", overflow_list_size, spaceSeparated=false ) + - optional("--io-buffer-size=", io_buffer_size, spaceSeparated=false ) + - required(input) + - required(output) + val compression_level: Option[Int] = config("compression_level", default = 6) + val hash_table_size: Option[Int] = config("hash-table-size", default = 262144) + val overflow_list_size: Option[Int] = config("overflow-list-size", default = 200000) + val io_buffer_size: Option[Int] = config("io-buffer-size", default = 128) + + def cmdLine = required(executable) + + required("markdup") + + conditional(remove_duplicates, "--remove-duplicates") + + optional("-t", nCoresRequest) + + optional("-l", compression_level) + + optional("--hash-table-size=", hash_table_size, spaceSeparated = false) + + optional("--overflow-list-size=", overflow_list_size, spaceSeparated = false) + + optional("--io-buffer-size=", io_buffer_size, spaceSeparated = false) + + required(input) + + required(output) } object SambambaMarkdup { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala index 638b6071faa31b692b519ad68f3bc048e79fd76b..57c227bab8fc91fc1031150e9869362889c76b57 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/sambamba/SambambaMerge.scala @@ -14,14 +14,14 @@ class SambambaMerge(val root: Configurable) extends Sambamba { var output: File = _ // @doc: compression_level 6 is average, 0 = no compression, 9 = best - val compression_level: Option[Int] = config("compression_level", default=6) - - def cmdLine = required(executable) + - required("merge") + - optional("-t", nCoresRequest) + - optional("-l", compression_level) + - required(output) + - repeat("", input) + val compression_level: Option[Int] = config("compression_level", default = 6) + + def cmdLine = required(executable) + + required("merge") + + optional("-t", nCoresRequest) + + optional("-l", compression_level) + + required(output) + + repeat("", input) } object SambambaMerge { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala index 739fd0176c2622a52f6524aa8d7c59646e9b3e41..7ce404762cfde868926d565e88081eba060aa905 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala @@ -13,21 +13,21 @@ class SamtoolsMpileup(val root: Configurable) extends Samtools { @Input(doc = "Reference fasta") var reference: File = config("reference") - + @Input(doc = "Interval bed") var intervalBed: File = config("interval_bed") - + var disableBaq: Boolean = config("disable_baq") var minMapQuality: Option[Int] = config("min_map_quality") var minBaseQuality: Option[Int] = config("min_base_quality") - - def cmdBase = required(executable) + - required("mpileup") + - optional("-f", reference) + - optional("-l", intervalBed) + - optional("-q", minMapQuality) + - optional("-Q", minBaseQuality) + - conditional(disableBaq, "-B") + + def cmdBase = required(executable) + + required("mpileup") + + optional("-f", reference) + + optional("-l", intervalBed) + + optional("-q", minMapQuality) + + optional("-Q", minBaseQuality) + + conditional(disableBaq, "-B") def cmdPipeInput = cmdBase + "-" def cmdPipe = cmdBase + required(input) def cmdLine = cmdPipe + " > " + required(output) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsView.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsView.scala index d9f1dd2db98028a0e51d71751f722587131ed05b..e2e108de0792897b2caadfb2ac8978c2e3448e03 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsView.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsView.scala @@ -10,16 +10,16 @@ class SamtoolsView(val root: Configurable) extends Samtools { @Output(doc = "output File") var output: File = _ - + var quality: Option[Int] = config("quality") var b: Boolean = config("b") var h: Boolean = config("h") - - def cmdBase = required(executable) + - required("view") + - optional("-q", quality) + - conditional(b, "-b") + - conditional(h, "-h") + + def cmdBase = required(executable) + + required("view") + + optional("-q", quality) + + conditional(b, "-b") + + conditional(h, "-h") def cmdPipeInput = cmdBase + "-" def cmdPipe = cmdBase + required(input) def cmdLine = cmdPipe + " > " + required(output) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/Clever.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/Clever.scala index a46dd1e1abaecd2db7a5b0b368a106b88e19c619..4feeae576607c613c4d22d01036bf51f1d8468c5 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/Clever.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/Clever.scala @@ -1,6 +1,5 @@ package nl.lumc.sasc.biopet.extensions.svcallers - import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import org.broadinstitute.gatk.queue.QScript import nl.lumc.sasc.biopet.core.BiopetQScript @@ -9,63 +8,62 @@ import nl.lumc.sasc.biopet.core.PipelineCommand import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import java.io.File -class Clever(val root: Configurable) extends BiopetCommandLineFunction { +class Clever(val root: Configurable) extends BiopetCommandLineFunction { executable = config("exe", default = "clever") - + private lazy val versionexecutable: File = config("version_exe", default = (new File(executable).getParent + "/ctk-version")) - - + override val defaultVmem = "4G" override val defaultThreads = 8 - + override def versionCommand = versionexecutable.getAbsolutePath override val versionRegex = """(.*)""".r override val versionExitcode = List(0, 1) - + @Input(doc = "Input file (bam)") var input: File = _ - + @Input(doc = "Reference") var reference: File = _ - + @Argument(doc = "Work directory") var workdir: String = _ - + var cwd: String = _ - + @Output(doc = "Clever VCF output") lazy val outputvcf: File = { new File(cwd + "predictions.vcf") } - + @Output(doc = "Clever raw output") lazy val outputraw: File = { new File(workdir + "predictions.raw.txt") } - -// var T: Option[Int] = config("T", default = defaultThreads) + + // var T: Option[Int] = config("T", default = defaultThreads) var f: Boolean = config("f", default = true) // delete work directory before running -// var w: String = config("w", default = workdir + "/work") + // var w: String = config("w", default = workdir + "/work") var a: Boolean = config("a", default = false) // don't recompute AS tags var k: Boolean = config("k", default = false) // keep working directory var r: Boolean = config("r", default = false) // take read groups into account - + override def beforeCmd { if (workdir == null) throw new Exception("Clever :: Workdirectory is not defined") -// if (input.getName.endsWith(".sort.bam")) sorted = true + // if (input.getName.endsWith(".sort.bam")) sorted = true } def cmdLine = required(executable) + - " --sorted " + - " --use_xa " + - optional("-T", nCoresRequest) + - conditional(f ,"-f") + - conditional(a ,"-a") + - conditional(k, "-k") + - conditional(r ,"-r") + - required(this.input) + - required(this.reference) + - required(this.workdir) + " --sorted " + + " --use_xa " + + optional("-T", nCoresRequest) + + conditional(f, "-f") + + conditional(a, "-a") + + conditional(k, "-k") + + conditional(r, "-r") + + required(this.input) + + required(this.reference) + + required(this.workdir) } object Clever { @@ -79,47 +77,45 @@ object Clever { } } - - class CleverPipeline(val root: Configurable) extends QScript with BiopetQScript { def this() = this(null) - + @Input(doc = "Input file (bam)") var input: File = _ @Input(doc = "Reference") var reference: File = _ - + @Argument(doc = "Work directory") var workdir: String = _ - + @Argument(doc = "Current working directory") var cwd: String = _ - + override def init() { } def biopetScript() { // write the pipeline here logger.info("Starting Clever Pipeline") - + /// start clever and then copy the vcf into the root directory "<sample>.clever/" - val clever = Clever(this, input, reference, cwd, workdir ) - outputFiles += ("clever_vcf" -> clever.outputvcf ) - add( clever ) + val clever = Clever(this, input, reference, cwd, workdir) + outputFiles += ("clever_vcf" -> clever.outputvcf) + add(clever) } } object CleverPipeline extends PipelineCommand { override val pipeline = "/nl/lumc/sasc/biopet/extensions/svcallers/Clever/Clever.class" - + def apply(root: Configurable, input: File, runDir: String): CleverPipeline = { - val cleverpipeline = new CleverPipeline( root ) + val cleverpipeline = new CleverPipeline(root) cleverpipeline.input = input cleverpipeline.workdir = runDir cleverpipeline.init cleverpipeline.biopetScript return cleverpipeline } - + } \ No newline at end of file diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/pindel/Pindel.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/pindel/Pindel.scala index 20d7c5f4660c8291218447338fa3b5386ed0e828..3fb73023bc7ffeec9330e6aadf06838e2d483886 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/pindel/Pindel.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/pindel/Pindel.scala @@ -1,6 +1,5 @@ package nl.lumc.sasc.biopet.extensions.svcallers.pindel - import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction import org.broadinstitute.gatk.queue.QScript import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction @@ -13,21 +12,21 @@ import java.io.File /// Pindel is actually a mini pipeline executing binaries from the pindel package class Pindel(val root: Configurable) extends QScript with BiopetQScript { def this() = this(null) - + @Input(doc = "Input file (bam)") var input: File = _ @Input(doc = "Reference Fasta file") var reference: File = _ - + @Argument(doc = "Work directory") var workdir: String = _ - -// @Output(doc = "Pindel VCF output") -// lazy val outputvcf: File = { -// new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".pindel.vcf") -// } - + + // @Output(doc = "Pindel VCF output") + // lazy val outputvcf: File = { + // new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".pindel.vcf") + // } + @Output(doc = "Pindel config") lazy val configfile: File = { new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".pindel.cfg") @@ -36,34 +35,33 @@ class Pindel(val root: Configurable) extends QScript with BiopetQScript { lazy val outputvcf: File = { new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".pindel.vcf") } - + override def init() { } def biopetScript() { // read config and set all parameters for the pipeline logger.info("Starting Pindel configuration") - + val cfg = PindelConfig(this, input, this.configfile) - outputFiles += ("pindel_cfg" -> cfg.output ) - add( cfg ) - + outputFiles += ("pindel_cfg" -> cfg.output) + add(cfg) + val output: File = this.outputvcf - val pindel = PindelCaller( this, cfg.output, output ) - add( pindel ) - outputFiles += ("pindel_tsv" -> pindel.output ) + val pindel = PindelCaller(this, cfg.output, output) + add(pindel) + outputFiles += ("pindel_tsv" -> pindel.output) -// val output_vcf: File = this.outputvcf + // val output_vcf: File = this.outputvcf // convert this tsv to vcf using the python script - - + } - -// private def swapExtension(inputFile: String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".pindel.tsv" + + // private def swapExtension(inputFile: String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".pindel.tsv" } object Pindel extends PipelineCommand { - def apply(root: Configurable, input: File, reference: File, runDir: String): Pindel = { + def apply(root: Configurable, input: File, reference: File, runDir: String): Pindel = { val pindel = new Pindel(root) pindel.input = input pindel.reference = reference diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/pindel/PindelCaller.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/pindel/PindelCaller.scala index 69bd555e956df2c269ea516a636024f8f145f7ca..cdf3b88038f7151b8f47ddd9a0c9046a9ff62305 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/pindel/PindelCaller.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/pindel/PindelCaller.scala @@ -21,19 +21,19 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import java.io.File -class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction { +class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction { executable = config("exe", default = "pindel", freeVar = false) - + override val defaultVmem = "8G" override val defaultThreads = 8 - + override val versionRegex = """Pindel version:? (.*)""".r override val versionExitcode = List(1) override def versionCommand = executable - + @Input(doc = "The pindel configuration file") var input: File = _ - + @Input(doc = "Fasta reference") var reference: File = config("reference") @@ -45,21 +45,21 @@ class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction { // <prefix>_TR @Argument(doc = "Work directory") var workdir: String = _ - + @Output(doc = "Pindel VCF output") var output: File = _ - + var window_size: Option[Int] = config("window_size", default = 5) - + override def beforeCmd { } - def cmdLine = required(executable) + - "-i " + required(input) + - "-f " + required(reference) + - "-o " + required(output) + - optional("-w", window_size) + - optional("-T", nCoresRequest) + def cmdLine = required(executable) + + "-i " + required(input) + + "-f " + required(reference) + + "-o " + required(output) + + optional("-w", window_size) + + optional("-T", nCoresRequest) } object PindelCaller { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/pindel/PindelConfig.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/pindel/PindelConfig.scala index 850b9adaa200f028bfeede7587b697400f0fca5e..ad64a2fabf02023ffceea7531f4cafffd85ed0db 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/pindel/PindelConfig.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/svcallers/pindel/PindelConfig.scala @@ -23,7 +23,6 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } import java.io.File - class PindelConfig(val root: Configurable) extends BiopetJavaCommandLineFunction { javaMainClass = getClass.getName @Input(doc = "Bam File") @@ -31,13 +30,13 @@ class PindelConfig(val root: Configurable) extends BiopetJavaCommandLineFunction @Output(doc = "Output Config file") var output: File = _ - - @Argument(doc="Insertsize") + + @Argument(doc = "Insertsize") var insertsize: Option[Int] = _ - - override def commandLine = super.commandLine + - "-i" + required(input) + - "-s" + required(insertsize) + + + override def commandLine = super.commandLine + + "-i" + required(input) + + "-s" + required(insertsize) + "-o" + required(output) } @@ -60,28 +59,30 @@ object PindelConfig extends ToolCommand { } private def swapExtension(inputFile: String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".pindel.cfg" - - case class Args (inputbam:File = null, samplelabel:Option[String] = None, insertsize:Option[Int] = None) extends AbstractArgs + + case class Args(inputbam: File = null, samplelabel: Option[String] = None, insertsize: Option[Int] = None) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('i', "inputbam") required() valueName("<bamfile/path>") action { (x, c) => - c.copy(inputbam = x) } text("Please specify the input bam file") - opt[String]('l', "samplelabel") valueName("<sample label>") action { (x, c) => - c.copy(samplelabel = Some(x)) } text("Sample label is missing") - opt[Int]('s', "insertsize") valueName("<insertsize>") action { (x, c) => - c.copy(insertsize = Some(x)) } text("Insertsize is missing") + opt[File]('i', "inputbam") required () valueName ("<bamfile/path>") action { (x, c) => + c.copy(inputbam = x) + } text ("Please specify the input bam file") + opt[String]('l', "samplelabel") valueName ("<sample label>") action { (x, c) => + c.copy(samplelabel = Some(x)) + } text ("Sample label is missing") + opt[Int]('s', "insertsize") valueName ("<insertsize>") action { (x, c) => + c.copy(insertsize = Some(x)) + } text ("Insertsize is missing") } - + /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { val argsParser = new OptParser val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) - + val input: File = commandArgs.inputbam } } - diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala index 2d36a805816402ea8157806718641bd1d3e78c41..93539997e35e4c31bef7acc6e82c3abe7fcd79b6 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala @@ -6,51 +6,51 @@ import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline import org.broadinstitute.gatk.queue.QScript -class Basty (val root: Configurable) extends QScript with MultiSampleQScript { +class Basty(val root: Configurable) extends QScript with MultiSampleQScript { def this() = this(null) - + class LibraryOutput extends AbstractLibraryOutput { } - + class SampleOutput extends AbstractSampleOutput { } - + defaults ++= Map("ploidy" -> 1, "use_haplotypecaller" -> false, "use_unifiedgenotyper" -> true) - + var gatkPipeline: GatkPipeline = _ - + def init() { gatkPipeline = new GatkPipeline(this) gatkPipeline.outputDir = outputDir gatkPipeline.init } - + def biopetScript() { gatkPipeline.biopetScript addAll(gatkPipeline.functions) - + runSamplesJobs() } - + // Called for each sample def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = { val sampleOutput = new SampleOutput val sampleID: String = sampleConfig("ID").toString val sampleDir = globalSampleDir + sampleID - + sampleOutput.libraries = runLibraryJobs(sampleConfig) - + return sampleOutput } - + // Called for each run from a sample def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = { val libraryOutput = new LibraryOutput - + val runID: String = runConfig("ID").toString val sampleID: String = sampleConfig("ID").toString val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/" - + return libraryOutput } } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala index 268586ab64a5ec1f0f7597407057dbbca0f36356..9c549e4b8db6e991917e9deca8659d10f3f86e8f 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala @@ -15,12 +15,12 @@ import scala.collection.mutable.Map class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutadapt(root) { @Input(doc = "Fastq contams file", required = false) var contams_file: File = _ - + override def beforeCmd() { super.beforeCmd getContamsFromFile } - + override def cmdLine = { if (!opt_adapter.isEmpty || !opt_anywhere.isEmpty || !opt_front.isEmpty) { analysisName = getClass.getSimpleName @@ -30,7 +30,7 @@ class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutada Ln(this, fastq_input, fastq_output, relative = true).cmd } } - + def getContamsFromFile { if (contams_file != null) { if (contams_file.exists()) { @@ -48,23 +48,23 @@ class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutada } else logger.warn("File : " + contams_file + " does not exist") } } - + def getSummary: Json = { val trimR = """.*Trimmed reads: *(\d*) .*""".r val tooShortR = """.*Too short reads: *(\d*) .*""".r val tooLongR = """.*Too long reads: *(\d*) .*""".r val adapterR = """Adapter '([C|T|A|G]*)'.*trimmed (\d*) times.""".r - + var stats: Map[String, Int] = Map("trimmed" -> 0, "tooshort" -> 0, "toolong" -> 0) var adapter_stats: Map[String, Int] = Map() - + if (stats_output.exists) for (line <- Source.fromFile(stats_output).getLines) { line match { - case trimR(m) => stats += ("trimmed" -> m.toInt) - case tooShortR(m) => stats += ("tooshort" -> m.toInt) - case tooLongR(m) => stats += ("toolong" -> m.toInt) - case adapterR(adapter, count) => adapter_stats += (adapter -> count.toInt) - case _ => + case trimR(m) => stats += ("trimmed" -> m.toInt) + case tooShortR(m) => stats += ("tooshort" -> m.toInt) + case tooLongR(m) => stats += ("toolong" -> m.toInt) + case adapterR(adapter, count) => adapter_stats += (adapter -> count.toInt) + case _ => } } return ("num_reads_affected" := stats("trimmed")) ->: @@ -76,25 +76,25 @@ class Cutadapt(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Cutada } object Cutadapt { - def apply(root: Configurable, input:File, output:File): Cutadapt = { + def apply(root: Configurable, input: File, output: File): Cutadapt = { val cutadapt = new Cutadapt(root) cutadapt.fastq_input = input cutadapt.fastq_output = output cutadapt.stats_output = new File(output.getAbsolutePath.substring(0, output.getAbsolutePath.lastIndexOf(".")) + ".stats") return cutadapt } - + def mergeSummaries(jsons: List[Json]): Json = { var affected = 0 var tooShort = 0 var tooLong = 0 var adapter_stats: Map[String, Int] = Map() - + for (json <- jsons) { affected += json.field("num_reads_affected").get.numberOrZero.toInt tooShort += json.field("num_reads_discarded_too_short").get.numberOrZero.toInt tooLong += json.field("num_reads_discarded_too_long").get.numberOrZero.toInt - + val adapters = json.fieldOrEmptyObject("adapters") for (key <- adapters.objectFieldsOrEmpty) { val number = adapters.field(key).get.numberOrZero.toInt @@ -102,7 +102,7 @@ object Cutadapt { else adapter_stats += (key -> number) } } - + return ("num_reads_affected" := affected) ->: ("num_reads_discarded_too_short" := tooShort) ->: ("num_reads_discarded_too_long" := tooLong) ->: diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala index 970932940cf873b3ac7c75e4a94fed4c0dcd3e97..de042f1e4fd49786f31919b092f21aa48ca34a3f 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Fastqc.scala @@ -49,8 +49,8 @@ class Fastqc(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Fastqc(r "plot_sequence_length_distribution" -> "Images/sequence_length_distribution.png", "fastqc_data" -> "fastqc_data.txt") val dir = output.getAbsolutePath.stripSuffix(".zip") + "/" - var outputMap:Map[String,Map[String,String]] = Map() - for ((k,v) <- subfixs) outputMap += (k -> Map("path" -> (dir+v))) + var outputMap: Map[String, Map[String, String]] = Map() + for ((k, v) <- subfixs) outputMap += (k -> Map("path" -> (dir + v))) val temp = ("" := outputMap) ->: jEmptyObject return temp.fieldOrEmptyObject("") diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala index 5d65b7f01191a1911f5a3b8d750dadb746a13310..b0366e5f5bcf0036ed5e64c8ee5c7f3dcb87c8ef 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -6,7 +6,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Argument } import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.extensions.{ Gzip, Pbzip2, Md5sum, Zcat, Seqstat } -import nl.lumc.sasc.biopet.scripts.{ FastqSync , FastqcToContams} +import nl.lumc.sasc.biopet.scripts.{ FastqSync, FastqcToContams } class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { def this() = this(null) @@ -74,10 +74,10 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { runInitialJobs() val out = if (paired) runTrimClip(outputFiles("fastq_input_R1"), outputFiles("fastq_input_R2"), outputDir) - else runTrimClip(outputFiles("fastq_input_R1"), outputDir) - - val R1_files = for ((k,v) <- outputFiles if k.endsWith("output_R1")) yield v - val R2_files = for ((k,v) <- outputFiles if k.endsWith("output_R2")) yield v + else runTrimClip(outputFiles("fastq_input_R1"), outputDir) + + val R1_files = for ((k, v) <- outputFiles if k.endsWith("output_R1")) yield v + val R2_files = for ((k, v) <- outputFiles if k.endsWith("output_R2")) yield v runFinalize(R1_files.toList, R2_files.toList) } @@ -133,19 +133,19 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { var R1: File = new File(R1_in) var R2: File = new File(R2_in) var deps: List[File] = if (paired) List(R1, R2) else List(R1) - + val seqtkSeq_R1 = SeqtkSeq.apply(this, R1, swapExt(outDir, R1, R1_ext, ".sanger" + R1_ext), fastqc_R1) add(seqtkSeq_R1, isIntermediate = true) R1 = seqtkSeq_R1.output deps ::= R1 - + if (paired) { val seqtkSeq_R2 = SeqtkSeq.apply(this, R2, swapExt(outDir, R2, R2_ext, ".sanger" + R2_ext), fastqc_R2) add(seqtkSeq_R2, isIntermediate = true) R2 = seqtkSeq_R2.output deps ::= R2 } - + val seqstat_R1 = Seqstat(this, R1, outDir) add(seqstat_R1, isIntermediate = true) summary.addSeqstat(seqstat_R1, R2 = false, after = false, chunk) @@ -155,7 +155,7 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { add(seqstat_R2, isIntermediate = true) summary.addSeqstat(seqstat_R2, R2 = true, after = false, chunk) } - + if (!skipClip) { // Adapter clipping val cutadapt_R1 = Cutadapt(this, R1, swapExt(outDir, R1, R1_ext, ".clip" + R1_ext)) @@ -174,9 +174,9 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { summary.addCutadapt(cutadapt_R2, R2 = true, chunk) R2 = cutadapt_R2.fastq_output deps ::= R2 - - val fastqSync = FastqSync(this, cutadapt_R1.fastq_input, cutadapt_R1.fastq_output, cutadapt_R2.fastq_output, - swapExt(outDir, R1, R1_ext, ".sync" + R1_ext), swapExt(outDir, R2, R2_ext, ".sync" + R2_ext), swapExt(outDir, R1, R1_ext, ".sync.stats")) + + val fastqSync = FastqSync(this, cutadapt_R1.fastq_input, cutadapt_R1.fastq_output, cutadapt_R2.fastq_output, + swapExt(outDir, R1, R1_ext, ".sync" + R1_ext), swapExt(outDir, R2, R2_ext, ".sync" + R2_ext), swapExt(outDir, R1, R1_ext, ".sync.stats")) fastqSync.deps :::= deps add(fastqSync, isIntermediate = true) summary.addFastqcSync(fastqSync, chunk) @@ -225,7 +225,7 @@ class Flexiprep(val root: Configurable) extends QScript with BiopetQScript { if (fastq_R1.length != fastq_R2.length && paired) throw new IllegalStateException("R1 and R2 file number is not the same") val R1 = new File(outputDir + R1_name + ".qc" + R1_ext + ".gz") val R2 = new File(outputDir + R2_name + ".qc" + R2_ext + ".gz") - + add(Gzip(this, fastq_R1, R1)) if (paired) add(Gzip(this, fastq_R2, R2)) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala index 7a93d9022776f2d5083e9987329f2f711ec584d7..188bcb6bd96d29b8022a6ba6e54e789a6ee0ca07 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/FlexiprepSummary.scala @@ -48,8 +48,8 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co var flexiprep: Flexiprep = if (root.isInstanceOf[Flexiprep]) root.asInstanceOf[Flexiprep] else { throw new IllegalStateException("Root is no instance of Flexiprep") } - - var resources:Map[String, Json] = Map() + + var resources: Map[String, Json] = Map() def addFastqc(fastqc: Fastqc, R2: Boolean = false, after: Boolean = false): Fastqc = { if (!R2 && !after) this.fastqcR1 = fastqc @@ -135,7 +135,7 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co def seqstatSummary(): Option[Json] = { val R1_chunks = for ((key, value) <- chunks) yield value.seqstatR1.getSummary val R1: Json = Seqstat.mergeSummaries(R1_chunks.toList) - + val R2: Option[Json] = if (!flexiprep.paired) None else if (chunks.size == 1) Option(chunks.head._2.seqstatR2.getSummary) else { @@ -166,7 +166,7 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co val R2_raw = md5Summary(md5R2) val R1_proc = md5Summary(md5R1after) val R2_proc = md5Summary(md5R2after) - + if (!R1_raw.isEmpty) resources += ("fastq_R1_raw" -> R1_raw.get) if (!R2_raw.isEmpty) resources += ("fastq_R2_raw" -> R2_raw.get) if (!R1_proc.isEmpty) resources += ("fastq_R1_proc" -> R1_proc.get) @@ -178,12 +178,12 @@ class FlexiprepSummary(val root: Configurable) extends InProcessFunction with Co else return Option(md5sum.getSummary) } - def getResources(fastqc:Fastqc, md5sum:Md5sum): Option[Json] = { + def getResources(fastqc: Fastqc, md5sum: Md5sum): Option[Json] = { if (fastqc == null || md5sum == null) return None val fastqcSum = fastqcSummary(fastqc).get return Option(("fastq" := md5Summary(md5sum)) ->: fastqcSum) } - + def fastqcSummary(fastqc: Fastqc): Option[Json] = { if (fastqc == null) return None else return Option(fastqc.getSummary) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala index 70c4a5dc5c53c7fad7d95111935cf837aab54409..cb4a05e60615f5d180074578469b92b49b18f28e 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/SeqtkSeq.scala @@ -12,20 +12,20 @@ class SeqtkSeq(root: Configurable) extends nl.lumc.sasc.biopet.extensions.seqtk. if (fastqc != null && Q == None) { val encoding = fastqc.getEncoding Q = encoding match { - case null => None + case null => None case s if (s.contains("Sanger / Illumina 1.9")) => None - case s if (s.contains("Illumina <1.3")) => Option(64) - case s if (s.contains("Illumina 1.3")) => Option(64) - case s if (s.contains("Illumina 1.5")) => Option(64) + case s if (s.contains("Illumina <1.3")) => Option(64) + case s if (s.contains("Illumina 1.3")) => Option(64) + case s if (s.contains("Illumina 1.5")) => Option(64) } if (Q != None) V = true } } - + override def afterGraph { if (fastqc != null) deps ::= fastqc.output } - + override def cmdLine = { if (Q != None) { analysisName = getClass.getSimpleName @@ -38,7 +38,7 @@ class SeqtkSeq(root: Configurable) extends nl.lumc.sasc.biopet.extensions.seqtk. } object SeqtkSeq { - def apply(root: Configurable, input:File, output:File, fastqc:Fastqc = null): SeqtkSeq = { + def apply(root: Configurable, input: File, output: File, fastqc: Fastqc = null): SeqtkSeq = { val seqtkSeq = new SeqtkSeq(root) seqtkSeq.input = input seqtkSeq.output = output diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Sickle.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Sickle.scala index 74cdaacab38008a91bf4e064356403731874e316..f255bad4078ef67fbe7bfd999a7d30e186a3ce67 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Sickle.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Sickle.scala @@ -14,25 +14,25 @@ class Sickle(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Sickle(r val singleKept = """FastQ single records kept: (\d*) \(from PE1: (\d*), from PE2: (\d*)\)""".r val pairDiscarded = """FastQ paired records discarded: (\d*) \((\d*) pairs\)""".r val singleDiscarded = """FastQ single records discarded: (\d*) \(from PE1: (\d*), from PE2: (\d*)\)""".r - - var stats:Map[String, Int] = Map() - + + var stats: Map[String, Int] = Map() + if (output_stats.exists) for (line <- Source.fromFile(output_stats).getLines) { line match { case pairKept(reads, pairs) => stats += ("num_paired_reads_kept" -> reads.toInt) case singleKept(total, r1, r2) => { - stats += ("num_reads_kept_R1" -> r1.toInt) - stats += ("num_reads_kept_R2" -> r2.toInt) + stats += ("num_reads_kept_R1" -> r1.toInt) + stats += ("num_reads_kept_R2" -> r2.toInt) } case pairDiscarded(reads, pairs) => stats += ("num_paired_reads_discarded" -> reads.toInt) case singleDiscarded(total, r1, r2) => { - stats += ("num_reads_discarded_R1" -> r1.toInt) - stats += ("num_reads_discarded_R2" -> r2.toInt) + stats += ("num_reads_discarded_R1" -> r1.toInt) + stats += ("num_reads_discarded_R2" -> r2.toInt) } - case _ => + case _ => } } - + val temp = ("" := stats.toMap) ->: jEmptyObject return temp.fieldOrEmptyObject("") } @@ -41,7 +41,7 @@ class Sickle(root: Configurable) extends nl.lumc.sasc.biopet.extensions.Sickle(r object Sickle { def mergeSummaries(jsons: List[Json]): Json = { var total: Map[String, Int] = Map() - + for (json <- jsons) { for (key <- json.objectFieldsOrEmpty) { if (json.field(key).get.isNumber) { @@ -51,7 +51,7 @@ object Sickle { } } } - + val temp = ("" := total.toMap) ->: jEmptyObject return temp.fieldOrEmptyObject("") } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala index 9cd93a32dc2f0663bcedf50ebb40e53945458765..623dc8c9a783bf33360d9a4654c16f056e9f0ab7 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala @@ -16,7 +16,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Argument } class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScript { def this() = this(null) - + @Argument(doc = "Only Sample", shortName = "sample", required = false) val onlySample: String = "" @@ -28,10 +28,10 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri @Argument(doc = "Joint variantcalling", shortName = "jointVariantCalling", required = false) var jointVariantcalling = false - + @Argument(doc = "Joint genotyping", shortName = "jointGenotyping", required = false) var jointGenotyping = false - + var singleSampleCalling = true var reference: File = _ var dbsnp: File = _ @@ -43,11 +43,11 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri var mappedBamFile: File = _ var variantcalling: GatkVariantcalling.ScriptOutput = _ } - + class SampleOutput extends AbstractSampleOutput { var variantcalling: GatkVariantcalling.ScriptOutput = _ } - + def init() { useAllelesOption = config("use_alleles_option", default = false) reference = config("reference", required = true) @@ -68,7 +68,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri def biopetScript() { if (onlySample.isEmpty) { runSamplesJobs - + //SampleWide jobs if (mergeGvcfs && gvcfFiles.size > 0) { val newFile = outputDir + "merged.gvcf.vcf.gz" @@ -87,18 +87,20 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri var vcfFile = gatkGenotyping.outputFile } } else logger.warn("No gVCFs to genotype") - + if (jointVariantcalling) { - val allBamfiles = for ((sampleID,sampleOutput) <- samplesOutput; - file <- sampleOutput.variantcalling.bamFiles) yield file - val allRawVcfFiles = for ((sampleID,sampleOutput) <- samplesOutput) yield sampleOutput.variantcalling.rawFilterVcfFile - + val allBamfiles = for ( + (sampleID, sampleOutput) <- samplesOutput; + file <- sampleOutput.variantcalling.bamFiles + ) yield file + val allRawVcfFiles = for ((sampleID, sampleOutput) <- samplesOutput) yield sampleOutput.variantcalling.rawFilterVcfFile + val cvRaw = CombineVariants(this, allRawVcfFiles.toList, outputDir + "variantcalling/multisample.raw.vcf.gz") add(cvRaw) - + val gatkVariantcalling = new GatkVariantcalling(this) { override protected lazy val configName = "gatkvariantcalling" - override def configPath: List[String] = "multisample" :: super.configPath + override def configPath: List[String] = "multisample" :: super.configPath } gatkVariantcalling.preProcesBams = Some(false) gatkVariantcalling.doublePreProces = Some(false) @@ -109,7 +111,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri gatkVariantcalling.init gatkVariantcalling.biopetScript addAll(gatkVariantcalling.functions) - + if (config("inputtype", default = "dna").getString != "rna" && config("recalibration", default = false).getBoolean) { val recalibration = new GatkVariantRecalibration(this) recalibration.inputVcf = gatkVariantcalling.scriptOutput.finalVcfFile @@ -169,10 +171,10 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri } else if (runConfig.contains("bam")) { var bamFile = new File(runConfig("bam").toString) if (!bamFile.exists) throw new IllegalStateException("Bam in config does not exist, file: " + bamFile) - + if (config("bam_to_fastq", default = false).getBoolean) { - val samToFastq = SamToFastq(this, bamFile, runDir + sampleID + "-" + runID + ".R1.fastq", - runDir + sampleID + "-" + runID + ".R2.fastq") + val samToFastq = SamToFastq(this, bamFile, runDir + sampleID + "-" + runID + ".R1.fastq", + runDir + sampleID + "-" + runID + ".R2.fastq") add(samToFastq, isIntermediate = true) val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir, startJobs = false) mapping.input_R1 = samToFastq.fastqR1 @@ -206,15 +208,15 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri if (runConfig.contains("CN")) aorrg.RGCN = runConfig("CN").toString add(aorrg, isIntermediate = true) bamFile = aorrg.output - } else throw new IllegalStateException("Readgroup sample and/or library of input bamfile is not correct, file: " + bamFile + - "\nPossible to set 'correct_readgroups' to true on config to automatic fix this") + } else throw new IllegalStateException("Readgroup sample and/or library of input bamfile is not correct, file: " + bamFile + + "\nPossible to set 'correct_readgroups' to true on config to automatic fix this") } addAll(BamMetrics(this, bamFile, runDir + "metrics/").functions) libraryOutput.mappedBamFile = bamFile } } else logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig) - + val gatkVariantcalling = new GatkVariantcalling(this) gatkVariantcalling.inputBams = List(libraryOutput.mappedBamFile) gatkVariantcalling.outputDir = runDir @@ -225,7 +227,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri gatkVariantcalling.biopetScript addAll(gatkVariantcalling.functions) libraryOutput.variantcalling = gatkVariantcalling.scriptOutput - + return libraryOutput } } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala index 8e6a07887c89e262c749bb7cc53faa7bb47e7b16..d5dcdb9f2289b19e1835c828be988ed57d6cfe04 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala @@ -10,36 +10,36 @@ import org.broadinstitute.gatk.queue.QScript class GatkVariantRecalibration(val root: Configurable) extends QScript with BiopetQScript { def this() = this(null) - + @Input(doc = "input vcf file", shortName = "I") var inputVcf: File = _ - + @Input(doc = "input vcf file", shortName = "BAM", required = false) var bamFiles: List[File] = Nil - + @Output(doc = "output vcf file", shortName = "out") var outputVcf: File = _ - + def init() { if (inputVcf == null) throw new IllegalStateException("Missing Output directory on gatk module") if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") else if (!outputDir.endsWith("/")) outputDir += "/" } - + def biopetScript() { var vcfFile: File = if (!bamFiles.isEmpty) addVariantAnnotator(inputVcf, bamFiles, outputDir) else inputVcf vcfFile = addSnpVariantRecalibrator(vcfFile, outputDir) vcfFile = addIndelVariantRecalibrator(vcfFile, outputDir) } - + def addSnpVariantRecalibrator(inputVcf: File, dir: String): File = { - val snpRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"), - swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = false) + val snpRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"), + swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = false) if (!snpRecal.resource.isEmpty) { add(snpRecal) - val snpApply = ApplyRecalibration(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf"), - snpRecal.recal_file, snpRecal.tranches_file, indel = false) + val snpApply = ApplyRecalibration(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf"), + snpRecal.recal_file, snpRecal.tranches_file, indel = false) add(snpApply) return snpApply.out @@ -50,13 +50,13 @@ class GatkVariantRecalibration(val root: Configurable) extends QScript with Biop } def addIndelVariantRecalibrator(inputVcf: File, dir: String): File = { - val indelRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"), - swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = true) + val indelRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"), + swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = true) if (!indelRecal.resource.isEmpty) { add(indelRecal) - - val indelApply = ApplyRecalibration(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf"), - indelRecal.recal_file, indelRecal.tranches_file, indel = true) + + val indelApply = ApplyRecalibration(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf"), + indelRecal.recal_file, indelRecal.tranches_file, indel = true) add(indelApply) return indelApply.out @@ -67,7 +67,7 @@ class GatkVariantRecalibration(val root: Configurable) extends QScript with Biop } def addVariantAnnotator(inputvcf: File, bamfiles: List[File], dir: String): File = { - val variantAnnotator = VariantAnnotator(this, inputvcf, bamfiles, swapExt(dir, inputvcf, ".vcf", ".anotated.vcf")) + val variantAnnotator = VariantAnnotator(this, inputvcf, bamfiles, swapExt(dir, inputvcf, ".vcf", ".anotated.vcf")) add(variantAnnotator) return variantAnnotator.out } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala index bf63aaaf4341ded9f33efe100ad0da1c04dfc0fd..c39a2fc85531cac296f4f77cb39ecb0495d57f55 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala @@ -1,10 +1,10 @@ package nl.lumc.sasc.biopet.pipelines.gatk -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand} +import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } import java.io.File import nl.lumc.sasc.biopet.tools.{ MpileupToVcf, VcfFilter } import nl.lumc.sasc.biopet.core.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.{AnalyzeCovariates,BaseRecalibrator,GenotypeGVCFs,HaplotypeCaller,IndelRealigner,PrintReads,RealignerTargetCreator, SelectVariants, CombineVariants, UnifiedGenotyper} +import nl.lumc.sasc.biopet.extensions.gatk.{ AnalyzeCovariates, BaseRecalibrator, GenotypeGVCFs, HaplotypeCaller, IndelRealigner, PrintReads, RealignerTargetCreator, SelectVariants, CombineVariants, UnifiedGenotyper } import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile @@ -15,10 +15,10 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr def this() = this(null) val scriptOutput = new GatkVariantcalling.ScriptOutput - + @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM") var inputBams: List[File] = Nil - + @Input(doc = "Raw vcf file", shortName = "raw") var rawVcfInput: File = _ @@ -33,7 +33,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr @Argument(doc = "Sample name", required = false) var sampleID: String = _ - + var preProcesBams: Option[Boolean] = None var variantcalling: Boolean = true var doublePreProces: Option[Boolean] = None @@ -55,20 +55,20 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr else if (!outputDir.endsWith("/")) outputDir += "/" } - private def doublePreProces(files:List[File]): List[File] = { - if (files.size == 1) return files - if (files.isEmpty) throw new IllegalStateException("Files can't be empty") - if (!doublePreProces.get) return files - val markDub = MarkDuplicates(this, files, new File(outputDir + outputName + ".dedup.bam")) - if (dbsnp != null) { - add(markDub, isIntermediate = true) - List(addIndelRealign(markDub.output, outputDir, isIntermediate = false)) - } else { - add(markDub, isIntermediate = true) - List(markDub.output) - } + private def doublePreProces(files: List[File]): List[File] = { + if (files.size == 1) return files + if (files.isEmpty) throw new IllegalStateException("Files can't be empty") + if (!doublePreProces.get) return files + val markDub = MarkDuplicates(this, files, new File(outputDir + outputName + ".dedup.bam")) + if (dbsnp != null) { + add(markDub, isIntermediate = true) + List(addIndelRealign(markDub.output, outputDir, isIntermediate = false)) + } else { + add(markDub, isIntermediate = true) + List(markDub.output) } - + } + def biopetScript() { scriptOutput.bamFiles = if (preProcesBams.get) { var bamFiles: List[File] = Nil @@ -80,10 +80,10 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr } else if (inputBams.size > 1 && doublePreProces.get) { doublePreProces(inputBams) } else inputBams - + if (variantcalling) { var mergBuffer: SortedMap[String, File] = SortedMap() - + if (sampleID != null && (useHaplotypecaller.get || config("joint_genotyping", default = false).getBoolean)) { val hcGvcf = new HaplotypeCaller(this) hcGvcf.useGvcf @@ -92,7 +92,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr add(hcGvcf) scriptOutput.gvcfFile = hcGvcf.out } - + if (useHaplotypecaller.get) { if (sampleID != null) { val genotypeGVCFs = GenotypeGVCFs(this, List(scriptOutput.gvcfFile), outputDir + outputName + ".hc.discovery.vcf.gz") @@ -107,7 +107,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr } mergBuffer += ("1.HC-Discovery" -> scriptOutput.hcVcfFile) } - + if (useUnifiedGenotyper.get) { val ugVcf = new UnifiedGenotyper(this) ugVcf.input_file = scriptOutput.bamFiles @@ -116,7 +116,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr scriptOutput.ugVcfFile = ugVcf.out mergBuffer += ("2.UG-Discovery" -> scriptOutput.ugVcfFile) } - + // Generate raw vcf if (sampleID != null && scriptOutput.bamFiles.size == 1) { val m2v = new MpileupToVcf(this) @@ -127,10 +127,10 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr scriptOutput.rawVcfFile = m2v.output val vcfFilter = new VcfFilter(this) - vcfFilter.defaults ++= Map("min_sample_depth" -> 8, - "min_alternate_depth" -> 2, - "min_samples_pass" -> 1, - "filter_ref_calls" -> true) + vcfFilter.defaults ++= Map("min_sample_depth" -> 8, + "min_alternate_depth" -> 2, + "min_samples_pass" -> 1, + "filter_ref_calls" -> true) vcfFilter.inputVcf = m2v.output vcfFilter.outputVcf = this.swapExt(outputDir, m2v.output, ".vcf", ".filter.vcf.gz") add(vcfFilter) @@ -138,7 +138,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr } else if (rawVcfInput != null) scriptOutput.rawFilterVcfFile = rawVcfInput if (scriptOutput.rawFilterVcfFile == null) throw new IllegalStateException("Files can't be empty") mergBuffer += ("9.raw" -> scriptOutput.rawFilterVcfFile) - + if (useAllelesOption.get) { val alleleOnly = new CommandLineFunction { @Input val input: File = scriptOutput.rawFilterVcfFile @@ -147,7 +147,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr def commandLine = "zcat " + input + " | cut -f1,2,3,4,5,6,7,8 | bgzip -c > " + output + " && tabix -pvcf " + output } add(alleleOnly, isIntermediate = true) - + if (useHaplotypecaller.get) { val hcAlleles = new HaplotypeCaller(this) hcAlleles.input_file = scriptOutput.bamFiles @@ -158,7 +158,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr scriptOutput.hcAlleleVcf = hcAlleles.out mergBuffer += ("3.HC-alleles" -> hcAlleles.out) } - + if (useUnifiedGenotyper.get) { val ugAlleles = new UnifiedGenotyper(this) ugAlleles.input_file = scriptOutput.bamFiles @@ -171,7 +171,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr } } - def removeNoneVariants(input:File): File = { + def removeNoneVariants(input: File): File = { val output = input.getAbsolutePath.stripSuffix(".vcf.gz") + ".variants_only.vcf.gz" val sv = SelectVariants(this, input, output) sv.excludeFiltered = true @@ -180,7 +180,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr sv.out } - def mergeList = mergBuffer map {case (key, file) => TaggedFile(removeNoneVariants(file), "name=" + key)} + def mergeList = mergBuffer map { case (key, file) => TaggedFile(removeNoneVariants(file), "name=" + key) } val cvFinal = CombineVariants(this, mergeList.toList, outputDir + outputName + ".final.vcf.gz") add(cvFinal) scriptOutput.finalVcfFile = cvFinal.out @@ -199,7 +199,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr def addBaseRecalibrator(inputBam: File, dir: String, isIntermediate: Boolean = false): File = { val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal")) //with gatkArguments { - + if (baseRecalibrator.knownSites.isEmpty) { logger.warn("No Known site found, skipping base recalibration, file: " + inputBam) return inputBam diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala index 137e6bd4ad531e19d0f0d5348dc2d927bfe2ebe0..2ed691bb0d52b94a07b8ccef7c84909cbf3ff372 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala @@ -6,7 +6,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants import nl.lumc.sasc.biopet.extensions.gatk.SelectVariants import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.queue.extensions.gatk.{ CommandLineGATK, VariantEval} +import org.broadinstitute.gatk.queue.extensions.gatk.{ CommandLineGATK, VariantEval } import org.broadinstitute.gatk.utils.commandline.{ Input, Argument } class GatkVcfSampleCompare(val root: Configurable) extends QScript with BiopetQScript { @@ -20,24 +20,24 @@ class GatkVcfSampleCompare(val root: Configurable) extends QScript with BiopetQS @Argument(doc = "Target bed", shortName = "targetBed", required = false) var targetBed: List[File] = Nil - + @Argument(doc = "Samples", shortName = "sample", required = false) var samples: List[String] = Nil - + var vcfFile: File = _ - var sampleVcfs:Map[String, File] = Map() + var sampleVcfs: Map[String, File] = Map() def generalSampleDir = outputDir + "samples/" - + trait gatkArguments extends CommandLineGATK { this.reference_sequence = reference this.memoryLimit = 2 this.jobResourceRequests :+= "h_vmem=4G" } - + def init() { if (reference == null) reference = config("reference") - if (config.contains("target_bed")) - for (bed <- config("target_bed").getList) + if (config.contains("target_bed")) + for (bed <- config("target_bed").getList) targetBed :+= bed.toString if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") else if (!outputDir.endsWith("/")) outputDir += "/" @@ -49,7 +49,7 @@ class GatkVcfSampleCompare(val root: Configurable) extends QScript with BiopetQS add(combineVariants) combineVariants.out } else vcfFiles.head - + for (sample <- samples) { sampleVcfs += (sample -> new File(generalSampleDir + sample + File.separator + sample + ".vcf")) val selectVariants = SelectVariants(this, vcfFile, sampleVcfs(sample)) @@ -57,7 +57,7 @@ class GatkVcfSampleCompare(val root: Configurable) extends QScript with BiopetQS selectVariants.excludeNonVariants = true add(selectVariants) } - + val sampleCompareMetrics = new SampleCompareMetrics(this) sampleCompareMetrics.samples = samples sampleCompareMetrics.sampleDir = generalSampleDir @@ -66,10 +66,10 @@ class GatkVcfSampleCompare(val root: Configurable) extends QScript with BiopetQS sampleCompareMetrics.indelRelFile = outputDir + "compare.indel.rel.tsv" sampleCompareMetrics.indelAbsFile = outputDir + "compare.indel.abs.tsv" sampleCompareMetrics.totalFile = outputDir + "total.tsv" - - for ((sample,sampleVcf) <- sampleVcfs) { + + for ((sample, sampleVcf) <- sampleVcfs) { val sampleDir = generalSampleDir + sample + File.separator - for ((compareSample,compareSampleVcf) <- sampleVcfs) { + for ((compareSample, compareSampleVcf) <- sampleVcfs) { val variantEval = new VariantEval with gatkArguments variantEval.eval = Seq(sampleVcf) variantEval.comp = Seq(compareSampleVcf) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/SampleCompareMetrics.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/SampleCompareMetrics.scala index 8c111d15dd962a11205e4daa83e2202ef4f698a0..b66ac47c353a84a3093d61a343d294cc2af93c58 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/SampleCompareMetrics.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/SampleCompareMetrics.scala @@ -7,7 +7,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.R.RScriptExecutor import org.broadinstitute.gatk.utils.commandline.{ Output, Argument } import scala.io.Source -import org.broadinstitute.gatk.utils.R.{RScriptLibrary, RScriptExecutor} +import org.broadinstitute.gatk.utils.R.{ RScriptLibrary, RScriptExecutor } import org.broadinstitute.gatk.utils.io.Resource import scala.collection.mutable.Map import scala.math._ @@ -17,39 +17,39 @@ class SampleCompareMetrics(val root: Configurable) extends BiopetJavaCommandLine @Argument(doc = "Sample Dir", shortName = "sampleDir", required = true) var sampleDir: String = _ - + @Argument(doc = "Samples", shortName = "sample", required = true) var samples: List[String] = Nil - + @Argument(doc = "File sufix", shortName = "sufix", required = false) var fileSufix: String = _ - + @Output(doc = "snpRelFile", shortName = "snpRelFile", required = true) var snpRelFile: File = _ - + @Output(doc = "snpAbsFile", shortName = "snpAbsFile", required = true) var snpAbsFile: File = _ - + @Output(doc = "indelRelFile", shortName = "indelRelFile", required = true) var indelRelFile: File = _ - + @Output(doc = "indelAbsFile", shortName = "indelAbsFile", required = true) var indelAbsFile: File = _ - + @Output(doc = "totalFile", shortName = "totalFile", required = true) var totalFile: File = _ - + override val defaultVmem = "8G" memoryLimit = Option(4.0) - - override def commandLine = super.commandLine + - required("-sampleDir", sampleDir) + - repeat("-sample", samples) + - optional("-fileSufix", fileSufix) + - required("-snpRelFile", snpRelFile) + - required("-snpAbsFile", snpAbsFile) + - required("-indelRelFile", indelRelFile) + - required("-indelAbsFile", indelAbsFile) + + + override def commandLine = super.commandLine + + required("-sampleDir", sampleDir) + + repeat("-sample", samples) + + optional("-fileSufix", fileSufix) + + required("-snpRelFile", snpRelFile) + + required("-snpAbsFile", snpAbsFile) + + required("-indelRelFile", indelRelFile) + + required("-indelAbsFile", indelAbsFile) + required("-totalFile", totalFile) } @@ -66,41 +66,41 @@ object SampleCompareMetrics { * @param args the command line arguments */ def main(args: Array[String]): Unit = { - + for (t <- 0 until args.size) { args(t) match { - case "-sample" => samples +:= args(t+1) - case "-sampleDir" => sampleDir = args(t+1) - case "-fileSufix" => fileSufix = args(t+1) - case "-snpRelFile" => snpRelFile = new File(args(t+1)) - case "-snpAbsFile" => snpAbsFile = new File(args(t+1)) - case "-indelRelFile" => indelRelFile = new File(args(t+1)) - case "-indelAbsFile" => indelAbsFile = new File(args(t+1)) - case "-totalFile" => totalFile = new File(args(t+1)) - case _ => + case "-sample" => samples +:= args(t + 1) + case "-sampleDir" => sampleDir = args(t + 1) + case "-fileSufix" => fileSufix = args(t + 1) + case "-snpRelFile" => snpRelFile = new File(args(t + 1)) + case "-snpAbsFile" => snpAbsFile = new File(args(t + 1)) + case "-indelRelFile" => indelRelFile = new File(args(t + 1)) + case "-indelAbsFile" => indelAbsFile = new File(args(t + 1)) + case "-totalFile" => totalFile = new File(args(t + 1)) + case _ => } } if (sampleDir == null) throw new IllegalStateException("No sampleDir, use -sampleDir") else if (!sampleDir.endsWith("/")) sampleDir += "/" - + val regex = """\W+""".r - val snpsOverlap: Map[(String,String), Int] = Map() - val indelsOverlap: Map[(String,String), Int] = Map() + val snpsOverlap: Map[(String, String), Int] = Map() + val indelsOverlap: Map[(String, String), Int] = Map() val snpsTotal: Map[String, Int] = Map() val indelsTotal: Map[String, Int] = Map() for (sample1 <- samples; sample2 <- samples) { val reader = Source.fromFile(new File(sampleDir + sample1 + "/" + sample1 + "-" + sample2 + fileSufix)) for (line <- reader.getLines) { regex.split(line) match { - case Array(_,_,_,varType, all, novel, overlap, rate, _*) => { + case Array(_, _, _, varType, all, novel, overlap, rate, _*) => { varType match { case "SNP" => { - snpsOverlap += (sample1, sample2) -> overlap.toInt - snpsTotal += sample1 -> all.toInt + snpsOverlap += (sample1, sample2) -> overlap.toInt + snpsTotal += sample1 -> all.toInt } case "INDEL" => { - indelsOverlap += (sample1, sample2) -> overlap.toInt - indelsTotal += sample1 -> all.toInt + indelsOverlap += (sample1, sample2) -> overlap.toInt + indelsTotal += sample1 -> all.toInt } case _ => } @@ -110,12 +110,12 @@ object SampleCompareMetrics { } reader.close() } - + val snpRelWritter = new PrintWriter(snpRelFile) val snpAbsWritter = new PrintWriter(snpAbsFile) val indelRelWritter = new PrintWriter(indelRelFile) val indelAbsWritter = new PrintWriter(indelAbsFile) - + val allWritters = List(snpRelWritter, snpAbsWritter, indelRelWritter, indelAbsWritter) for (writter <- allWritters) writter.println(samples.mkString("\t", "\t", "")) for (sample1 <- samples) { @@ -129,14 +129,14 @@ object SampleCompareMetrics { for (writter <- allWritters) writter.println() } for (writter <- allWritters) writter.close() - + val totalWritter = new PrintWriter(totalFile) totalWritter.println("Sample\tSNPs\tIndels") - for (sample <- samples) + for (sample <- samples) totalWritter.println(sample + "\t" + snpsTotal(sample) + "\t" + indelsTotal(sample)) totalWritter.close() - - def plot(file:File) { + + def plot(file: File) { val executor = new RScriptExecutor executor.addScript(new Resource("plotHeatmap.R", getClass)) executor.addArgs(file, file.getAbsolutePath.stripSuffix(".tsv") + ".png", file.getAbsolutePath.stripSuffix(".tsv") + ".clustering.png") diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index d3085d3447cafaaf8579ed684fdead1c5c3c1e36..ad1406f566dab1c061e7cb52f7feae8e9c260df9 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -5,8 +5,8 @@ import java.io.File import java.util.Date import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } import nl.lumc.sasc.biopet.tools.FastqSplitter -import nl.lumc.sasc.biopet.extensions.aligners.{ Bwa, Star , Bowtie, Stampy} -import nl.lumc.sasc.biopet.extensions.picard.{MarkDuplicates, SortSam, MergeSamFiles, AddOrReplaceReadGroups} +import nl.lumc.sasc.biopet.extensions.aligners.{ Bwa, Star, Bowtie, Stampy } +import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, SortSam, MergeSamFiles, AddOrReplaceReadGroups } import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep import org.broadinstitute.gatk.queue.QScript @@ -34,7 +34,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { @Argument(doc = "Skip metrics", shortName = "skipmetrics", required = false) var skipMetrics: Boolean = false - + @Argument(doc = "Aligner", shortName = "ALN", required = false) var aligner: String = _ @@ -43,7 +43,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { @Argument(doc = "Chunking", shortName = "chunking", required = false) var chunking: Boolean = false - + @ClassType(classOf[Int]) @Argument(doc = "Number of chunks, when not defined pipeline will automatic calculate number of chunks", shortName = "numberChunks", required = false) var numberChunks: Option[Int] = None @@ -79,7 +79,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { var paired: Boolean = false var defaultAligner = "bwa" val flexiprep = new Flexiprep(this) - + def init() { for (file <- configfiles) globalConfig.loadConfigFile(file) if (aligner == null) aligner = config("aligner", default = defaultAligner) @@ -112,7 +112,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { else { val chunkSize: Int = config("chunksize", (1 << 30)) val filesize = if (input_R1.getName.endsWith(".gz") || input_R1.getName.endsWith(".gzip")) input_R1.length * 3 - else input_R1.length + else input_R1.length numberChunks = Option(ceil(filesize.toDouble / chunkSize).toInt) } } @@ -146,8 +146,9 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { val chunkDir = outputDir + "chunks/" + t + "/" chunks += (chunkDir -> (removeGz(chunkDir + fastq_R1.getName), if (paired) removeGz(chunkDir + fastq_R2.getName) else "")) - } else chunks += (outputDir -> (flexiprep.extractIfNeeded(fastq_R1, flexiprep.outputDir), - flexiprep.extractIfNeeded(fastq_R2, flexiprep.outputDir))) + } + else chunks += (outputDir -> (flexiprep.extractIfNeeded(fastq_R1, flexiprep.outputDir), + flexiprep.extractIfNeeded(fastq_R2, flexiprep.outputDir))) if (chunking) { val fastSplitter_R1 = new FastqSplitter(this) @@ -176,16 +177,16 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { fastq_R1_output :+= R1 fastq_R2_output :+= R2 } - + val outputBam = new File(chunkDir + outputName + ".bam") bamFiles :+= outputBam aligner match { - case "bwa" => addBwa(R1, R2, outputBam, deps) - case "bowtie" => addBowtie(R1, R2, outputBam, deps) - case "stampy" => addStampy(R1, R2, outputBam, deps) - case "star" => addStar(R1, R2, outputBam, deps) + case "bwa" => addBwa(R1, R2, outputBam, deps) + case "bowtie" => addBowtie(R1, R2, outputBam, deps) + case "stampy" => addStampy(R1, R2, outputBam, deps) + case "star" => addStar(R1, R2, outputBam, deps) case "star-2pass" => addStar2pass(R1, R2, outputBam, deps) - case _ => throw new IllegalStateException("Option Aligner: '" + aligner + "' is not valid") + case _ => throw new IllegalStateException("Option Aligner: '" + aligner + "' is not valid") } if (config("chunk_metrics", default = false)) addAll(BamMetrics(this, outputBam, chunkDir + "metrics/").functions) @@ -204,13 +205,13 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { add(mergeSamFile) bamFile = mergeSamFile.output } - + if (!skipMetrics) addAll(BamMetrics(this, bamFile, outputDir + "metrics/").functions) - + outputFiles += ("finalBamFile" -> bamFile) } - def addBwa(R1:File, R2:File, output:File, deps:List[File]): File = { + def addBwa(R1: File, R2: File, output: File, deps: List[File]): File = { val bwaCommand = new Bwa(this) bwaCommand.R1 = R1 if (paired) bwaCommand.R2 = R2 @@ -223,9 +224,9 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { add(sortSam) return sortSam.output } - - def addStampy(R1:File, R2:File, output:File, deps:List[File]): File = { - + + def addStampy(R1: File, R2: File, output: File, deps: List[File]): File = { + var RG: String = "ID:" + RGID + "," RG += "SM:" + RGSM + "," RG += "LB:" + RGLB + "," @@ -249,8 +250,8 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { add(sortSam) return sortSam.output } - - def addBowtie(R1:File, R2:File, output:File, deps:List[File]): File = { + + def addBowtie(R1: File, R2: File, output: File, deps: List[File]): File = { val bowtie = new Bowtie(this) bowtie.R1 = R1 if (paired) bowtie.R2 = R2 @@ -259,19 +260,19 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { add(bowtie, isIntermediate = true) return addAddOrReplaceReadGroups(bowtie.output, output) } - - def addStar(R1:File, R2:File, output:File, deps:List[File]): File = { + + def addStar(R1: File, R2: File, output: File, deps: List[File]): File = { val starCommand = Star(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true, deps = deps) add(starCommand) return addAddOrReplaceReadGroups(starCommand.outputSam, output) } - - def addStar2pass(R1:File, R2:File, output:File, deps:List[File]): File = { + + def addStar2pass(R1: File, R2: File, output: File, deps: List[File]): File = { val starCommand = Star._2pass(this, R1, if (paired) R2 else null, outputDir, isIntermediate = true, deps = deps) addAll(starCommand._2) return addAddOrReplaceReadGroups(starCommand._1, output) } - + def addAddOrReplaceReadGroups(input: File, output: File): File = { val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output) addOrReplaceReadGroups.createIndex = true @@ -305,7 +306,7 @@ class Mapping(val root: Configurable) extends QScript with BiopetQScript { } object Mapping extends PipelineCommand { - def loadFromLibraryConfig(root: Configurable, runConfig: Map[String, Any], sampleConfig: Map[String, Any], + def loadFromLibraryConfig(root: Configurable, runConfig: Map[String, Any], sampleConfig: Map[String, Any], runDir: String, startJobs: Boolean = true): Mapping = { val mapping = new Mapping(root) @@ -323,7 +324,7 @@ object Mapping extends PipelineCommand { if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString mapping.outputDir = runDir - + if (startJobs) { mapping.init mapping.biopetScript diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala index 36da78d18051e41b59085f98340a0575ef9ea89c..937b84261a20935025aeb79c4f555a7fdfb6827b 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/sage/Sage.scala @@ -18,45 +18,45 @@ import org.broadinstitute.gatk.queue.function._ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { def this() = this(null) - + @Input(doc = "countBed", required = false) - var countBed : File = _ - + var countBed: File = _ + @Input(doc = "squishedCountBed, by suppling this file the auto squish job will be skipped", required = false) - var squishedCountBed : File = _ - + var squishedCountBed: File = _ + @Input(doc = "Transcriptome, used for generation of tag library", required = false) - var transcriptome : File = _ - - var tagsLibrary : File = _ - + var transcriptome: File = _ + + var tagsLibrary: File = _ + defaults ++= Map("bowtie" -> Map( - "m" -> 1, - "k" -> 1, - "best" -> true, - "strata" -> true, - "seedmms" -> 1 - ) - ) - + "m" -> 1, + "k" -> 1, + "best" -> true, + "strata" -> true, + "seedmms" -> 1 + ) + ) + class LibraryOutput extends AbstractLibraryOutput { var mappedBamFile: File = _ var prefixFastq: File = _ } - + class SampleOutput extends AbstractSampleOutput { - + } - + def init() { if (!outputDir.endsWith("/")) outputDir += "/" if (countBed == null) countBed = config("count_bed") if (squishedCountBed == null) squishedCountBed = config("squished_count_bed") if (tagsLibrary == null) tagsLibrary = config("tags_library") if (transcriptome == null) transcriptome = config("transcriptome") - if (transcriptome == null && tagsLibrary == null) + if (transcriptome == null && tagsLibrary == null) throw new IllegalStateException("No transcriptome or taglib found") - if (countBed == null && squishedCountBed == null) + if (countBed == null && squishedCountBed == null) throw new IllegalStateException("No bedfile supplied, please add a countBed or squishedCountBed") } @@ -66,7 +66,7 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { add(squishBed) squishedCountBed = squishBed.output } - + if (tagsLibrary == null) { val cdl = new SageCreateLibrary(this) cdl.input = transcriptome @@ -77,38 +77,38 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { add(cdl) tagsLibrary = cdl.output } - + runSamplesJobs } - + // Called for each sample def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = { val sampleOutput = new SampleOutput var libraryBamfiles: List[File] = List() var libraryFastqFiles: List[File] = List() val sampleID: String = sampleConfig("ID").toString - val sampleDir: String = globalSampleDir + sampleID + "/" + val sampleDir: String = globalSampleDir + sampleID + "/" for ((library, libraryFiles) <- runLibraryJobs(sampleConfig)) { libraryFastqFiles +:= libraryFiles.prefixFastq libraryBamfiles +:= libraryFiles.mappedBamFile } - + val bamFile: File = if (libraryBamfiles.size == 1) libraryBamfiles.head - else if (libraryBamfiles.size > 1) { - val mergeSamFiles = MergeSamFiles(this, libraryBamfiles, sampleDir) - add(mergeSamFiles) - mergeSamFiles.output - } else null + else if (libraryBamfiles.size > 1) { + val mergeSamFiles = MergeSamFiles(this, libraryBamfiles, sampleDir) + add(mergeSamFiles) + mergeSamFiles.output + } else null val fastqFile: File = if (libraryFastqFiles.size == 1) libraryFastqFiles.head - else if (libraryFastqFiles.size > 1) { - val cat = Cat.apply(this, libraryFastqFiles, sampleDir + sampleID + ".fastq") - add(cat) - cat.output - } else null - + else if (libraryFastqFiles.size > 1) { + val cat = Cat.apply(this, libraryFastqFiles, sampleDir + sampleID + ".fastq") + add(cat) + cat.output + } else null + addBedtoolsCounts(bamFile, sampleID, sampleDir) addTablibCounts(fastqFile, sampleID, sampleDir) - + return sampleOutput } @@ -129,14 +129,14 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { flexiprep.init flexiprep.biopetScript addAll(flexiprep.functions) - - val flexiprepOutput = for ((key,file) <- flexiprep.outputFiles if key.endsWith("output_R1")) yield file + + val flexiprepOutput = for ((key, file) <- flexiprep.outputFiles if key.endsWith("output_R1")) yield file val prefixFastq = PrefixFastq.apply(this, flexiprepOutput.head, runDir) prefixFastq.prefix = config("sage_tag", default = "CATG") prefixFastq.deps +:= flexiprep.outputFiles("fastq_input_R1") add(prefixFastq) libraryOutput.prefixFastq = prefixFastq.output - + val mapping = new Mapping(this) mapping.skipFlexiprep = true mapping.skipMarkduplicates = true @@ -151,45 +151,45 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript { mapping.init mapping.biopetScript addAll(mapping.functions) - + if (config("library_counts", default = false).getBoolean) { addBedtoolsCounts(mapping.outputFiles("finalBamFile"), sampleID + "-" + runID, runDir) addTablibCounts(prefixFastq.output, sampleID + "-" + runID, runDir) } - + libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile") } else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig) return libraryOutput } - - def addBedtoolsCounts(bamFile:File, outputPrefix: String, outputDir: String) { - val bedtoolsSense = BedtoolsCoverage(this, bamFile, squishedCountBed, outputDir + outputPrefix + ".genome.sense.coverage", - depth = false, sameStrand = true, diffStrand = false) + + def addBedtoolsCounts(bamFile: File, outputPrefix: String, outputDir: String) { + val bedtoolsSense = BedtoolsCoverage(this, bamFile, squishedCountBed, outputDir + outputPrefix + ".genome.sense.coverage", + depth = false, sameStrand = true, diffStrand = false) val countSense = new BedtoolsCoverageToCounts(this) countSense.input = bedtoolsSense.output countSense.output = outputDir + outputPrefix + ".genome.sense.counts" - - val bedtoolsAntisense = BedtoolsCoverage(this, bamFile, squishedCountBed, outputDir + outputPrefix + ".genome.antisense.coverage", - depth = false, sameStrand = false, diffStrand = true) + + val bedtoolsAntisense = BedtoolsCoverage(this, bamFile, squishedCountBed, outputDir + outputPrefix + ".genome.antisense.coverage", + depth = false, sameStrand = false, diffStrand = true) val countAntisense = new BedtoolsCoverageToCounts(this) countAntisense.input = bedtoolsAntisense.output countAntisense.output = outputDir + outputPrefix + ".genome.antisense.counts" - - val bedtools = BedtoolsCoverage(this, bamFile, squishedCountBed, outputDir + outputPrefix + ".genome.coverage", - depth = false, sameStrand = false, diffStrand = false) + + val bedtools = BedtoolsCoverage(this, bamFile, squishedCountBed, outputDir + outputPrefix + ".genome.coverage", + depth = false, sameStrand = false, diffStrand = false) val count = new BedtoolsCoverageToCounts(this) count.input = bedtools.output count.output = outputDir + outputPrefix + ".genome.counts" - + add(bedtoolsSense, countSense, bedtoolsAntisense, countAntisense, bedtools, count) } - - def addTablibCounts(fastq:File, outputPrefix: String, outputDir: String) { + + def addTablibCounts(fastq: File, outputPrefix: String, outputDir: String) { val countFastq = new SageCountFastq(this) countFastq.input = fastq countFastq.output = outputDir + outputPrefix + ".raw.counts" add(countFastq) - + val createTagCounts = new SageCreateTagCounts(this) createTagCounts.input = countFastq.output createTagCounts.tagLib = tagsLibrary diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala index 16460f0fd66978f98ba79268cbf93e2a10425e00..10aef464b610c43571398d6285d3ddd7bc9c5b3c 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala @@ -12,7 +12,6 @@ import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaIndex, SambambaMerge } import nl.lumc.sasc.biopet.extensions.svcallers.pindel.Pindel import nl.lumc.sasc.biopet.extensions.svcallers.{ Breakdancer, Clever } - import nl.lumc.sasc.biopet.pipelines.mapping.Mapping import org.broadinstitute.gatk.queue.QScript @@ -67,7 +66,6 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript { val sampleDir: String = outputDir + sampleID + "/" val alignmentDir: String = sampleDir + "alignment/" - val svcallingDir: String = sampleDir + "svcalls/" sampleOutput.libraries = runLibraryJobs(sampleConfig) @@ -76,7 +74,7 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript { libraryBamfiles ++= List(libraryOutput.mappedBamFile) } - val bamFile: File = + val bamFile: File = if (libraryBamfiles.size == 1) libraryBamfiles.head else if (libraryBamfiles.size > 1) { val mergeSamFiles = new SambambaMerge(root) @@ -85,10 +83,10 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript { add(mergeSamFiles) mergeSamFiles.output } else null - + val bamIndex = SambambaIndex(root, bamFile) add(bamIndex) - + /// bamfile will be used as input for the SV callers. First run Clever // val cleverVCF : File = sampleDir + "/" + sampleID + ".clever.vcf" @@ -104,12 +102,12 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript { addAll(breakdancer.functions) // for pindel we should use per library config collected into one config file -// val pindelDir = svcallingDir + sampleID + ".pindel/" -// val pindel = Pindel(this, bamFile, this.reference, pindelDir) -// sampleOutput.vcf += ("pindel" -> List(pindel.outputvcf)) -// addAll(pindel.functions) -// -// + // val pindelDir = svcallingDir + sampleID + ".pindel/" + // val pindel = Pindel(this, bamFile, this.reference, pindelDir) + // sampleOutput.vcf += ("pindel" -> List(pindel.outputvcf)) + // addAll(pindel.functions) + // + // return sampleOutput } @@ -117,12 +115,12 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript { def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = { val libraryOutput = new LibraryOutput - + val runID: String = runConfig("ID").toString val sampleID: String = sampleConfig("ID").toString val alignmentDir: String = outputDir + sampleID + "/alignment/" val runDir: String = alignmentDir + "run_" + runID + "/" - + if (runConfig.contains("R1")) { val mapping = new Mapping(this) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/CoverageStats.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/CoverageStats.scala index fc1ffb12a499e33dc0687d2c917bbfb92a8ef158..11ac6281796563659929670e9a2158371dc5d926 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/CoverageStats.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/CoverageStats.scala @@ -16,7 +16,7 @@ class CoverageStats(val root: Configurable) extends PythonCommandLineFunction { @Output(doc = "plot File (png)") var plot: File = _ - + override val defaultVmem = "12G" def cmdLine = getPythonCommand + diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/FastqSync.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/FastqSync.scala index f98f9299a636abbe9fac47bff98c543afdd9ebc7..01d65e4eea8e2a947b0cd412b66de872798178fe 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/FastqSync.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/FastqSync.scala @@ -35,33 +35,33 @@ class FastqSync(val root: Configurable) extends PythonCommandLineFunction { def cmdLine = { getPythonCommand + - required(input_start_fastq) + - required(input_R1) + - required(input_R2) + - required(output_R1) + - required(output_R2) + - " > " + - required(output_stats) + required(input_start_fastq) + + required(input_R1) + + required(input_R2) + + required(output_R1) + + required(output_R2) + + " > " + + required(output_stats) } def getSummary: Json = { val R1_filteredR = """Filtered (\d*) reads from first read file.""".r val R2_filteredR = """Filtered (\d*) reads from second read file.""".r val readsLeftR = """Synced read files contain (\d*) reads.""".r - + var R1_filtered = 0 var R2_filtered = 0 var readsLeft = 0 - + if (output_stats.exists) for (line <- Source.fromFile(output_stats).getLines) { line match { case R1_filteredR(m) => R1_filtered = m.toInt case R2_filteredR(m) => R2_filtered = m.toInt - case readsLeftR(m) => readsLeft = m.toInt - case _ => + case readsLeftR(m) => readsLeft = m.toInt + case _ => } } - + return ("num_reads_discarded_R1" := R1_filtered) ->: ("num_reads_discarded_R2" := R2_filtered) ->: ("num_reads_kept" := readsLeft) ->: @@ -70,8 +70,8 @@ class FastqSync(val root: Configurable) extends PythonCommandLineFunction { } object FastqSync { - def apply(root: Configurable, input_start_fastq:File, input_R1:File, input_R2:File, - output_R1:File, output_R2:File, output_stats:File): FastqSync = { + def apply(root: Configurable, input_start_fastq: File, input_R1: File, input_R2: File, + output_R1: File, output_R2: File, output_stats: File): FastqSync = { val fastqSync = new FastqSync(root) fastqSync.input_start_fastq = input_start_fastq fastqSync.input_R1 = input_R1 @@ -81,18 +81,18 @@ object FastqSync { fastqSync.output_stats = output_stats return fastqSync } - + def mergeSummaries(jsons: List[Json]): Json = { var R1_filtered = 0 var R2_filtered = 0 var readsLeft = 0 - + for (json <- jsons) { R1_filtered += json.field("num_reads_discarded_R1").get.numberOrZero.toInt R2_filtered += json.field("num_reads_discarded_R2").get.numberOrZero.toInt readsLeft += json.field("num_reads_kept").get.numberOrZero.toInt } - + return ("num_reads_discarded_R1" := R1_filtered) ->: ("num_reads_discarded_R2" := R2_filtered) ->: ("num_reads_kept" := readsLeft) ->: diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/SquishBed.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/SquishBed.scala index f41fd754771b055fdb33a4de1b88dcad2f0a0490..85c40191fa42fada02aa378dca9c2e9dd4b37277 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/SquishBed.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/SquishBed.scala @@ -13,9 +13,9 @@ class SquishBed(val root: Configurable) extends PythonCommandLineFunction { @Output(doc = "output File") var output: File = _ - + def cmdLine = getPythonCommand + - required(input) + + required(input) + required(output) } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/prefixFastq.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/prefixFastq.scala index 07070727a2af455e956fd02bb8ebe754ad53a795..0411eb46ad1087d36b7c1c7b52d87a6fc42540c9 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/prefixFastq.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/scripts/prefixFastq.scala @@ -16,17 +16,17 @@ class PrefixFastq(val root: Configurable) extends PythonCommandLineFunction { @Argument(doc = "Prefix sequence") var prefix: String = "CATG" - + @Argument(doc = "Input file is gziped", required = false) var gzip: Boolean = _ - + override def beforeCmd { if (input.getName.endsWith(".gzip") || input.getName.endsWith("gz")) gzip = true } - + def cmdLine = getPythonCommand + - required("-o", output) + - required("--prefix", prefix) + + required("-o", output) + + required("--prefix", prefix) + required(input) } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala index 550853dff22d871690bb71631b80d6c55dc19698..436a4ff083eea141b3a45dda18d024022a0960e0 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedToInterval.scala @@ -45,24 +45,27 @@ object BedToInterval extends ToolCommand { return bedToInterval } - case class Args (inputFile:File = null, outputFile:File = null, bamFile:File = null) extends AbstractArgs + case class Args(inputFile: File = null, outputFile: File = null, bamFile: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputFile") required() valueName("<file>") action { (x, c) => - c.copy(inputFile = x) } - opt[File]('o', "output") required() valueName("<file>") action { (x, c) => - c.copy(outputFile = x) } - opt[File]('b', "bam") required() valueName("<file>") action { (x, c) => - c.copy(bamFile = x) } + opt[File]('I', "inputFile") required () valueName ("<file>") action { (x, c) => + c.copy(inputFile = x) + } + opt[File]('o', "output") required () valueName ("<file>") action { (x, c) => + c.copy(outputFile = x) + } + opt[File]('b', "bam") required () valueName ("<file>") action { (x, c) => + c.copy(bamFile = x) + } } - + /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { val argsParser = new OptParser val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) - + val writer = new PrintWriter(commandArgs.outputFile) val inputSam = new SAMFileReader(commandArgs.bamFile) @@ -72,19 +75,16 @@ object BedToInterval extends ToolCommand { record.getSequenceName -> record.getSequenceLength } inputSam.close - val refsMap = Map(refs:_*) - + val refsMap = Map(refs: _*) + val bedFile = Source.fromFile(commandArgs.inputFile) for ( - line <- bedFile.getLines; - val split = line.split("\t") - if split.size >= 3; - val chr = split(0); - val start = split(1); - val stop = split(2) - if start forall Character.isDigit - if stop forall Character.isDigit - ) { + line <- bedFile.getLines; + val split = line.split("\t") if split.size >= 3; + val chr = split(0); + val start = split(1); + val stop = split(2) if start forall Character.isDigit if stop forall Character.isDigit + ) { if (!refsMap.contains(chr)) throw new IllegalStateException("Chr '" + chr + "' in bed file not found in bam file") writer.write(chr + "\t" + start + "\t" + stop + "\t") if (split.length >= 6 && (split(5) == "+" || split(5) == "-")) writer.write(split(5)) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala index 004d322caabfb8fe0dd24ad793b14b141371d30b..ff818d111d9e44a334d035ec3e492e97d51e89e7 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BedtoolsCoverageToCounts.scala @@ -16,38 +16,40 @@ class BedtoolsCoverageToCounts(val root: Configurable) extends BiopetJavaCommand @Input(doc = "Input fasta", shortName = "input", required = true) var input: File = _ - + @Output(doc = "Output tag library", shortName = "output", required = true) var output: File = _ - + override val defaultVmem = "8G" memoryLimit = Option(4.0) - - override def commandLine = super.commandLine + - required("-I", input) + + + override def commandLine = super.commandLine + + required("-I", input) + required("-o", output) } object BedtoolsCoverageToCounts extends ToolCommand { - case class Args (input:File = null, output:File = null) extends AbstractArgs + case class Args(input: File = null, output: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "input") required() valueName("<file>") action { (x, c) => - c.copy(input = x) } - opt[File]('o', "output") required() unbounded() valueName("<file>") action { (x, c) => - c.copy(output = x) } + opt[File]('I', "input") required () valueName ("<file>") action { (x, c) => + c.copy(input = x) + } + opt[File]('o', "output") required () unbounded () valueName ("<file>") action { (x, c) => + c.copy(output = x) + } } - + /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { val argsParser = new OptParser val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) - + if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input) - - val counts:Map[String, Long] = Map() + + val counts: Map[String, Long] = Map() for (line <- Source.fromFile(commandArgs.input).getLines) { val values = line.split("\t") val gene = values(3) @@ -55,11 +57,11 @@ object BedtoolsCoverageToCounts extends ToolCommand { if (counts.contains(gene)) counts(gene) += count else counts += gene -> count } - - val sortedCounts:SortedMap[String, Long] = SortedMap(counts.toArray:_*) - + + val sortedCounts: SortedMap[String, Long] = SortedMap(counts.toArray: _*) + val writer = new PrintWriter(commandArgs.output) - for ((seq,count) <- sortedCounts) { + for ((seq, count) <- sortedCounts) { if (count > 0) writer.println(seq + "\t" + count) } writer.close diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala index 361c6e6c8b6cbb80a5696ea7ff027d8dd82a522e..8465f45637bd2f8aec6976ec814d49d00636d338 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/BiopetFlagstat.scala @@ -38,41 +38,43 @@ object BiopetFlagstat extends ToolCommand { flagstat.output = new File(outputDir, input.getName.stripSuffix(".bam") + ".biopetflagstat") return flagstat } - - case class Args (inputFile:File = null, region:Option[String] = None) extends AbstractArgs + + case class Args(inputFile: File = null, region: Option[String] = None) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputFile") required() valueName("<file>") action { (x, c) => - c.copy(inputFile = x) } text("out is a required file property") - opt[String]('r', "region") valueName("<chr:start-stop>") action { (x, c) => - c.copy(region = Some(x)) } text("out is a required file property") + opt[File]('I', "inputFile") required () valueName ("<file>") action { (x, c) => + c.copy(inputFile = x) + } text ("out is a required file property") + opt[String]('r', "region") valueName ("<chr:start-stop>") action { (x, c) => + c.copy(region = Some(x)) + } text ("out is a required file property") } - + /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) - + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val inputSam = new SAMFileReader(commandArgs.inputFile) val iterSam = if (commandArgs.region == None) inputSam.iterator else { val regionRegex = """(.*):(.*)-(.*)""".r commandArgs.region.get match { case regionRegex(chr, start, stop) => inputSam.query(chr, start.toInt, stop.toInt, false) - case _ => sys.error("Region wrong format") + case _ => sys.error("Region wrong format") } } - + val flagstatCollector = new FlagstatCollector flagstatCollector.loadDefaultFunctions val m = 10 val max = 60 for (t <- 0 to (max / m)) flagstatCollector.addFunction("MAPQ>" + (t * m), record => record.getMappingQuality > (t * m)) - flagstatCollector.addFunction("First normal, second read inverted (paired end orientation)", record => { - if (record.getReadPairedFlag && - record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag && + flagstatCollector.addFunction("First normal, second read inverted (paired end orientation)", record => { + if (record.getReadPairedFlag && + record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag && ((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || (record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) || (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || @@ -80,8 +82,8 @@ object BiopetFlagstat extends ToolCommand { else false }) flagstatCollector.addFunction("First normal, second read normal", record => { - if (record.getReadPairedFlag && - record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag && + if (record.getReadPairedFlag && + record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag && ((record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || (record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) || (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || @@ -89,8 +91,8 @@ object BiopetFlagstat extends ToolCommand { else false }) flagstatCollector.addFunction("First inverted, second read inverted", record => { - if (record.getReadPairedFlag && - record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag && + if (record.getReadPairedFlag && + record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag == record.getMateNegativeStrandFlag && ((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || (record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) || (record.getSecondOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || @@ -98,8 +100,8 @@ object BiopetFlagstat extends ToolCommand { else false }) flagstatCollector.addFunction("First inverted, second read normal", record => { - if (record.getReadPairedFlag && - record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag && + if (record.getReadPairedFlag && + record.getReferenceIndex == record.getMateReferenceIndex && record.getReadNegativeStrandFlag != record.getMateNegativeStrandFlag && ((record.getFirstOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || (record.getFirstOfPairFlag && !record.getReadNegativeStrandFlag && record.getAlignmentStart > record.getMateAlignmentStart) || (record.getSecondOfPairFlag && record.getReadNegativeStrandFlag && record.getAlignmentStart < record.getMateAlignmentStart) || @@ -109,7 +111,7 @@ object BiopetFlagstat extends ToolCommand { flagstatCollector.addFunction("Mate in same strand", record => record.getReadPairedFlag && record.getReadNegativeStrandFlag && record.getMateNegativeStrandFlag && record.getReferenceIndex == record.getMateReferenceIndex) flagstatCollector.addFunction("Mate on other chr", record => record.getReadPairedFlag && record.getReferenceIndex != record.getMateReferenceIndex) - + logger.info("Start reading file: " + commandArgs.inputFile) for (record <- iterSam) { if (flagstatCollector.readsCount % 1e6 == 0 && flagstatCollector.readsCount > 0) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala index 315a8db8c8b2743c81cdb618685a6eabf381891f..867eb36ad2b17e19cbb3b8d93bc894ef50c05dae 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/CheckAllelesVcfInBam.scala @@ -36,60 +36,64 @@ import scala.math._ //} object CheckAllelesVcfInBam extends ToolCommand { - case class Args (inputFile:File = null, outputFile:File = null, samples:List[String] = Nil, - bamFiles:List[File] = Nil, minMapQual:Int = 1) extends AbstractArgs + case class Args(inputFile: File = null, outputFile: File = null, samples: List[String] = Nil, + bamFiles: List[File] = Nil, minMapQual: Int = 1) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputFile") required() maxOccurs(1) valueName("<file>") action { (x, c) => - c.copy(inputFile = x) } - opt[File]('o', "outputFile") required() maxOccurs(1) valueName("<file>") action { (x, c) => - c.copy(outputFile = x) } - opt[String]('s', "sample") unbounded() minOccurs(1) action { (x, c) => - c.copy(samples = x :: c.samples) } - opt[File]('b', "bam") unbounded() minOccurs(1) action { (x, c) => - c.copy(bamFiles = x :: c.bamFiles) } - opt[Int]('m', "min_mapping_quality") maxOccurs(1) action { (x, c) => - c.copy(minMapQual = c.minMapQual) } + opt[File]('I', "inputFile") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + c.copy(inputFile = x) + } + opt[File]('o', "outputFile") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + c.copy(outputFile = x) + } + opt[String]('s', "sample") unbounded () minOccurs (1) action { (x, c) => + c.copy(samples = x :: c.samples) + } + opt[File]('b', "bam") unbounded () minOccurs (1) action { (x, c) => + c.copy(bamFiles = x :: c.bamFiles) + } + opt[Int]('m', "min_mapping_quality") maxOccurs (1) action { (x, c) => + c.copy(minMapQual = c.minMapQual) + } } - - private class CountReport ( + + private class CountReport( var notFound: Int = 0, var aCounts: mutable.Map[String, Int] = mutable.Map(), var duplicateReads: Int = 0, - var lowMapQualReads: Int = 0 - ) - + var lowMapQualReads: Int = 0) + def main(args: Array[String]): Unit = { val argsParser = new OptParser val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) - + if (commandArgs.bamFiles.size != commandArgs.samples.size) logger.warn("Number of samples is diffrent then number of bam files, left over will be removed") - val bamReaders:Map[String,SAMFileReader] = Map(commandArgs.samples zip commandArgs.bamFiles.map(x => new SAMFileReader(x)):_*) + val bamReaders: Map[String, SAMFileReader] = Map(commandArgs.samples zip commandArgs.bamFiles.map(x => new SAMFileReader(x)): _*) val bamHeaders = bamReaders.map(x => (x._1, x._2.getFileHeader)) - + val reader = new VCFFileReader(commandArgs.inputFile, false) val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputFile).build) - + val header = reader.getFileHeader - for ((sample,_) <- bamReaders) { - header.addMetaDataLine(new VCFInfoHeaderLine("BAM-AD-" + sample, - VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allele depth, ref and alt on order of vcf file")) - header.addMetaDataLine(new VCFInfoHeaderLine("BAM-DP-" + sample, - 1, VCFHeaderLineType.Integer, "Total reads on this location")) + for ((sample, _) <- bamReaders) { + header.addMetaDataLine(new VCFInfoHeaderLine("BAM-AD-" + sample, + VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allele depth, ref and alt on order of vcf file")) + header.addMetaDataLine(new VCFInfoHeaderLine("BAM-DP-" + sample, + 1, VCFHeaderLineType.Integer, "Total reads on this location")) } - + writer.writeHeader(header) - + for (vcfRecord <- reader) { - val countReports: Map[String,CountReport] = bamReaders.map(x => (x._1, new CountReport)) + val countReports: Map[String, CountReport] = bamReaders.map(x => (x._1, new CountReport)) val refAllele = vcfRecord.getReference.getBaseString for ((sample, bamReader) <- bamReaders) { - val queryInterval = new QueryInterval(bamHeaders(sample).getSequenceIndex(vcfRecord.getChr), - vcfRecord.getStart, vcfRecord.getStart + refAllele.size - 1) + val queryInterval = new QueryInterval(bamHeaders(sample).getSequenceIndex(vcfRecord.getChr), + vcfRecord.getStart, vcfRecord.getStart + refAllele.size - 1) val bamIter = bamReader.query(Array(queryInterval), false) - - def filterRead(samRecord:SAMRecord): Boolean = { + + def filterRead(samRecord: SAMRecord): Boolean = { if (samRecord.getDuplicateReadFlag) { countReports(sample).duplicateReads += 1 return true @@ -102,26 +106,26 @@ object CheckAllelesVcfInBam extends ToolCommand { } return false } - + val counts = for (samRecord <- bamIter if !filterRead(samRecord)) { checkAlles(samRecord, vcfRecord) match { - case Some(a) => if (countReports(sample).aCounts.contains(a)) countReports(sample).aCounts(a) += 1 - else countReports(sample).aCounts += (a -> 1) + case Some(a) => if (countReports(sample).aCounts.contains(a)) countReports(sample).aCounts(a) += 1 + else countReports(sample).aCounts += (a -> 1) case _ => countReports(sample).notFound += 1 } - } + } bamIter.close } - + val builder = new VariantContextBuilder(vcfRecord) - for ((k,v) <- countReports) { + for ((k, v) <- countReports) { val s = for (allele <- vcfRecord.getAlleles) yield { val s = allele.getBaseString if (v.aCounts.contains(s)) v.aCounts(s) else 0 } builder.attribute("BAM-AD-" + k, s.mkString(",")) - builder.attribute("BAM-DP-" + k, (0 /: s) (_ + _) + v.notFound) + builder.attribute("BAM-DP-" + k, (0 /: s)(_ + _) + v.notFound) } writer.add(builder.make) } @@ -129,32 +133,32 @@ object CheckAllelesVcfInBam extends ToolCommand { reader.close writer.close } - - def checkAlles(samRecord:SAMRecord, vcfRecord:VariantContext): Option[String] = { + + def checkAlles(samRecord: SAMRecord, vcfRecord: VariantContext): Option[String] = { val readStartPos = List.range(0, samRecord.getReadBases.length) - .find(x => samRecord.getReferencePositionAtReadPosition(x+1) == vcfRecord.getStart) getOrElse { return None } + .find(x => samRecord.getReferencePositionAtReadPosition(x + 1) == vcfRecord.getStart) getOrElse { return None } val readBases = samRecord.getReadBases() val alleles = vcfRecord.getAlleles.map(x => x.getBaseString) val refAllele = alleles.head var maxSize = 1 for (allele <- alleles if allele.size > maxSize) maxSize = allele.size - val readC = for (t <- readStartPos until readStartPos+maxSize if t < readBases.length) yield readBases(t).toChar - val allelesInRead = mutable.Set(alleles.filter(readC.mkString.startsWith(_)):_*) - + val readC = for (t <- readStartPos until readStartPos + maxSize if t < readBases.length) yield readBases(t).toChar + val allelesInRead = mutable.Set(alleles.filter(readC.mkString.startsWith(_)): _*) + // Removal of insertions that are not really in the cigarstring for (allele <- allelesInRead if allele.size > refAllele.size) { val refPos = for (t <- refAllele.size until allele.size) yield samRecord.getReferencePositionAtReadPosition(readStartPos + t + 1) if (refPos.exists(_ > 0)) allelesInRead -= allele } - + // Removal of alleles that are not really in the cigarstring for (allele <- allelesInRead) { val readPosAfterAllele = samRecord.getReferencePositionAtReadPosition(readStartPos + allele.size + 1) val vcfPosAfterAllele = vcfRecord.getStart + refAllele.size - if (readPosAfterAllele != vcfPosAfterAllele && - (refAllele.size != allele.size || (refAllele.size == allele.size && readPosAfterAllele < 0))) allelesInRead -= allele + if (readPosAfterAllele != vcfPosAfterAllele && + (refAllele.size != allele.size || (refAllele.size == allele.size && readPosAfterAllele < 0))) allelesInRead -= allele } - + for (allele <- allelesInRead if allele.size >= refAllele.size) { if (allelesInRead.exists(_.size > allele.size)) allelesInRead -= allele } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala index 83a607de4a035013cbf82072223f533380ff942c..d0f09baaa820b1a57ed181f66c25996faf9330a6 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala @@ -83,9 +83,9 @@ object ExtractAlignedFastq extends ToolCommand { require(inAlnReader.hasIndex) def getSequenceIndex(name: String): Int = inAlnReader.getFileHeader.getSequenceIndex(name) match { - case x if x >= 0 => + case x if x >= 0 => x - case otherwise => + case otherwise => throw new IllegalArgumentException("Chromosome " + name + " is not found in the alignment file") } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala index c59d8a2464797f03eca4e0f7b2d6c3e0965ab624..9a95d2435c6ad44020a0438cac8ed3e7ce55df8c 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FastqSplitter.scala @@ -24,22 +24,24 @@ class FastqSplitter(val root: Configurable) extends BiopetJavaCommandLineFunctio } object FastqSplitter extends ToolCommand { - case class Args (inputFile:File = null, outputFile:List[File] = Nil) extends AbstractArgs + case class Args(inputFile: File = null, outputFile: List[File] = Nil) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputFile") required() valueName("<file>") action { (x, c) => - c.copy(inputFile = x) } text("out is a required file property") - opt[File]('o', "output") required() unbounded() valueName("<file>") action { (x, c) => - c.copy(outputFile = x :: c.outputFile) } text("out is a required file property") + opt[File]('I', "inputFile") required () valueName ("<file>") action { (x, c) => + c.copy(inputFile = x) + } text ("out is a required file property") + opt[File]('o', "output") required () unbounded () valueName ("<file>") action { (x, c) => + c.copy(outputFile = x :: c.outputFile) + } text ("out is a required file property") } - + /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) - + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val groupsize = 100 val output = for (file <- commandArgs.outputFile) yield new PrintWriter(file) val inputStream = { diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala index add0849bc4e60e4df8662c5432dad0c197b26631..b6c9690bc90b45c2f9376f69b574e73157f39f06 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/FindRepeatsPacBio.scala @@ -26,40 +26,44 @@ import scala.io.Source import scala.collection.JavaConversions._ object FindRepeatsPacBio extends ToolCommand { - case class Args (inputBam:File = null, inputBed:File = null) extends AbstractArgs + case class Args(inputBam: File = null, inputBed: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputBam") required() maxOccurs(1) valueName("<file>") action { (x, c) => - c.copy(inputBam = x) } - opt[File]('b', "inputBed") required() maxOccurs(1) valueName("<file>") action { (x, c) => - c.copy(inputBed = x) } text("output file, default to stdout") + opt[File]('I', "inputBam") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + c.copy(inputBam = x) + } + opt[File]('b', "inputBed") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + c.copy(inputBed = x) + } text ("output file, default to stdout") } - + /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { - + val argsParser = new OptParser val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) val bamReader = new SAMFileReader(commandArgs.inputBam) bamReader.setValidationStringency(ValidationStringency.SILENT) val bamHeader = bamReader.getFileHeader - - val header = List("chr", "startPos", "stopPos","Repeat_seq", "repeatLength", - "original_Repeat_readLength", "Calculated_repeat_readLength", - "minLength", "maxLength", "inserts", "deletions", "notSpan") + + val header = List("chr", "startPos", "stopPos", "Repeat_seq", "repeatLength", + "original_Repeat_readLength", "Calculated_repeat_readLength", + "minLength", "maxLength", "inserts", "deletions", "notSpan") println(header.mkString("\t")) - - for (bedLine <- Source.fromFile(commandArgs.inputBed).getLines; - val values = bedLine.split("\t"); if values.size >= 3) { + + for ( + bedLine <- Source.fromFile(commandArgs.inputBed).getLines; + val values = bedLine.split("\t"); if values.size >= 3 + ) { val interval = new QueryInterval(bamHeader.getSequenceIndex(values(0)), values(1).toInt, values(2).toInt) val bamIter = bamReader.query(Array(interval), false) - val results = for (samRecord <-bamIter) yield procesSamrecord(samRecord, interval) + val results = for (samRecord <- bamIter) yield procesSamrecord(samRecord, interval) val chr = values(0) val startPos = values(1) val stopPos = values(2) - val typeRepeat: String = if (values.size >= 15) values(14) else "" + val typeRepeat: String = if (values.size >= 15) values(14) else "" val repeatLength = typeRepeat.length val oriRepeatLength = values(2).toInt - values(1).toInt + 1 var calcRepeatLength: List[Int] = Nil @@ -68,48 +72,48 @@ object FindRepeatsPacBio extends ToolCommand { var inserts: List[String] = Nil var deletions: List[String] = Nil var notSpan = 0 - + for (result <- results) { if (result.isEmpty) notSpan += 1 else { inserts ::= result.get.ins.map(_.insert).mkString(",") deletions ::= result.get.dels.map(_.length).mkString(",") - val length = oriRepeatLength - result.get.beginDel - result.get.endDel - - ((0 /: result.get.dels.map(_.length)) (_ + _)) + ((0 /: result.get.ins.map(_.insert.size)) (_ + _)) + val length = oriRepeatLength - result.get.beginDel - result.get.endDel - + ((0 /: result.get.dels.map(_.length))(_ + _)) + ((0 /: result.get.ins.map(_.insert.size))(_ + _)) calcRepeatLength ::= length if (length > maxLength) maxLength = length if (length < minLength || minLength == -1) minLength = length } } - println(List(chr, startPos, stopPos, typeRepeat, repeatLength, oriRepeatLength, calcRepeatLength.mkString(","), minLength, - maxLength, inserts.mkString("/"), deletions.mkString("/"), notSpan).mkString("\t")) + println(List(chr, startPos, stopPos, typeRepeat, repeatLength, oriRepeatLength, calcRepeatLength.mkString(","), minLength, + maxLength, inserts.mkString("/"), deletions.mkString("/"), notSpan).mkString("\t")) bamIter.close } } - - case class Del(pos:Int, length:Int) - case class Ins(pos:Int, insert:String) - + + case class Del(pos: Int, length: Int) + case class Ins(pos: Int, insert: String) + class Result() { var beginDel = 0 var endDel = 0 var dels: List[Del] = Nil var ins: List[Ins] = Nil var samRecord: SAMRecord = _ - + override def toString = { - "id: " + samRecord.getReadName + " beginDel: " + beginDel + " endDel: " + endDel + " dels: " + dels + " ins: " + ins + "id: " + samRecord.getReadName + " beginDel: " + beginDel + " endDel: " + endDel + " dels: " + dels + " ins: " + ins } } - - def procesSamrecord(samRecord:SAMRecord, interval:QueryInterval): Option[Result] = { + + def procesSamrecord(samRecord: SAMRecord, interval: QueryInterval): Option[Result] = { val readStartPos = List.range(0, samRecord.getReadBases.length) - .find(x => samRecord.getReferencePositionAtReadPosition(x) >= interval.start) + .find(x => samRecord.getReferencePositionAtReadPosition(x) >= interval.start) var readPos = if (readStartPos.isEmpty) return None else readStartPos.get if (samRecord.getAlignmentEnd < interval.end) return None if (samRecord.getAlignmentStart > interval.start) return None var refPos = samRecord.getReferencePositionAtReadPosition(readPos) - + val result = new Result result.samRecord = samRecord result.beginDel = interval.start - refPos @@ -119,19 +123,19 @@ object FindRepeatsPacBio extends ToolCommand { do { readPos += 1 refPos = samRecord.getReferencePositionAtReadPosition(readPos) - } while(refPos < oldReadPos) + } while (refPos < oldReadPos) val readDiff = readPos - oldReadPos val refDiff = refPos - oldRefPos if (refPos > interval.end) { result.endDel = interval.end - oldRefPos } else if (readDiff > refDiff) { //Insertion - val insert = for (t <- oldReadPos+1 until readPos) yield samRecord.getReadBases()(t-1).toChar + val insert = for (t <- oldReadPos + 1 until readPos) yield samRecord.getReadBases()(t - 1).toChar result.ins ::= Ins(oldRefPos, insert.mkString) } else if (readDiff < refDiff) { // Deletion result.dels ::= Del(oldRefPos, refDiff - readDiff) } } - + return Some(result) } } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala index a407a64de28c47de0a2c53033793a4b3c4f15a5e..52b5df3f5c5f0d9f6c007e00a48722aece6785ee 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/MpileupToVcf.scala @@ -15,82 +15,89 @@ import scala.collection.JavaConversions._ class MpileupToVcf(val root: Configurable) extends BiopetJavaCommandLineFunction { javaMainClass = getClass.getName - + @Input(doc = "Input mpileup file", shortName = "mpileup", required = false) var inputMpileup: File = _ - + @Input(doc = "Input bam file", shortName = "bam", required = false) var inputBam: File = _ - + @Output(doc = "Output tag library", shortName = "output", required = true) var output: File = _ - - var minDP:Option[Int] = config("min_dp") - var minAP:Option[Int] = config("min_ap") - var homoFraction:Option[Double] = config("homoFraction") - var ploidy:Option[Int] = config("ploidy") + + var minDP: Option[Int] = config("min_dp") + var minAP: Option[Int] = config("min_ap") + var homoFraction: Option[Double] = config("homoFraction") + var ploidy: Option[Int] = config("ploidy") var sample: String = _ var reference: String = config("reference") - + override val defaultVmem = "6G" memoryLimit = Option(2.0) - + if (config.contains("target_bed")) defaults ++= Map("samtoolsmpileup" -> Map("interval_bed" -> config("target_bed").getStringList.head, - "disable_baq" -> true, "min_map_quality" -> 1)) - + "disable_baq" -> true, "min_map_quality" -> 1)) + override def afterGraph { super.afterGraph val samtoolsMpileup = new SamtoolsMpileup(this) } - + override def commandLine = { (if (inputMpileup == null) { val samtoolsMpileup = new SamtoolsMpileup(this) samtoolsMpileup.input = inputBam samtoolsMpileup.cmdPipe + " | " - } else "") + - super.commandLine + - required("-o", output) + - optional("--minDP", minDP) + + } else "") + + super.commandLine + + required("-o", output) + + optional("--minDP", minDP) + optional("--minAP", minAP) + optional("--homoFraction", homoFraction) + optional("--ploidy", ploidy) + - required("--sample", sample) + + required("--sample", sample) + (if (inputBam == null) required("-I", inputMpileup) else "") } } object MpileupToVcf extends ToolCommand { - case class Args (input:File = null, output:File = null, sample:String = null, minDP:Int = 8, minAP:Int = 2, - homoFraction:Double = 0.8, ploidy:Int = 2) extends AbstractArgs + case class Args(input: File = null, output: File = null, sample: String = null, minDP: Int = 8, minAP: Int = 2, + homoFraction: Double = 0.8, ploidy: Int = 2) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "input") valueName("<file>") action { (x, c) => - c.copy(input = x) } text("input, default is stdin") - opt[File]('o', "output") required() valueName("<file>") action { (x, c) => - c.copy(output = x) } text("out is a required file property") - opt[String]('s', "sample") required() action { (x, c) => - c.copy(sample = x) } - opt[Int]("minDP") required() action { (x, c) => - c.copy(minDP = x) } - opt[Int]("minAP") required() action { (x, c) => - c.copy(minAP = x) } - opt[Double]("homoFraction") required() action { (x, c) => - c.copy(homoFraction = x) } - opt[Int]("ploidy") required() action { (x, c) => - c.copy(ploidy = x) } + opt[File]('I', "input") valueName ("<file>") action { (x, c) => + c.copy(input = x) + } text ("input, default is stdin") + opt[File]('o', "output") required () valueName ("<file>") action { (x, c) => + c.copy(output = x) + } text ("out is a required file property") + opt[String]('s', "sample") required () action { (x, c) => + c.copy(sample = x) + } + opt[Int]("minDP") required () action { (x, c) => + c.copy(minDP = x) + } + opt[Int]("minAP") required () action { (x, c) => + c.copy(minAP = x) + } + opt[Double]("homoFraction") required () action { (x, c) => + c.copy(homoFraction = x) + } + opt[Int]("ploidy") required () action { (x, c) => + c.copy(ploidy = x) + } } - + /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { val argsParser = new OptParser val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) - + import scala.collection.mutable.Map if (commandArgs.input != null && !commandArgs.input.exists) throw new IllegalStateException("Input file does not exist") - + val writer = new PrintWriter(commandArgs.output) writer.println("##fileformat=VCFv4.1") writer.println("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">") @@ -105,83 +112,85 @@ object MpileupToVcf extends ToolCommand { writer.println("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">") writer.println("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + commandArgs.sample) val inputStream = if (commandArgs.input != null) Source.fromFile(commandArgs.input).getLines else Source.stdin.getLines - class Counts(var forward:Int, var reverse:Int) - for (line <- inputStream; - val values = line.split("\t"); - if values.size > 5) { + class Counts(var forward: Int, var reverse: Int) + for ( + line <- inputStream; + val values = line.split("\t"); + if values.size > 5 + ) { val chr = values(0) val pos = values(1) val ref = values(2) val reads = values(3).toInt val mpileup = values(4) val qual = values(5) - - val counts: Map[String, Counts] = Map(ref.toUpperCase -> new Counts(0,0)) - - def addCount(s:String) { + + val counts: Map[String, Counts] = Map(ref.toUpperCase -> new Counts(0, 0)) + + def addCount(s: String) { val upper = s.toUpperCase - if (!counts.contains(upper)) counts += upper -> new Counts(0,0) + if (!counts.contains(upper)) counts += upper -> new Counts(0, 0) if (s(0).isLower) counts(upper).reverse += 1 else counts(upper).forward += 1 } - + var t = 0 var dels = 0 - while(t < mpileup.size) { + while (t < mpileup.size) { mpileup(t) match { case ',' => { - addCount(ref.toLowerCase) - t += 1 + addCount(ref.toLowerCase) + t += 1 } case '.' => { - addCount(ref.toUpperCase) - t += 1 + addCount(ref.toUpperCase) + t += 1 } case '^' => t += 2 case '$' => t += 1 case '*' => { - dels += 1 - t += 1 + dels += 1 + t += 1 } case '+' | '-' => { + t += 1 + var size = "" + var insert = "" + while (mpileup(t).isDigit) { + size += mpileup(t) t += 1 - var size = "" - var insert = "" - while (mpileup(t).isDigit) { - size += mpileup(t) - t += 1 - } - for (c <- t until t + size.toInt) insert = insert + mpileup(c) - t += size.toInt + } + for (c <- t until t + size.toInt) insert = insert + mpileup(c) + t += size.toInt } - case 'a' | 'c' | 't' | 'g' | 'A' | 'C' | 'T' | 'G' => { - addCount(mpileup(t).toString) - t += 1 + case 'a' | 'c' | 't' | 'g' | 'A' | 'C' | 'T' | 'G' => { + addCount(mpileup(t).toString) + t += 1 } case _ => t += 1 } } - + val info: ArrayBuffer[String] = ArrayBuffer("DP=" + reads) val format: Map[String, String] = Map("DP" -> reads.toString) val alt: ArrayBuffer[String] = new ArrayBuffer format += ("RFC" -> counts(ref.toUpperCase).forward.toString) format += ("RRC" -> counts(ref.toUpperCase).reverse.toString) format += ("AD" -> (counts(ref.toUpperCase).forward + counts(ref.toUpperCase).reverse).toString) - if (reads >= commandArgs.minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward+value.reverse >= commandArgs.minAP) { + if (reads >= commandArgs.minDP) for ((key, value) <- counts if key != ref.toUpperCase if value.forward + value.reverse >= commandArgs.minAP) { alt += key format += ("AD" -> (format("AD") + "," + (value.forward + value.reverse).toString)) - format += ("AFC" -> ( (if (format.contains("AFC")) format("AFC") + "," else "") + value.forward)) - format += ("ARC" -> ( (if (format.contains("ARC")) format("ARC") + "," else "") + value.reverse)) - format += ("FREQ" -> ( (if (format.contains("FREQ")) format("FREQ") + "," else "") + - round((value.forward+value.reverse).toDouble/reads*1E4).toDouble/1E2)) + format += ("AFC" -> ((if (format.contains("AFC")) format("AFC") + "," else "") + value.forward)) + format += ("ARC" -> ((if (format.contains("ARC")) format("ARC") + "," else "") + value.reverse)) + format += ("FREQ" -> ((if (format.contains("FREQ")) format("FREQ") + "," else "") + + round((value.forward + value.reverse).toDouble / reads * 1E4).toDouble / 1E2)) } - + if (alt.size > 0) { val ad = for (ad <- format("AD").split(",")) yield ad.toInt var left = reads - dels val gt = ArrayBuffer[Int]() - + for (p <- 0 to alt.size if gt.size < commandArgs.ploidy) { var max = -1 for (a <- 0 until ad.length if ad(a) > (if (max >= 0) ad(max) else -1) && !gt.exists(_ == a)) max = a @@ -192,9 +201,9 @@ object MpileupToVcf extends ToolCommand { } left -= ad(max) } - writer.println(Array(chr, pos, ".", ref.toUpperCase, alt.mkString(","), ".", ".", info.mkString(";"), - "GT:" + format.keys.mkString(":"), gt.sortWith(_ < _).mkString("/") + ":" + format.values.mkString(":") - ).mkString("\t")) + writer.println(Array(chr, pos, ".", ref.toUpperCase, alt.mkString(","), ".", ".", info.mkString(";"), + "GT:" + format.keys.mkString(":"), gt.sortWith(_ < _).mkString("/") + ":" + format.values.mkString(":") + ).mkString("\t")) } } writer.close diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala index a07cc4b8bb8eb73aeb00d1dbc2b2880b761b6acc..b2b10550328709ed41745005d5ce8a410fd8d4ac 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCountFastq.scala @@ -6,7 +6,7 @@ import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction import nl.lumc.sasc.biopet.core.ToolCommand import nl.lumc.sasc.biopet.core.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } -import org.biojava3.sequencing.io.fastq.{SangerFastqReader, StreamListener, Fastq} +import org.biojava3.sequencing.io.fastq.{ SangerFastqReader, StreamListener, Fastq } import scala.collection.JavaConversions._ import scala.collection.SortedMap import scala.collection.mutable.Map @@ -17,44 +17,46 @@ class SageCountFastq(val root: Configurable) extends BiopetJavaCommandLineFuncti @Input(doc = "Input fasta", shortName = "input", required = true) var input: File = _ - + @Output(doc = "Output tag library", shortName = "output", required = true) var output: File = _ - + override val defaultVmem = "8G" memoryLimit = Option(4.0) - - override def commandLine = super.commandLine + - required("-I", input) + + + override def commandLine = super.commandLine + + required("-I", input) + required("-o", output) } object SageCountFastq extends ToolCommand { - case class Args (input:File = null, output:File = null) extends AbstractArgs + case class Args(input: File = null, output: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "input") required() valueName("<file>") action { (x, c) => - c.copy(input = x) } - opt[File]('o', "output") required() unbounded() valueName("<file>") action { (x, c) => - c.copy(output = x) } + opt[File]('I', "input") required () valueName ("<file>") action { (x, c) => + c.copy(input = x) + } + opt[File]('o', "output") required () unbounded () valueName ("<file>") action { (x, c) => + c.copy(output = x) + } } - + /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { val argsParser = new OptParser val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) - + if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input) - - val counts:Map[String, Long] = Map() + + val counts: Map[String, Long] = Map() val reader = new SangerFastqReader var count = 0 logger.info("Reading fastq file: " + commandArgs.input) val fileReader = new FileReader(commandArgs.input) reader.stream(fileReader, new StreamListener { - def fastq(fastq:Fastq) { + def fastq(fastq: Fastq) { val seq = fastq.getSequence if (counts.contains(seq)) counts(seq) += 1 else counts += (seq -> 1) @@ -63,13 +65,13 @@ object SageCountFastq extends ToolCommand { } }) logger.info(count + " sequences done") - + logger.info("Sorting") - val sortedCounts:SortedMap[String, Long] = SortedMap(counts.toArray:_*) - + val sortedCounts: SortedMap[String, Long] = SortedMap(counts.toArray: _*) + logger.info("Writting outputfile: " + commandArgs.output) val writer = new PrintWriter(commandArgs.output) - for ((seq,count) <- sortedCounts) { + for ((seq, count) <- sortedCounts) { writer.println(seq + "\t" + count) } writer.close diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala index b339ab2471e756e654fbbe621e15a1ac7f5a36cb..61336896ed4c8f2a3140ba6143060ab6f5bd1ef4 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateLibrary.scala @@ -9,7 +9,7 @@ import org.biojava3.core.sequence.DNASequence import org.biojava3.core.sequence.io.FastaReaderHelper import org.broadinstitute.gatk.utils.commandline.{ Input, Output } import scala.collection.SortedMap -import scala.collection.mutable.{Map, Set} +import scala.collection.mutable.{ Map, Set } import scala.collection.JavaConversions._ import scala.util.matching.Regex @@ -18,28 +18,28 @@ class SageCreateLibrary(val root: Configurable) extends BiopetJavaCommandLineFun @Input(doc = "Input fasta", shortName = "input", required = true) var input: File = _ - + @Output(doc = "Output tag library", shortName = "output", required = true) var output: File = _ - + @Output(doc = "Output no tags", shortName = "noTagsOutput", required = false) var noTagsOutput: File = _ - + @Output(doc = "Output no anti tags library", shortName = "noAntiTagsOutput", required = false) var noAntiTagsOutput: File = _ - + @Output(doc = "Output file all genes", shortName = "allGenes", required = false) var allGenesOutput: File = _ - + var tag: String = config("tag", default = "CATG") var length: Option[Int] = config("length", default = 17) - + override val defaultVmem = "8G" memoryLimit = Option(4.0) - - override def commandLine = super.commandLine + - required("-I", input) + - optional("--tag", tag) + + + override def commandLine = super.commandLine + + required("-I", input) + + optional("--tag", tag) + optional("--length", length) + optional("--notag", noTagsOutput) + optional("--noantitag", noAntiTagsOutput) + @@ -47,54 +47,61 @@ class SageCreateLibrary(val root: Configurable) extends BiopetJavaCommandLineFun } object SageCreateLibrary extends ToolCommand { - case class Args (input:File = null, tag:String = "CATG", length:Int = 17,output:File = null, noTagsOutput:File = null, - noAntiTagsOutput:File = null, allGenesOutput:File = null) extends AbstractArgs + case class Args(input: File = null, tag: String = "CATG", length: Int = 17, output: File = null, noTagsOutput: File = null, + noAntiTagsOutput: File = null, allGenesOutput: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "input") required() unbounded() valueName("<file>") action { (x, c) => - c.copy(input = x) } - opt[File]('o', "output") required() unbounded() valueName("<file>") action { (x, c) => - c.copy(output = x) } - opt[String]("tag") required() unbounded() action { (x, c) => - c.copy(tag = x) } - opt[Int]("length") required() unbounded() action { (x, c) => - c.copy(length = x) } - opt[File]("noTagsOutput") required() unbounded() valueName("<file>") action { (x, c) => - c.copy(noTagsOutput = x) } - opt[File]("noAntiTagsOutput") required() unbounded() valueName("<file>") action { (x, c) => - c.copy(noAntiTagsOutput = x) } - opt[File]("allGenesOutput") required() unbounded() valueName("<file>") action { (x, c) => - c.copy(allGenesOutput = x) } + opt[File]('I', "input") required () unbounded () valueName ("<file>") action { (x, c) => + c.copy(input = x) + } + opt[File]('o', "output") required () unbounded () valueName ("<file>") action { (x, c) => + c.copy(output = x) + } + opt[String]("tag") required () unbounded () action { (x, c) => + c.copy(tag = x) + } + opt[Int]("length") required () unbounded () action { (x, c) => + c.copy(length = x) + } + opt[File]("noTagsOutput") required () unbounded () valueName ("<file>") action { (x, c) => + c.copy(noTagsOutput = x) + } + opt[File]("noAntiTagsOutput") required () unbounded () valueName ("<file>") action { (x, c) => + c.copy(noAntiTagsOutput = x) + } + opt[File]("allGenesOutput") required () unbounded () valueName ("<file>") action { (x, c) => + c.copy(allGenesOutput = x) + } } - + var tagRegex: Regex = null var geneRegex = """ENSG[0-9]{11}""".r - + val tagGenesMap: Map[String, TagGenes] = Map() - + val allGenes: Set[String] = Set() val tagGenes: Set[String] = Set() val antiTagGenes: Set[String] = Set() - + class TagGenes { val firstTag: Set[String] = Set() - val allTags:Set[String] = Set() + val allTags: Set[String] = Set() val firstAntiTag: Set[String] = Set() - val allAntiTags:Set[String] = Set() + val allAntiTags: Set[String] = Set() } - class TagResult( val firstTag: String, val allTags:List[String], val firstAntiTag: String, val allAntiTags:List[String] ) - + class TagResult(val firstTag: String, val allTags: List[String], val firstAntiTag: String, val allAntiTags: List[String]) + /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { val argsParser = new OptParser val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) - + if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input) - + tagRegex = (commandArgs.tag + "[CATG]{" + commandArgs.length + "}").r - + var count = 0 System.err.println("Reading fasta file") val reader = FastaReaderHelper.readFastaDNASequence(commandArgs.input) @@ -105,22 +112,22 @@ object SageCreateLibrary extends ToolCommand { if (count % 10000 == 0) System.err.println(count + " transcripts done") } System.err.println(count + " transcripts done") - + System.err.println("Start sorting tags") - val tagGenesMapSorted:SortedMap[String, TagGenes] = SortedMap(tagGenesMap.toArray:_*) - + val tagGenesMapSorted: SortedMap[String, TagGenes] = SortedMap(tagGenesMap.toArray: _*) + System.err.println("Writting output files") val writer = new PrintWriter(commandArgs.output) writer.println("#tag\tfirstTag\tAllTags\tFirstAntiTag\tAllAntiTags") for ((tag, genes) <- tagGenesMapSorted) { val line = tag + "\t" + genes.firstTag.mkString(",") + - "\t" + genes.allTags.mkString(",") + - "\t" + genes.firstAntiTag.mkString(",") + - "\t" + genes.allAntiTags.mkString(",") + "\t" + genes.allTags.mkString(",") + + "\t" + genes.firstAntiTag.mkString(",") + + "\t" + genes.allAntiTags.mkString(",") writer.println(line) } writer.close() - + if (commandArgs.noTagsOutput != null) { val writer = new PrintWriter(commandArgs.noTagsOutput) for (gene <- allGenes if !tagGenes.contains(gene)) { @@ -128,7 +135,7 @@ object SageCreateLibrary extends ToolCommand { } writer.close } - + if (commandArgs.noAntiTagsOutput != null) { val writer = new PrintWriter(commandArgs.noAntiTagsOutput) for (gene <- allGenes if !antiTagGenes.contains(gene)) { @@ -136,7 +143,7 @@ object SageCreateLibrary extends ToolCommand { } writer.close } - + if (commandArgs.allGenesOutput != null) { val writer = new PrintWriter(commandArgs.allGenesOutput) for (gene <- allGenes) { @@ -145,44 +152,44 @@ object SageCreateLibrary extends ToolCommand { writer.close } } - - def addTagresultToTaglib(name:String, tagResult:TagResult) { + + def addTagresultToTaglib(name: String, tagResult: TagResult) { val id = name.split(" ").head //.stripPrefix("hg19_ensGene_") val geneID = geneRegex.findFirstIn(name).getOrElse("unknown_gene") allGenes.add(geneID) - + if (tagResult.firstTag != null) { if (!tagGenesMap.contains(tagResult.firstTag)) tagGenesMap += (tagResult.firstTag -> new TagGenes) tagGenesMap(tagResult.firstTag).firstTag.add(geneID) tagGenes.add(geneID) } - + for (tag <- tagResult.allTags) { if (!tagGenesMap.contains(tag)) tagGenesMap += (tag -> new TagGenes) tagGenesMap(tag).allTags.add(geneID) } - + if (tagResult.firstAntiTag != null) { if (!tagGenesMap.contains(tagResult.firstAntiTag)) tagGenesMap += (tagResult.firstAntiTag -> new TagGenes) tagGenesMap(tagResult.firstAntiTag).firstAntiTag.add(geneID) antiTagGenes.add(geneID) } - + for (tag <- tagResult.allAntiTags) { if (!tagGenesMap.contains(tag)) tagGenesMap += (tag -> new TagGenes) tagGenesMap(tag).allAntiTags.add(geneID) } } - - def getTags(name:String, seq: DNASequence): TagResult = { + + def getTags(name: String, seq: DNASequence): TagResult = { val allTags: List[String] = for (tag <- tagRegex.findAllMatchIn(seq.getSequenceAsString).toList) yield tag.toString val firstTag = if (allTags.isEmpty) null else allTags.last val allAntiTags: List[String] = for (tag <- tagRegex.findAllMatchIn(seq.getReverseComplement.getSequenceAsString).toList) yield tag.toString - val firstAntiTag = if (allAntiTags.isEmpty) null else allAntiTags.head + val firstAntiTag = if (allAntiTags.isEmpty) null else allAntiTags.head val result = new TagResult(firstTag, allTags, firstAntiTag, allAntiTags) - + addTagresultToTaglib(name, result) - + return result } } diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala index 1bfc6e0f8606c3ed14576791f9ac50a8cfa99b04..eba1ff87f3984f5996071059dd9f05dd2163ab71 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/SageCreateTagCounts.scala @@ -15,62 +15,68 @@ class SageCreateTagCounts(val root: Configurable) extends BiopetJavaCommandLineF @Input(doc = "Raw count file", shortName = "input", required = true) var input: File = _ - + @Input(doc = "tag library", shortName = "taglib", required = true) var tagLib: File = _ - + @Output(doc = "Sense count file", shortName = "sense", required = true) var countSense: File = _ - + @Output(doc = "Sense all coun filet", shortName = "allsense", required = true) var countAllSense: File = _ - + @Output(doc = "AntiSense count file", shortName = "antisense", required = true) var countAntiSense: File = _ - + @Output(doc = "AntiSense all count file", shortName = "allantisense", required = true) var countAllAntiSense: File = _ - + override val defaultVmem = "8G" memoryLimit = Option(4.0) - - override def commandLine = super.commandLine + - required("-I", input) + - required("--taglib", tagLib) + - optional("--sense", countSense) + - optional("--allsense", countAllSense) + - optional("--antisense", countAntiSense) + + + override def commandLine = super.commandLine + + required("-I", input) + + required("--taglib", tagLib) + + optional("--sense", countSense) + + optional("--allsense", countAllSense) + + optional("--antisense", countAntiSense) + optional("--allantisense", countAllAntiSense) } object SageCreateTagCounts extends ToolCommand { - case class Args (input:File = null, tagLib:File = null, countSense:File = null, countAllSense:File = null, - countAntiSense:File = null, countAllAntiSense:File = null) extends AbstractArgs + case class Args(input: File = null, tagLib: File = null, countSense: File = null, countAllSense: File = null, + countAntiSense: File = null, countAllAntiSense: File = null) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "input") required() unbounded() valueName("<file>") action { (x, c) => - c.copy(input = x) } - opt[File]('t', "tagLib") required() unbounded() valueName("<file>") action { (x, c) => - c.copy(tagLib = x) } - opt[File]("countSense") unbounded() valueName("<file>") action { (x, c) => - c.copy(countSense = x) } - opt[File]("countAllSense") unbounded() valueName("<file>") action { (x, c) => - c.copy(countAllSense = x) } - opt[File]("countAntiSense") unbounded() valueName("<file>") action { (x, c) => - c.copy(countAntiSense = x) } - opt[File]("countAllAntiSense") unbounded() valueName("<file>") action { (x, c) => - c.copy(countAllAntiSense = x) } + opt[File]('I', "input") required () unbounded () valueName ("<file>") action { (x, c) => + c.copy(input = x) + } + opt[File]('t', "tagLib") required () unbounded () valueName ("<file>") action { (x, c) => + c.copy(tagLib = x) + } + opt[File]("countSense") unbounded () valueName ("<file>") action { (x, c) => + c.copy(countSense = x) + } + opt[File]("countAllSense") unbounded () valueName ("<file>") action { (x, c) => + c.copy(countAllSense = x) + } + opt[File]("countAntiSense") unbounded () valueName ("<file>") action { (x, c) => + c.copy(countAntiSense = x) + } + opt[File]("countAllAntiSense") unbounded () valueName ("<file>") action { (x, c) => + c.copy(countAllAntiSense = x) + } } - + /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { val argsParser = new OptParser val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) - + if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input) - + val rawCounts: Map[String, Long] = Map() for (line <- Source.fromFile(commandArgs.input).getLines) { val values = line.split("\t") @@ -79,50 +85,50 @@ object SageCreateTagCounts extends ToolCommand { if (rawCounts.contains(gene)) rawCounts(gene) += count else rawCounts += gene -> count } - + val senseCounts: Map[String, Long] = Map() val allSenseCounts: Map[String, Long] = Map() val antiSenseCounts: Map[String, Long] = Map() val allAntiSenseCounts: Map[String, Long] = Map() - + for (line <- Source.fromFile(commandArgs.tagLib).getLines if !line.startsWith("#")) { val values = line.split("\t") - val tag = values(0) + val tag = values(0) val sense = values(1) val allSense = values(2) val antiSense = if (values.size > 3) values(3) else "" val allAntiSense = if (values.size > 4) values(4) else "" - + if (!sense.isEmpty && !sense.contains(",")) { val count = if (rawCounts.contains(tag)) rawCounts(tag) else 0 if (senseCounts.contains(sense)) senseCounts(sense) += count else senseCounts += sense -> count } - + if (!allSense.isEmpty && !allSense.contains(",")) { val count = if (rawCounts.contains(tag)) rawCounts(tag) else 0 if (allSenseCounts.contains(allSense)) allSenseCounts(allSense) += count else allSenseCounts += allSense -> count } - + if (!antiSense.isEmpty && !antiSense.contains(",")) { val count = if (rawCounts.contains(tag)) rawCounts(tag) else 0 if (antiSenseCounts.contains(antiSense)) antiSenseCounts(antiSense) += count else antiSenseCounts += antiSense -> count } - + if (!allAntiSense.isEmpty && !allAntiSense.contains(",")) { val count = if (rawCounts.contains(tag)) rawCounts(tag) else 0 if (allAntiSenseCounts.contains(allAntiSense)) allAntiSenseCounts(allAntiSense) += count else allAntiSenseCounts += allAntiSense -> count } } - - def writeFile(file:File, counts:Map[String, Long]) { - val sorted: SortedMap[String,Long] = SortedMap(counts.toArray:_*) + + def writeFile(file: File, counts: Map[String, Long]) { + val sorted: SortedMap[String, Long] = SortedMap(counts.toArray: _*) if (file != null) { val writer = new PrintWriter(file) - for ((gene,count) <- sorted) { + for ((gene, count) <- sorted) { if (count > 0) writer.println(gene + "\t" + count) } writer.close diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala index dfd93ff511ac2c64792722893fbb507f8cc6f61e..d9f9c8c49aad126d42d3538e3276274b66e83d2c 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala @@ -16,19 +16,19 @@ class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction { @Input(doc = "Input vcf", shortName = "I", required = true) var inputVcf: File = _ - + @Output(doc = "Output vcf", shortName = "o", required = false) var outputVcf: File = _ - + var minSampleDepth: Option[Int] = _ var minTotalDepth: Option[Int] = _ var minAlternateDepth: Option[Int] = _ var minSamplesPass: Option[Int] = _ var filterRefCalls: Boolean = _ - + override val defaultVmem = "8G" memoryLimit = Option(4.0) - + override def afterGraph { minSampleDepth = config("min_sample_depth") minTotalDepth = config("min_total_depth") @@ -36,82 +36,90 @@ class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction { minSamplesPass = config("min_samples_pass") filterRefCalls = config("filter_ref_calls") } - - override def commandLine = super.commandLine + - required("-I", inputVcf) + + + override def commandLine = super.commandLine + + required("-I", inputVcf) + required("-o", outputVcf) + optional("--minSampleDepth", minSampleDepth) + optional("--minTotalDepth", minTotalDepth) + - optional("--minAlternateDepth", minAlternateDepth) + + optional("--minAlternateDepth", minAlternateDepth) + optional("--minSamplesPass", minSamplesPass) + conditional(filterRefCalls, "--filterRefCalls") } object VcfFilter extends ToolCommand { - case class Args (inputVcf:File = null, outputVcf:File = null, minSampleDepth: Int = -1, minTotalDepth: Int = -1, - minAlternateDepth: Int = -1, minSamplesPass: Int = 0, minBamAlternateDepth: Int = 0, filterRefCalls: Boolean = false) extends AbstractArgs + case class Args(inputVcf: File = null, outputVcf: File = null, minSampleDepth: Int = -1, minTotalDepth: Int = -1, + minAlternateDepth: Int = -1, minSamplesPass: Int = 0, minBamAlternateDepth: Int = 0, filterRefCalls: Boolean = false) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputVcf") required() maxOccurs(1) valueName("<file>") action { (x, c) => - c.copy(inputVcf = x) } - opt[File]('o', "outputVcf") required() maxOccurs(1) valueName("<file>") action { (x, c) => - c.copy(outputVcf = x) } text("output file, default to stdout") - opt[Int]("minSampleDepth") unbounded() action { (x, c) => - c.copy(minSampleDepth = x ) } - opt[Int]("minTotalDepth") unbounded() action { (x, c) => - c.copy(minTotalDepth = x ) } - opt[Int]("minAlternateDepth") unbounded() action { (x, c) => - c.copy(minAlternateDepth = x) } - opt[Int]("minSamplesPass") unbounded() action { (x, c) => - c.copy(minSamplesPass = x) } - opt[Int]("minBamAlternateDepth") unbounded() action { (x, c) => - c.copy(minBamAlternateDepth = x) } - opt[Unit]("filterRefCalls") unbounded() action { (x, c) => - c.copy(filterRefCalls = true) } + opt[File]('I', "inputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + c.copy(inputVcf = x) + } + opt[File]('o', "outputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + c.copy(outputVcf = x) + } text ("output file, default to stdout") + opt[Int]("minSampleDepth") unbounded () action { (x, c) => + c.copy(minSampleDepth = x) + } + opt[Int]("minTotalDepth") unbounded () action { (x, c) => + c.copy(minTotalDepth = x) + } + opt[Int]("minAlternateDepth") unbounded () action { (x, c) => + c.copy(minAlternateDepth = x) + } + opt[Int]("minSamplesPass") unbounded () action { (x, c) => + c.copy(minSamplesPass = x) + } + opt[Int]("minBamAlternateDepth") unbounded () action { (x, c) => + c.copy(minBamAlternateDepth = x) + } + opt[Unit]("filterRefCalls") unbounded () action { (x, c) => + c.copy(filterRefCalls = true) + } } - + /** * @param args the command line arguments */ def main(args: Array[String]): Unit = { val argsParser = new OptParser val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) - + val reader = new VCFFileReader(commandArgs.inputVcf, false) val header = reader.getFileHeader val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputVcf).build) writer.writeHeader(header) - + val bamADFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-AD-")) yield line.getID).toList val bamDPFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-DP-")) yield line.getID).toList - + for (record <- reader) { val genotypes = for (genotype <- record.getGenotypes) yield { val DP = if (genotype.hasDP) genotype.getDP else -1 - val AD = if (genotype.hasAD) List(genotype.getAD:_*) else Nil - DP >= commandArgs.minSampleDepth && - (if (!AD.isEmpty) AD.tail.count(_ >= commandArgs.minAlternateDepth) > 0 else true) && - !(commandArgs.filterRefCalls && genotype.isHomRef) + val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil + DP >= commandArgs.minSampleDepth && + (if (!AD.isEmpty) AD.tail.count(_ >= commandArgs.minAlternateDepth) > 0 else true) && + !(commandArgs.filterRefCalls && genotype.isHomRef) } - + val bamADvalues = (for (field <- bamADFields) yield { record.getAttribute(field, new ArrayList) match { - case t:ArrayList[_] if t.length > 1 => { - for (i <- 1 until t.size) yield { - t(i) match { - case a:Int => a > commandArgs.minBamAlternateDepth - case a:String => a.toInt > commandArgs.minBamAlternateDepth - case _ => false - } + case t: ArrayList[_] if t.length > 1 => { + for (i <- 1 until t.size) yield { + t(i) match { + case a: Int => a > commandArgs.minBamAlternateDepth + case a: String => a.toInt > commandArgs.minBamAlternateDepth + case _ => false } + } } case _ => List(false) } }).flatten - - if (record.getAttributeAsInt("DP", -1) >= commandArgs.minTotalDepth && - genotypes.count(_ == true) >= commandArgs.minSamplesPass && - (commandArgs.minBamAlternateDepth <= 0 || bamADvalues.count(_ == true) >= commandArgs.minSamplesPass)) + + if (record.getAttributeAsInt("DP", -1) >= commandArgs.minTotalDepth && + genotypes.count(_ == true) >= commandArgs.minSamplesPass && + (commandArgs.minBamAlternateDepth <= 0 || bamADvalues.count(_ == true) >= commandArgs.minSamplesPass)) writer.add(record) } reader.close diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala index 3629ad74a4e8577aaf30f3d50f4183149a8d756d..1a5c374fca1990f33f321aaef8a8c3b2a9be3d83 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfToTsv.scala @@ -21,78 +21,85 @@ import java.io.File import java.io.PrintStream import nl.lumc.sasc.biopet.core.ToolCommand import scala.collection.JavaConversions._ -import scala.collection.mutable.{Map, ListBuffer} +import scala.collection.mutable.{ Map, ListBuffer } class VcfToTsv { // TODO: Queue wrapper } object VcfToTsv extends ToolCommand { - case class Args (inputFile:File = null, outputFile:File = null, fields: List[String] = Nil, infoFields: List[String] = Nil, - sampleFileds: List[String] = Nil, disableDefaults: Boolean = false, - allInfo:Boolean = false, allFormat:Boolean = false) extends AbstractArgs + case class Args(inputFile: File = null, outputFile: File = null, fields: List[String] = Nil, infoFields: List[String] = Nil, + sampleFileds: List[String] = Nil, disableDefaults: Boolean = false, + allInfo: Boolean = false, allFormat: Boolean = false) extends AbstractArgs class OptParser extends AbstractOptParser { - opt[File]('I', "inputFile") required() maxOccurs(1) valueName("<file>") action { (x, c) => - c.copy(inputFile = x) } - opt[File]('o', "outputFile") maxOccurs(1) valueName("<file>") action { (x, c) => - c.copy(outputFile = x) } text("output file, default to stdout") - opt[String]('f', "field") unbounded() action { (x, c) => - c.copy(fields = x :: c.fields) } - opt[String]('i', "info_field") unbounded() action { (x, c) => - c.copy(infoFields = x :: c.infoFields) } - opt[Unit]("all_info") unbounded() action { (x, c) => - c.copy(allInfo = true) } - opt[Unit]("all_format") unbounded() action { (x, c) => - c.copy(allFormat = true) } - opt[String]('s', "sample_field") unbounded() action { (x, c) => - c.copy(sampleFileds = x :: c.sampleFileds) } - opt[Unit]('d', "disable_defaults") unbounded() action { (x, c) => - c.copy(disableDefaults = true) } + opt[File]('I', "inputFile") required () maxOccurs (1) valueName ("<file>") action { (x, c) => + c.copy(inputFile = x) + } + opt[File]('o', "outputFile") maxOccurs (1) valueName ("<file>") action { (x, c) => + c.copy(outputFile = x) + } text ("output file, default to stdout") + opt[String]('f', "field") unbounded () action { (x, c) => + c.copy(fields = x :: c.fields) + } + opt[String]('i', "info_field") unbounded () action { (x, c) => + c.copy(infoFields = x :: c.infoFields) + } + opt[Unit]("all_info") unbounded () action { (x, c) => + c.copy(allInfo = true) + } + opt[Unit]("all_format") unbounded () action { (x, c) => + c.copy(allFormat = true) + } + opt[String]('s', "sample_field") unbounded () action { (x, c) => + c.copy(sampleFileds = x :: c.sampleFileds) + } + opt[Unit]('d', "disable_defaults") unbounded () action { (x, c) => + c.copy(disableDefaults = true) + } } - + val defaultFields = List("chr", "pos", "id", "ref", "alt", "qual") - + def main(args: Array[String]): Unit = { val argsParser = new OptParser val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) - + val reader = new VCFFileReader(commandArgs.inputFile, false) val header = reader.getFileHeader val samples = header.getSampleNamesInOrder - + val allInfoFields = header.getInfoHeaderLines.map(_.getID).toList val allFormatFields = header.getFormatHeaderLines.map(_.getID).toList - - val fields: Set[String] = (if (commandArgs.disableDefaults) Nil else defaultFields).toSet[String] ++ - commandArgs.fields.toSet[String] ++ - (if (commandArgs.allInfo) allInfoFields else commandArgs.infoFields).map("INFO-"+_) ++ { - val buffer: ListBuffer[String] = ListBuffer() - for (f <- (if (commandArgs.allFormat) allFormatFields else commandArgs.sampleFileds); sample <- samples) { - buffer += sample+"-"+f + + val fields: Set[String] = (if (commandArgs.disableDefaults) Nil else defaultFields).toSet[String] ++ + commandArgs.fields.toSet[String] ++ + (if (commandArgs.allInfo) allInfoFields else commandArgs.infoFields).map("INFO-" + _) ++ { + val buffer: ListBuffer[String] = ListBuffer() + for (f <- (if (commandArgs.allFormat) allFormatFields else commandArgs.sampleFileds); sample <- samples) { + buffer += sample + "-" + f + } + buffer.toSet[String] } - buffer.toSet[String] - } - - val sortedFields = fields.toList.sortWith((a,b) => { - val aT = if (a.startsWith("INFO-")) 'i' else if (samples.exists(x => a.startsWith(x+"-"))) 'f' else 'g' - val bT = if (b.startsWith("INFO-")) 'i' else if (samples.exists(x => b.startsWith(x+"-"))) 'f' else 'g' + + val sortedFields = fields.toList.sortWith((a, b) => { + val aT = if (a.startsWith("INFO-")) 'i' else if (samples.exists(x => a.startsWith(x + "-"))) 'f' else 'g' + val bT = if (b.startsWith("INFO-")) 'i' else if (samples.exists(x => b.startsWith(x + "-"))) 'f' else 'g' if (aT == 'g' && bT == 'g') { val ai = defaultFields.indexOf(a) val bi = defaultFields.indexOf(b) if (bi < 0) true else ai <= bi - } - else if (aT == 'g') true + } else if (aT == 'g') true else if (bT == 'g') false else if (aT == bT) (if (a.compareTo(b) > 0) false else true) else if (aT == 'i') true else false }) - + val witter = if (commandArgs.outputFile != null) new PrintStream(commandArgs.outputFile) else sys.process.stdout - + witter.println(sortedFields.mkString("#", "\t", "")) for (vcfRecord <- reader) { val values: Map[String, Any] = Map() @@ -104,36 +111,36 @@ object VcfToTsv extends ToolCommand { val t = for (a <- vcfRecord.getAlternateAlleles) yield a.getBaseString t.mkString(",") } - values += "qual" -> (if (vcfRecord.getPhredScaledQual == -10) "." else scala.math.round(vcfRecord.getPhredScaledQual*100.0)/100.0) + values += "qual" -> (if (vcfRecord.getPhredScaledQual == -10) "." else scala.math.round(vcfRecord.getPhredScaledQual * 100.0) / 100.0) values += "filter" -> vcfRecord.getFilters for ((field, content) <- vcfRecord.getAttributes) { - values += "INFO-"+field -> { + values += "INFO-" + field -> { content match { - case a:List[_] => a.mkString(",") - case a:Array[_] => a.mkString(",") - case a:java.util.ArrayList[_] => a.mkString(",") - case _ => content + case a: List[_] => a.mkString(",") + case a: Array[_] => a.mkString(",") + case a: java.util.ArrayList[_] => a.mkString(",") + case _ => content } } } - + for (sample <- samples) { val genotype = vcfRecord.getGenotype(sample) - values += sample+"-GT" -> { + values += sample + "-GT" -> { val l = for (g <- genotype.getAlleles) yield vcfRecord.getAlleleIndex(g) l.map(x => if (x < 0) "." else x).mkString("/") } - if (genotype.hasAD) values += sample+"-AD" -> List(genotype.getAD:_*).mkString(",") - if (genotype.hasDP) values += sample+"-DP" -> genotype.getDP - if (genotype.hasGQ )values += sample+"-GQ" -> genotype.getGQ - if (genotype.hasPL) values += sample+"-PL" -> List(genotype.getPL:_*).mkString(",") + if (genotype.hasAD) values += sample + "-AD" -> List(genotype.getAD: _*).mkString(",") + if (genotype.hasDP) values += sample + "-DP" -> genotype.getDP + if (genotype.hasGQ) values += sample + "-GQ" -> genotype.getGQ + if (genotype.hasPL) values += sample + "-PL" -> List(genotype.getPL: _*).mkString(",") for ((field, content) <- genotype.getExtendedAttributes) { - values += sample+"-"+field -> content + values += sample + "-" + field -> content } } val line = for (f <- sortedFields) yield { - if (values.contains(f)) { - values(f) + if (values.contains(f)) { + values(f) } else "" } witter.println(line.mkString("\t")) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala index d131692da08156dcc7b4fd085e71cb791bfca2c0..37eff846f598514b13d35003dfd8a191f0b1374f 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/WipeReads.scala @@ -8,7 +8,7 @@ import java.io.File import scala.collection.JavaConverters._ -import com.google.common.hash.{Funnel, BloomFilter, PrimitiveSink} +import com.google.common.hash.{ Funnel, BloomFilter, PrimitiveSink } import htsjdk.samtools.AlignmentBlock import htsjdk.samtools.SAMFileReader import htsjdk.samtools.QueryInterval @@ -106,13 +106,13 @@ object WipeReads extends ToolCommand { /** Function to create iterator from refFlat file */ def makeFeatureFromRefFlat(inFile: File): Iterator[Feature] = ??? - // convert coordinate to 1-based fully closed - // parse chrom, start blocks, end blocks, strands + // convert coordinate to 1-based fully closed + // parse chrom, start blocks, end blocks, strands /** Function to create iterator from GTF file */ def makeFeatureFromGtf(inFile: File): Iterator[Feature] = ??? - // convert coordinate to 1-based fully closed - // parse chrom, start blocks, end blocks, strands + // convert coordinate to 1-based fully closed + // parse chrom, start blocks, end blocks, strands // detect interval file format from extension val iterFunc: (File => Iterator[Feature]) = @@ -252,7 +252,6 @@ object WipeReads extends ToolCommand { else (r: SAMRecord) => readGroupIds.contains(r.getReadGroup.getReadGroupId) - val readyBam = prepIndexedInputBam() /* NOTE: the interval vector here should be bypass-able if we can make @@ -285,11 +284,11 @@ object WipeReads extends ToolCommand { .filter(x => rgFilter(x)) // fold starting from empty set .foldLeft(BloomFilter.create(SAMFunnel, bloomSize.toInt, bloomFp) - )((acc, rec) => { - acc.put(rec) - if (rec.getReadPairedFlag) acc.put(makeMockPair(rec)) - acc - }) + )((acc, rec) => { + acc.put(rec) + if (rec.getReadPairedFlag) acc.put(makeMockPair(rec)) + acc + }) if (filterOutMulti) (rec: SAMRecord) => filteredOutSet.mightContain(rec) diff --git a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/LnUnitTest.scala b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/LnUnitTest.scala index d82d70aef47603d0c7f0a0247b947671e135eca3..76c609094230185cead8dd01789b4604beb87342 100644 --- a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/LnUnitTest.scala +++ b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/extensions/LnUnitTest.scala @@ -19,7 +19,7 @@ class LnUnitTest extends TestNGSuite with Matchers { ln.relative = true ln.in = new File("/dir/nested/target.txt") ln.out = new File("/dir/nested/link.txt") - ln.cmd should === ("ln -s 'target.txt' '/dir/nested/link.txt'") + ln.cmd should ===("ln -s 'target.txt' '/dir/nested/link.txt'") } @Test(description = "Target is one level above link, relative set to true") @@ -28,7 +28,7 @@ class LnUnitTest extends TestNGSuite with Matchers { ln.relative = true ln.in = new File("/dir/target.txt") ln.out = new File("/dir/nested/link.txt") - ln.cmd should === ("ln -s '../target.txt' '/dir/nested/link.txt'") + ln.cmd should ===("ln -s '../target.txt' '/dir/nested/link.txt'") } @Test(description = "Target is two levels above link, relative set to true") @@ -37,7 +37,7 @@ class LnUnitTest extends TestNGSuite with Matchers { ln.relative = true ln.in = new File("/target.txt") ln.out = new File("/dir/nested/link.txt") - ln.cmd should === ("ln -s '../../target.txt' '/dir/nested/link.txt'") + ln.cmd should ===("ln -s '../../target.txt' '/dir/nested/link.txt'") } @Test(description = "Target is a child of a directory one level above link, relative set to true") @@ -46,7 +46,7 @@ class LnUnitTest extends TestNGSuite with Matchers { ln.relative = true ln.in = new File("/dir/another_nested/target.txt") ln.out = new File("/dir/nested/link.txt") - ln.cmd should === ("ln -s '../another_nested/target.txt' '/dir/nested/link.txt'") + ln.cmd should ===("ln -s '../another_nested/target.txt' '/dir/nested/link.txt'") } @Test(description = "Target is one level below link, relative set to true") @@ -55,7 +55,7 @@ class LnUnitTest extends TestNGSuite with Matchers { ln.relative = true ln.in = new File("/dir/nested/deeper/target.txt") ln.out = new File("/dir/nested/link.txt") - ln.cmd should === ("ln -s 'deeper/target.txt' '/dir/nested/link.txt'") + ln.cmd should ===("ln -s 'deeper/target.txt' '/dir/nested/link.txt'") } @Test(description = "Target is two levels below link, relative set to true") @@ -64,7 +64,7 @@ class LnUnitTest extends TestNGSuite with Matchers { ln.relative = true ln.in = new File("/dir/nested/even/deeper/target.txt") ln.out = new File("/dir/nested/link.txt") - ln.cmd should === ("ln -s 'even/deeper/target.txt' '/dir/nested/link.txt'") + ln.cmd should ===("ln -s 'even/deeper/target.txt' '/dir/nested/link.txt'") } @Test(description = "Relative set to false") @@ -73,7 +73,7 @@ class LnUnitTest extends TestNGSuite with Matchers { ln.relative = false ln.in = new File("/dir/nested/target.txt") ln.out = new File("/dir/nested/link.txt") - ln.cmd should === ("ln -s '/dir/nested/target.txt' '/dir/nested/link.txt'") + ln.cmd should ===("ln -s '/dir/nested/target.txt' '/dir/nested/link.txt'") } // TODO: test for case where abosolute is true and input paths are relative? diff --git a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala index 0dac57875f5ae3966b009d9605a2e7c7d55b5d96..19b05904b6135df1576d334668a4338494e6bd7f 100644 --- a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala +++ b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala @@ -42,40 +42,40 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat @Test def testIntervalStartEnd() = { val obs = makeIntervalFromString(List("chr5:1000-1100")).next() val exp = new Interval("chr5", 1000, 1100) - obs.getSequence should === (exp.getSequence) - obs.getStart should === (exp.getStart) - obs.getEnd should === (exp.getEnd) + obs.getSequence should ===(exp.getSequence) + obs.getStart should ===(exp.getStart) + obs.getEnd should ===(exp.getEnd) } @Test def testIntervalStartEndComma() = { val obs = makeIntervalFromString(List("chr5:1,000-1,100")).next() val exp = new Interval("chr5", 1000, 1100) - obs.getSequence should === (exp.getSequence) - obs.getStart should === (exp.getStart) - obs.getEnd should === (exp.getEnd) + obs.getSequence should ===(exp.getSequence) + obs.getStart should ===(exp.getStart) + obs.getEnd should ===(exp.getEnd) } @Test def testIntervalStartEndDot() = { val obs = makeIntervalFromString(List("chr5:1.000-1.100")).next() val exp = new Interval("chr5", 1000, 1100) - obs.getSequence should === (exp.getSequence) - obs.getStart should === (exp.getStart) - obs.getEnd should === (exp.getEnd) + obs.getSequence should ===(exp.getSequence) + obs.getStart should ===(exp.getStart) + obs.getEnd should ===(exp.getEnd) } @Test def testIntervalStart() = { val obs = makeIntervalFromString(List("chr5:1000")).next() val exp = new Interval("chr5", 1000, 1000) - obs.getSequence should === (exp.getSequence) - obs.getStart should === (exp.getStart) - obs.getEnd should === (exp.getEnd) + obs.getSequence should ===(exp.getSequence) + obs.getStart should ===(exp.getStart) + obs.getEnd should ===(exp.getEnd) } @Test def testIntervalError() = { val thrown = intercept[IllegalArgumentException] { makeIntervalFromString(List("chr5:1000-")).next() } - thrown.getMessage should === ("Invalid interval string: chr5:1000-") + thrown.getMessage should ===("Invalid interval string: chr5:1000-") } @Test def testMemFuncIntervalError() = { @@ -84,7 +84,7 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat val thrown = intercept[IllegalArgumentException] { makeMembershipFunction(iv, inAln) } - thrown.getMessage should === ("Chromosome chrP is not found in the alignment file") + thrown.getMessage should ===("Chromosome chrP is not found in the alignment file") } @DataProvider(name = "singleAlnProvider1", parallel = true) @@ -218,7 +218,7 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat val thrown = intercept[IllegalArgumentException] { selectFastqReads(memFunc, in1, out1, in2) } - thrown.getMessage should === ("Missing output FASTQ 2") + thrown.getMessage should ===("Missing output FASTQ 2") verify(out1, never).write(anyObject.asInstanceOf[FastqRecord]) } @@ -230,7 +230,7 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat val thrown = intercept[IllegalArgumentException] { selectFastqReads(memFunc, in1, out1, outputFastq2 = out2) } - thrown.getMessage should === ("Output FASTQ 2 supplied but there is no input FASTQ 2") + thrown.getMessage should ===("Output FASTQ 2 supplied but there is no input FASTQ 2") verify(out1, never).write(anyObject.asInstanceOf[FastqRecord]) verify(out2, never).write(anyObject.asInstanceOf[FastqRecord]) }