From bd8a2a63b4fa4f268e4b67cb1547115ed1a01623 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Tue, 17 Jun 2014 16:10:23 +0200 Subject: [PATCH] Added BiopetCommandLineFunction All command-line functions now extend this trait --- .../core/BiopetCommandLineFunction.scala | 116 ++++++++++++++++++ .../function/PythonCommandLineFunction.scala | 30 +++++ .../lumc/sasc/biopet/function/Sha1sum.scala | 17 ++- .../nl/lumc/sasc/biopet/function/Zcat.scala | 18 +-- .../sasc/biopet/function/fastq/Cutadapt.scala | 57 ++++----- .../sasc/biopet/function/fastq/Fastqc.scala | 75 ++++------- .../sasc/biopet/function/fastq/Sickle.scala | 83 ++++++------- .../flexiprep/scripts/FastqSync.scala | 16 +-- .../flexiprep/scripts/FastqcToContams.scala | 18 +-- .../flexiprep/scripts/FastqcToQualtype.scala | 16 +-- .../pipelines/flexiprep/scripts/Seqstat.scala | 17 +-- .../flexiprep/scripts/Summarize.scala | 17 +-- .../sasc/biopet/function/aligners/Bwa.scala | 61 ++++----- .../sasc/biopet/function/aligners/Star.scala | 79 +++++++----- 14 files changed, 345 insertions(+), 275 deletions(-) create mode 100644 biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala create mode 100644 biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/PythonCommandLineFunction.scala diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala new file mode 100644 index 000000000..bd7c3fb9e --- /dev/null +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetCommandLineFunction.scala @@ -0,0 +1,116 @@ +package nl.lumc.sasc.biopet.core + +import java.io.File +import org.broadinstitute.sting.queue.QException +import org.broadinstitute.sting.queue.function.CommandLineFunction +import org.broadinstitute.sting.commandline._ +import scala.sys.process._ +import scala.util.matching.Regex + +trait 
BiopetCommandLineFunction extends CommandLineFunction { + val globalConfig: Config + analysisName = getClass.getSimpleName + protected var config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName.toLowerCase), globalConfig) + logger.debug("config passed for " + analysisName) + + @Input(doc="deps", required=false) + var deps: List[File] = Nil + + @Argument(doc="Threads", required=false) + var threads = 0 + val defaultThreads = 1 + + @Argument(doc="Vmem", required=false) + var vmem: String = _ + val defaultVmem: String = "" + + @Argument(doc="Executeble") + var executeble: String = _ + + protected def beforeCmd { + } + + protected def afterGraph { + } + + def setConfig(name:String) { + analysisName = name + config = Config.mergeConfigs(config.getAsConfig(analysisName.toLowerCase), config) + } + + override def freezeFieldValues() { + checkExecuteble + afterGraph + jobOutputFile = new File(firstOutput.getParent + "/." + firstOutput.getName + "." + analysisName + ".out") + + super.freezeFieldValues() + } + + protected def checkExecuteble { + try if (executeble != null) { + val buffer = new StringBuffer() + val cmd = Seq("which", executeble) + val process = Process(cmd).run(ProcessLogger(buffer.append(_))) + if (process.exitValue == 0) { + executeble = buffer.toString + val file = new File(executeble) + executeble = file.getCanonicalPath + } else { + logger.error("executeble: '" + executeble + "' not found, please check config") + throw new QException("executeble: '" + executeble + "' not found, please check config") + } + } catch { + case ioe: java.io.IOException => logger.warn("Could not use 'which', check on executeble skipped: " + ioe) + } + } + + final protected def preCmdInternal { + checkExecuteble + //for (input <- this.inputs) if (!input.exists) throw new IllegalStateException("Input: " + input + " for " + analysisName + " is missing") + logger.debug("Config for " + analysisName + ": " + config) + + beforeCmd + + addJobReportBinding("version", 
getVersion) + + if (threads == 0) threads = config.getThreads(defaultThreads) + if (threads > 1) nCoresRequest = Option(threads) + addJobReportBinding("cores", nCoresRequest.filter(_ > 0).getOrElse(1)) + + if (vmem == null) { + if (config.contains("vmem")) vmem = config.getAsString("vmem") + else if (!defaultVmem.isEmpty) vmem = defaultVmem + } + if (vmem != null) jobResourceRequests :+= "h_vmem=" + vmem + jobName = this.analysisName + ":" + firstOutput.getName + } + + protected def cmdLine: String + final def commandLine: String = { + preCmdInternal + val cmd = cmdLine + addJobReportBinding("command", cmd) + cmd + } + + protected var versionCommand: String = _ + protected val versionRegex: Regex = null + def getVersion : String = { + if (versionCommand == null || versionRegex == null) return "N/A" + val buffer = new StringBuffer() + val process = Process(versionCommand).run(ProcessLogger(buffer append _)) + if (process.exitValue != 0) { + logger.warn("Version command: '" + versionCommand + "' give exit code " + process.exitValue + ", version not found") + return "N/A" + } + val lines = versionCommand lines_! 
ProcessLogger(buffer append _) + for (line <- lines) { + line match { + case versionRegex(m) => return m + case _ => + } + } + logger.warn("Version command: '" + versionCommand + "' give a exit code 0 but no version was found, executeble oke?") + return "N/A" + } +} diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/PythonCommandLineFunction.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/PythonCommandLineFunction.scala new file mode 100644 index 000000000..ea2945642 --- /dev/null +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/PythonCommandLineFunction.scala @@ -0,0 +1,30 @@ +package nl.lumc.sasc.biopet.function + +import java.io.FileOutputStream +import nl.lumc.sasc.biopet.core._ +import org.broadinstitute.sting.commandline._ +import java.io.File +import scala.collection.JavaConversions._ + +trait PythonCommandLineFunction extends BiopetCommandLineFunction { + executeble = config.getAsString("python_exe", "python") + + @Input(doc="Python script", required=false) + var python_script: File = _ + + protected var python_script_name : String = _ + def setPythonScript(script:String) { setPythonScript(script,"") } + def setPythonScript(script:String, subpackage:String) { + python_script_name = script + python_script = new File(".queue/tmp/" + subpackage + python_script_name) + if (!python_script.getParentFile.exists) python_script.getParentFile.mkdirs + val is = getClass.getResourceAsStream(subpackage + python_script_name) + val os = new FileOutputStream(python_script) + org.apache.commons.io.IOUtils.copy(is, os) + os.close() + } + + def getPythonCommand() : String = { + required(executeble) + required(python_script) + } +} diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Sha1sum.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Sha1sum.scala index a3b546a5e..fb43a126a 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Sha1sum.scala +++ 
b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Sha1sum.scala @@ -1,18 +1,17 @@ package nl.lumc.sasc.biopet.function import nl.lumc.sasc.biopet.core._ -import org.broadinstitute.sting.queue.function.CommandLineFunction import org.broadinstitute.sting.commandline._ import java.io.File -class Sha1sum(val globalConfig: Config) extends CommandLineFunction { - def this() = this(new Config(Map())) - this.analysisName = "sha1sum" - val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig) - logger.debug("Config for " + this.analysisName + ": " + config) +class Sha1sum(val globalConfig: Config) extends BiopetCommandLineFunction { + @Input(doc="Zipped file") + var input: File = _ - @Input(doc="Zipped file") var in: File = _ - @Output(doc="Unzipped file") var out: File = _ + @Output(doc="Unzipped file") + var output: File = _ - def commandLine = "sha1sum %s > %s".format(in, out) + executeble = config.getAsString("exe","sha1sum") + + def cmdLine = required(executeble) + required(input) + " > " + required(output) } \ No newline at end of file diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala index 33b407671..95f3095d0 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/function/Zcat.scala @@ -1,18 +1,18 @@ package nl.lumc.sasc.biopet.function import nl.lumc.sasc.biopet.core._ -import org.broadinstitute.sting.queue.function.CommandLineFunction +//import org.broadinstitute.sting.queue.function.CommandLineFunction import org.broadinstitute.sting.commandline._ import java.io.File -class Zcat(val globalConfig: Config) extends CommandLineFunction { - def this() = this(new Config(Map())) - this.analysisName = "zcat" - val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig) - logger.debug("Config 
for " + this.analysisName + ": " + config) +class Zcat(val globalConfig: Config) extends BiopetCommandLineFunction { + @Input(doc="Zipped file") + var input: File = _ - @Input(doc="Zipped file") var in: File = _ - @Output(doc="Unzipped file") var out: File = _ + @Output(doc="Unzipped file") + var output: File = _ - def commandLine = "zcat %s > %s".format(in, out) + executeble = config.getAsString("exe", "zcat") + + def cmdLine = required(executeble) + required(input) + " > " + required(output) } \ No newline at end of file diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Cutadapt.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Cutadapt.scala index ca436d089..50334d606 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Cutadapt.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Cutadapt.scala @@ -1,23 +1,22 @@ package nl.lumc.sasc.biopet.function.fastq import nl.lumc.sasc.biopet.core._ -import org.broadinstitute.sting.queue.function.CommandLineFunction import org.broadinstitute.sting.commandline._ import java.io.File import scala.io.Source._ import scala.sys.process._ -class Cutadapt(val globalConfig: Config) extends CommandLineFunction { - def this() = this(new Config(Map())) - analysisName = "cutadapt" - val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig) - logger.debug("Config for " + this.analysisName + ": " + config) +class Cutadapt(val globalConfig: Config) extends BiopetCommandLineFunction { + @Input(doc="Input fastq file") + var fastq_input: File = _ - @Input(doc="Cutadapt exe", required=false) - var cutadapt_exe: File = new File(config.getAsString("exe","/usr/local/bin/cutadapt")) - @Input(doc="Input fastq file") var fastq_input: File = _ - @Input(doc="Fastq contams file", required=false) var contams_file: File = _ - @Output(doc="Output fastq file") var fastq_output: File = _ + @Input(doc="Fastq contams file", required=false) + 
var contams_file: File = _ + + @Output(doc="Output fastq file") + var fastq_output: File = _ + + executeble = config.getAsString("exe","cutadapt") var default_clip_mode = config.getAsString("default_clip_mode", "3") var opt_adapter: Set[String] = config.getAsListOfStrings("adapter", Nil).to[Set] @@ -28,15 +27,20 @@ class Cutadapt(val globalConfig: Config) extends CommandLineFunction { var opt_minimum_length: String = config.getAsInt("minimum_length", 1).toString var opt_maximum_length: String = config.getAsString("maximum_length", null) - def init() { - this.addJobReportBinding("version", getVersion) + override val versionRegex = """(.*)""".r + + override def afterGraph() { + versionCommand = executeble + " --version" + } + + override def beforeCmd() { this.getContamsFromFile } - def commandLine = { - init() + def cmdLine = { if (!opt_adapter.isEmpty || !opt_anywhere.isEmpty || !opt_front.isEmpty) { - required(cutadapt_exe) + + analysisName = getClass.getName + required(executeble) + // options repeat("-a", opt_adapter) + repeat("-b", opt_anywhere) + @@ -48,6 +52,7 @@ class Cutadapt(val globalConfig: Config) extends CommandLineFunction { required(fastq_input) + " > " + required(fastq_output) } else { + analysisName = getClass.getName + "-ln" "ln -sf " + required(fastq_input) + required(fastq_output) @@ -71,24 +76,4 @@ class Cutadapt(val globalConfig: Config) extends CommandLineFunction { } else logger.warn("File : " + contams_file + " does not exist") } } - - private var version: String = _ - var versionCommand = cutadapt_exe + " --version" - var versionRegex = """(.*)""" - def getVersion: String = getVersion(versionCommand, versionRegex) - def getVersion(cmd:String, regex:String) : String = { - val REG = regex.r - if (cmd.! 
!= 0) { - logger.warn("Version command: '" + cmd + "' give a none-zero exit code, version not found") - return "NA" - } - for (line <- cmd.!!.split("\n")) { - line match { - case REG(m) => return m - case _ => - } - } - logger.warn("Version command: '" + cmd + "' give a exit code 0 but no version was found, executeble oke?") - return "NA" - } } \ No newline at end of file diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Fastqc.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Fastqc.scala index 16c4b9d9e..6303ef3a6 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Fastqc.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Fastqc.scala @@ -1,44 +1,39 @@ package nl.lumc.sasc.biopet.function.fastq import nl.lumc.sasc.biopet.core._ -import org.broadinstitute.sting.queue.function.CommandLineFunction import org.broadinstitute.sting.commandline._ import java.io.File import scala.sys.process._ -class Fastqc(val globalConfig: Config) extends CommandLineFunction { - def this() = this(new Config(Map())) - this.analysisName = "fastqc" - val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig) - logger.debug("Config for " + this.analysisName + ": " + config) - - @Input(doc="fastqc executeble", shortName="Fastqc_Exe") - var fastqc_exe: File = new File(config.getAsString("exe","/usr/local/FastQC/FastQC_v0.10.1/fastqc")) - @Argument(doc="java vm executeble", shortName="Java_Exe", required=false) - var java_exe: String = globalConfig.getAsConfig("java").getAsString("exe", "java") - @Argument(doc="kmers", required=false) var kmers: Int = config.getAsInt("kmers", 5) - @Argument(doc="threads", required=false) var threads: Int = config.getAsInt("threads", 4) - @Argument(doc="quiet", required=false) var quiet: Boolean = config.getAsBoolean("quiet", false) - @Argument(doc="noextract", required=false) var noextract: Boolean = config.getAsBoolean("noextract", false) - 
@Argument(doc="nogroup", required=false) var nogroup: Boolean = config.getAsBoolean("nogroup", false) +class Fastqc(val globalConfig: Config) extends BiopetCommandLineFunction { @Input(doc="Contaminants", required=false) - var contaminants: File = new File(config.getAsString("contaminants",fastqc_exe.getParent() + "/Contaminants/contaminant_list.txt")) - @Input(doc="Fastq file", shortName="FQ") var fastqfile: File = _ - @Output(doc="Output", shortName="out") var output: File = _ + var contaminants: File = _ + + @Input(doc="Fastq file", shortName="FQ") + var fastqfile: File = _ - if (config.contains("vmem")) jobResourceRequests :+= "h_vmem=" + config.getAsString("vmem") + @Output(doc="Output", shortName="out") + var output: File = _ + + executeble = config.getAsString("exe","fastqc") + var java_exe: String = config.getAsConfig("java").getAsString("exe", "java") + var kmers: Int = config.getAsInt("kmers", 5) + var quiet: Boolean = config.getAsBoolean("quiet", false) + var noextract: Boolean = config.getAsBoolean("noextract", false) + var nogroup: Boolean = config.getAsBoolean("nogroup", false) - def init() { - this.addJobReportBinding("version", getVersion) - var maxThreads: Int = config.getAsInt("maxthreads", 24) - if (threads > maxThreads) threads = maxThreads - nCoresRequest = Option(threads) - this.jobNativeArgs :+= "-l h_vmem="+config.getAsString("vmem", "4G") - } + override val versionRegex = """FastQC (.*)""".r + override val defaultThreads = 4 - def commandLine = { - init() - required(fastqc_exe) + + override def afterGraph { + this.checkExecuteble + val fastqcDir = executeble.substring(0, executeble.lastIndexOf("/")) + if (contaminants == null) contaminants = new File(fastqcDir + "/Contaminants/contaminant_list.txt") + versionCommand = executeble + " --version" + } + + def cmdLine = { + required(executeble) + optional("--java", java_exe) + optional("--threads",threads) + optional("--contaminants",contaminants) + @@ -50,24 +45,4 @@ class Fastqc(val 
globalConfig: Config) extends CommandLineFunction { required(fastqfile) + required(" > ", output, escape=false) } - - private var version: String = _ - var versionCommand = fastqc_exe + " --version" - var versionRegex = """FastQC (.*)""" - def getVersion: String = getVersion(versionCommand, versionRegex) - def getVersion(cmd:String, regex:String) : String = { - val REG = regex.r - if (cmd.! != 0) { - logger.warn("Version command: '" + cmd + "' give a none-zero exit code, version not found") - return "NA" - } - for (line <- cmd.!!.split("\n")) { - line match { - case REG(m) => return m - case _ => - } - } - logger.warn("Version command: '" + cmd + "' give a exit code 0 but no version was found, executeble oke?") - return "NA" - } } \ No newline at end of file diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Sickle.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Sickle.scala index 92a098a50..2cef9e3e2 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Sickle.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/function/fastq/Sickle.scala @@ -1,40 +1,52 @@ package nl.lumc.sasc.biopet.function.fastq import nl.lumc.sasc.biopet.core._ -import org.broadinstitute.sting.queue.function.CommandLineFunction import org.broadinstitute.sting.commandline._ import java.io.File import scala.io.Source._ import scala.sys.process._ -class Sickle(val globalConfig: Config) extends CommandLineFunction { - def this() = this(new Config(Map())) - this.analysisName = "sickle" - val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig) - logger.debug("Config for " + this.analysisName + ": " + config) +class Sickle(val globalConfig: Config) extends BiopetCommandLineFunction { + @Input(doc="R1 input") + var input_R1: File = null - @Input(doc="Sickle exe", required=false) var sickle_exe: File = new File(config.getAsString("exe", "/usr/local/bin/sickle")) - @Input(doc="R1 input") var 
input_R1: File = null - @Input(doc="R2 input", required=false) var input_R2: File = null - @Output(doc="R1 output") var output_R1: File = null - @Output(doc="R2 output", required=false) var output_R2: File = null - @Output(doc="singles output", required=false) var output_singles: File = null - @Output(doc="stats output") var output_stats: File = null - @Input(doc="qualityType file", required=false) var qualityTypeFile: File = null - @Argument(doc="Quality Type", required=false) var qualityType: String = config.getAsString("qualitytype", null) - @Input(doc="deps", required=false) var deps: List[File] = Nil + @Input(doc="R2 input", required=false) + var input_R2: File = null - var defaultQualityType: String = config.getAsString("defaultqualitytype", "sanger") + @Input(doc="qualityType file", required=false) + var qualityTypeFile: File = _ - def init() { - this.addJobReportBinding("version", getVersion) - this.getQualityTypeFromFile + @Output(doc="R1 output") + var output_R1: File = null + + @Output(doc="R2 output", required=false) + var output_R2: File = null + + @Output(doc="singles output", required=false) + var output_singles: File = null + + @Output(doc="stats output") + var output_stats: File = null + + executeble = config.getAsString("exe", "sickle") + var qualityType: String = config.getAsString("qualitytype", null) + + var defaultQualityType: String = _ + override val versionRegex = """sickle version (.*)""".r + + override def afterGraph { + if (defaultQualityType == null) defaultQualityType = config.getAsString("defaultqualitytype", "sanger") if (qualityType == null && defaultQualityType != null) qualityType = defaultQualityType + + versionCommand = executeble + " --version" } - def commandLine = { - init() - var cmd: String = required(sickle_exe) + override def beforeCmd { + qualityType = Option(getQualityTypeFromFile).getOrElse(qualityType) + } + + def cmdLine = { + var cmd: String = required(executeble) if (input_R2 != null) { cmd += required("pe") + required("-r", input_R2) + @@ 
-49,34 +61,15 @@ class Sickle(val globalConfig: Config) extends CommandLineFunction { " > " + required(output_stats) } - def getQualityTypeFromFile { + def getQualityTypeFromFile: String = { if (qualityType == null && qualityTypeFile != null) { if (qualityTypeFile.exists()) { for (line <- fromFile(qualityTypeFile).getLines) { var s: String = line.substring(0,line.lastIndexOf("\t")) - qualityType = s + return s } } else logger.warn("File : " + qualityTypeFile + " does not exist") } - } - - private var version: String = _ - var versionCommand = sickle_exe + " --version" - var versionRegex = """sickle version (.*)""" - def getVersion: String = getVersion(versionCommand, versionRegex) - def getVersion(cmd:String, regex:String) : String = { - val REG = regex.r - if (cmd.! != 0) { - logger.warn("Version command: '" + cmd + "' give a none-zero exit code, version not found") - return "NA" - } - for (line <- cmd.!!.split("\n")) { - line match { - case REG(m) => return m - case _ => - } - } - logger.warn("Version command: '" + cmd + "' give a exit code 0 but no version was found, executeble oke?") - return "NA" + return null } } \ No newline at end of file diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqSync.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqSync.scala index 3180785a3..5f7803b3f 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqSync.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqSync.scala @@ -2,19 +2,13 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep.scripts //import java.io.FileOutputStream import nl.lumc.sasc.biopet.core._ -import nl.lumc.sasc.biopet.function._ -import org.broadinstitute.sting.queue.function.CommandLineFunction +import nl.lumc.sasc.biopet.function.PythonCommandLineFunction import org.broadinstitute.sting.commandline._ import java.io.File -class FastqSync(val globalConfig: 
Config) extends Python { - def this() = this(new Config(Map())) - analysisName = "fastqsync" - val config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName.toLowerCase), globalConfig) - logger.debug("Config for " + analysisName + ": " + config) - - setPythonScript("__init__.py", "scripts/pyfastqc/") - setPythonScript("sync_paired_end_reads.py", "scripts/") +class FastqSync(val globalConfig: Config) extends PythonCommandLineFunction { + setPythonScript("__init__.py", "pyfastqc/") + setPythonScript("sync_paired_end_reads.py") @Input(doc="Start fastq") var input_start_fastq: File = _ @Input(doc="R1 input") var input_R1: File = _ @@ -23,7 +17,7 @@ class FastqSync(val globalConfig: Config) extends Python { @Output(doc="R2 output") var output_R2: File = _ var output_stats: File = _ - def commandLine = { + def cmdLine = { getPythonCommand + required(input_start_fastq) + required(input_R1) + diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToContams.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToContams.scala index 1ab9b00b5..45515458e 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToContams.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToContams.scala @@ -1,25 +1,19 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep.scripts import nl.lumc.sasc.biopet.core._ -import org.broadinstitute.sting.queue.function.CommandLineFunction -import nl.lumc.sasc.biopet.function.Python +import nl.lumc.sasc.biopet.function.PythonCommandLineFunction import org.broadinstitute.sting.commandline._ import java.io.File -class FastqcToContams(val globalConfig: Config) extends CommandLineFunction with Python { - def this() = this(new Config(Map())) - analysisName = "getcontams" - val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig) - logger.debug("Config for " + analysisName + 
": " + config) - - setPythonScript("__init__.py", "scripts/pyfastqc/") - setPythonScript("fastqc_contam.py", "scripts/") +class FastqcToContams(val globalConfig: Config) extends PythonCommandLineFunction { + setPythonScript("__init__.py", "pyfastqc/") + setPythonScript("fastqc_contam.py") @Input(doc="Fastqc output", shortName="fastqc", required=true) var fastqc_output: File = _ - @Input(doc="Contams input", shortName="fastqc", required=true) var contams_file: File = _ + @Input(doc="Contams input", shortName="fastqc", required=false) var contams_file: File = _ @Output(doc="Output file", shortName="out", required=true) var out: File = _ - def commandLine = { + def cmdLine = { getPythonCommand + required(fastqc_output.getParent()) + required("-c",contams_file) + diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToQualtype.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToQualtype.scala index da7594b5c..e3a7cf7b2 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToQualtype.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/FastqcToQualtype.scala @@ -1,24 +1,18 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep.scripts import nl.lumc.sasc.biopet.core._ -import org.broadinstitute.sting.queue.function.CommandLineFunction -import nl.lumc.sasc.biopet.function.Python +import nl.lumc.sasc.biopet.function.PythonCommandLineFunction import org.broadinstitute.sting.commandline._ import java.io.File -class FastqcToQualtype(val globalConfig: Config) extends CommandLineFunction with Python { - def this() = this(new Config(Map())) - analysisName = "getqualtype" - val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig) - logger.debug("Config for " + analysisName + ": " + config) - - setPythonScript("__init__.py", "scripts/pyfastqc/") - setPythonScript("qual_type_sickle.py", "scripts/") +class 
FastqcToQualtype(val globalConfig: Config) extends PythonCommandLineFunction { + setPythonScript("__init__.py", "pyfastqc/") + setPythonScript("qual_type_sickle.py") @Input(doc="Fastqc output", shortName="fastqc", required=true) var fastqc_output: File = _ @Output(doc="Output file", shortName="out", required=true) var out: File = _ - def commandLine = { + def cmdLine = { getPythonCommand + required(fastqc_output.getParent()) + " > " + diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Seqstat.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Seqstat.scala index 7e6a4c094..3ed1a7bec 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Seqstat.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Seqstat.scala @@ -1,27 +1,20 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep.scripts import nl.lumc.sasc.biopet.core._ -import org.broadinstitute.sting.queue.function.CommandLineFunction -import nl.lumc.sasc.biopet.function.Python +import nl.lumc.sasc.biopet.function.PythonCommandLineFunction import org.broadinstitute.sting.commandline._ import java.io.File -class Seqstat(val globalConfig: Config) extends CommandLineFunction with Python { - def this() = this(new Config(Map())) - analysisName = "seqstat" - val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig) - logger.debug("Config for " + analysisName + ": " + config) - - setPythonScript("__init__.py", "scripts/pyfastqc/") - setPythonScript("seq_stat.py", "scripts/") +class Seqstat(val globalConfig: Config) extends PythonCommandLineFunction { + setPythonScript("__init__.py", "pyfastqc/") + setPythonScript("seq_stat.py") @Input(doc="Fastq input", shortName="fastqc", required=true) var input_fastq: File = _ - @Input(doc="Dep", shortName="dep", required=false) var deps: List[File] = Nil @Output(doc="Output file", shortName="out", required=true) var out: 
File = _ var fmt: String = _ - def commandLine = { + def cmdLine = { getPythonCommand + optional("--fmt", fmt) + required("-o", out) + diff --git a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Summarize.scala b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Summarize.scala index a25832abe..afbb9f53b 100644 --- a/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Summarize.scala +++ b/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/scripts/Summarize.scala @@ -1,21 +1,14 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep.scripts import nl.lumc.sasc.biopet.core._ -import org.broadinstitute.sting.queue.function.CommandLineFunction -import nl.lumc.sasc.biopet.function.Python +import nl.lumc.sasc.biopet.function.PythonCommandLineFunction import org.broadinstitute.sting.commandline._ import java.io.File -class Summarize(val globalConfig: Config) extends CommandLineFunction with Python { - def this() = this(new Config(Map())) - analysisName = "flexiprep_sumarize" - val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig) - logger.debug("Config for " + analysisName + ": " + config) +class Summarize(val globalConfig: Config) extends PythonCommandLineFunction { + setPythonScript("__init__.py", "pyfastqc/") + setPythonScript("summarize_flexiprep.py") - setPythonScript("__init__.py", "scripts/pyfastqc/") - setPythonScript("summarize_flexiprep.py", "scripts/") - - @Input(doc="Dep", shortName="dep", required=false) var deps: List[File] = Nil @Output(doc="Output file", shortName="out", required=true) var out: File = _ var samplea: String = _ @@ -25,7 +18,7 @@ class Summarize(val globalConfig: Config) extends CommandLineFunction with Pytho var trim: Boolean = true var clip: Boolean = true - def commandLine = { + def cmdLine = { var mode: String = "" if (clip) mode += "clip" if (trim) mode += "trim" diff --git 
a/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Bwa.scala b/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Bwa.scala index d1dae66e3..699e082c9 100644 --- a/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Bwa.scala +++ b/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Bwa.scala @@ -1,40 +1,38 @@ package nl.lumc.sasc.biopet.function.aligners import nl.lumc.sasc.biopet.core._ -import org.broadinstitute.sting.queue.function.CommandLineFunction import org.broadinstitute.sting.commandline._ import java.io.File import scala.sys.process._ -class Bwa(val globalConfig: Config) extends CommandLineFunction { - def this() = this(new Config(Map())) - this.analysisName = "bwa" - val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig) - logger.debug("Config for " + this.analysisName + ": " + config) - - @Argument(doc="Bwa executeble", shortName="bwa_exe", required=false) var bwa_exe: String = config.getAsString("exe", "/usr/local/bin/bwa") - @Input(doc="The reference file for the bam files.", shortName="R") var referenceFile: File = new File(config.getAsString("referenceFile")) - @Input(doc="Fastq file R1", shortName="R1") var R1: File = _ - @Input(doc="Fastq file R2", shortName="R2", required=false) var R2: File = _ - @Output(doc="Output file SAM", shortName="output") var output: File = _ +class Bwa(val globalConfig: Config) extends BiopetCommandLineFunction { + @Input(doc="Fastq file R1", shortName="R1") + var R1: File = _ + + @Input(doc="Fastq file R2", shortName="R2", required=false) + var R2: File = _ + + @Input(doc="The reference file for the bam files.", shortName="R") + var referenceFile: File = new File(config.getAsString("referenceFile")) - @Argument(doc="Readgroup header", shortName="RG", required=false) var RG: String = _ - @Argument(doc="M", shortName="M", required=false) var M: Boolean = config.getAsBoolean("M", true) + @Output(doc="Output file SAM", shortName="output") + 
var output: File = _ - jobResourceRequests :+= "h_vmem=" + config.getAsString("vmem", "6G") + executeble = config.getAsString("exe", "bwa") - var threads: Int = config.getAsInt("threads", 8) - var maxThreads: Int = config.getAsInt("maxthreads", 24) - if (threads > maxThreads) threads = maxThreads - nCoresRequest = Option(threads) + var RG: String = _ + var M = config.getAsBoolean("M", true) - def init() { - this.addJobReportBinding("version", getVersion) + override val defaultVmem = "6G" + override val defaultThreads = 8 + override val versionRegex = """Version: (.*)""".r + + override def beforeCmd() { + versionCommand = executeble } - def commandLine = { - init() - required(bwa_exe) + + def cmdLine = { + required(executeble) + required("mem") + optional("-t", nCoresRequest) + optional("-R", RG) + @@ -44,19 +42,4 @@ class Bwa(val globalConfig: Config) extends CommandLineFunction { optional(R2) + " > " + required(output) } - - private var version: String = "" - def getVersion : String = { - val REG = """Version: (.*)""".r - if (version == null) for (line <- bwa_exe.!!.split("\n")) { - line match { - case REG(m) => { - version = m - return version - } - case _ => - } - } - return version - } } diff --git a/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Star.scala b/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Star.scala index 492c0413b..8df9e20b4 100644 --- a/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Star.scala +++ b/mapping/src/main/scala/nl/lumc/sasc/biopet/function/aligners/Star.scala @@ -1,40 +1,52 @@ package nl.lumc.sasc.biopet.function.aligners import nl.lumc.sasc.biopet.core._ -import org.broadinstitute.sting.queue.function.CommandLineFunction import org.broadinstitute.sting.commandline._ import java.io.File import scala.sys.process._ -class Star(val globalConfig: Config) extends CommandLineFunction { - def this() = this(new Config(Map())) - this.analysisName = "star" - val config: Config = 
Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig) - logger.debug("Config for " + this.analysisName + ": " + config) +class Star(val globalConfig: Config) extends BiopetCommandLineFunction { + @Input(doc="The reference file for the bam files.", required=false) + var referenceFile: File = new File(config.getAsString("referenceFile")) - @Argument(doc="STAR executeble", required=false) var star_exe: String = config.getAsString("exe", "/usr/local/bin/STAR") - @Input(doc="The reference file for the bam files.", required=false) var referenceFile: File = new File(config.getAsString("referenceFile")) - @Input(doc="Fastq file R1", required=false) var R1: File = _ - @Input(doc="Fastq file R2", required=false) var R2: File = _ - @Argument(doc="Output Directory") var outputDir: String = _ - @Argument(doc="GenomeDir", required=false) var genomeDir: String = config.getAsString("genomeDir", referenceFile.getParent + "/star/") - @Argument(doc="STAR runmode", shortName="runmode", required=false) var runmode: String = _ - @Output(doc="Output SAM file", required=false) var outputSam: File = _ - @Output(doc="Output tab file", required=false) var outputTab: File = _ - @Input(doc="sjdbFileChrStartEnd file", required=false) var sjdbFileChrStartEnd: File = _ - @Argument(doc="sjdbOverhang", required=false) var sjdbOverhang: Int = _ - @Argument(doc="outFileNamePrefix", required=false) var outFileNamePrefix: String = _ - @Input(doc="deps", required=false) var deps: List[File] = Nil + @Input(doc="Fastq file R1", required=false) + var R1: File = _ - @Output(doc="Output genome file", required=false) var outputGenome: File = _ - @Output(doc="Output SA file", required=false) var outputSA: File = _ - @Output(doc="Output SAindex file", required=false) var outputSAindex: File = _ + @Input(doc="Fastq file R2", required=false) + var R2: File = _ - jobResourceRequests :+= "h_vmem=" + config.getAsString("vmem", "6G") - nCoresRequest = Option(config.getThreads(8)) - 
addJobReportBinding("version", "NA") + @Output(doc="Output SAM file", required=false) + var outputSam: File = _ - def init() { + @Output(doc="Output tab file", required=false) + var outputTab: File = _ + + @Input(doc="sjdbFileChrStartEnd file", required=false) + var sjdbFileChrStartEnd: File = _ + + @Output(doc="Output genome file", required=false) + var outputGenome: File = _ + + @Output(doc="Output SA file", required=false) + var outputSA: File = _ + + @Output(doc="Output SAindex file", required=false) + var outputSAindex: File = _ + + executeble = config.getAsString("exe", "STAR") + + @Argument(doc="Output Directory") + var outputDir: String = _ + + var genomeDir: String = config.getAsString("genomeDir", referenceFile.getParent + "/star/") + var runmode: String = _ + var sjdbOverhang: Int = _ + var outFileNamePrefix: String = _ + + override val defaultVmem = "6G" + override val defaultThreads = 8 + + override def afterGraph() { if (outFileNamePrefix != null && !outFileNamePrefix.endsWith(".")) outFileNamePrefix +="." if (!outputDir.endsWith("/")) outputDir += "/" val prefix = if (outFileNamePrefix != null) outputDir+outFileNamePrefix else outputDir @@ -49,9 +61,8 @@ class Star(val globalConfig: Config) extends CommandLineFunction { } } - def commandLine : String= { - init() - var cmd: String = required("cd",outputDir) + "&&" + required(star_exe) + def cmdLine : String = { + var cmd: String = required("cd",outputDir) + "&&" + required(executeble) if (runmode != null && runmode == "genomeGenerate") { // Create index cmd += required("--runMode", runmode) + required("--genomeFastaFiles", referenceFile) @@ -66,4 +77,14 @@ class Star(val globalConfig: Config) extends CommandLineFunction { return cmd } +} + +object Star { + def apply(config:Config, R1:File, R2:File, outputDir:String): Star = { + val star = new Star(config) + star.R1 = R1 + if (R2 != null) star.R2 = R2 + star.outputDir = outputDir + return star + } } \ No newline at end of file -- GitLab