Commit bd8a2a63 authored by Peter van 't Hof
Browse files

Added BiopetCommandlineFunction

All command-line functions now extend this trait
parent c4bdd49e
package nl.lumc.sasc.biopet.core
import java.io.File
import org.broadinstitute.sting.queue.QException
import org.broadinstitute.sting.queue.function.CommandLineFunction
import org.broadinstitute.sting.commandline._
import scala.sys.process._
import scala.util.matching.Regex
/**
 * Common base for command-line functions: wires in the merged configuration,
 * resolves the executable, applies thread/vmem settings and records job report
 * bindings (version, cores, command) before the command line is handed out.
 *
 * Implementations supply `globalConfig` and `cmdLine`; `commandLine` itself is final.
 */
trait BiopetCommandLineFunction extends CommandLineFunction {
  /** Configuration supplied by the implementing class; merged into `config` below. */
  val globalConfig: Config

  // Order matters here: `config` is derived from `analysisName`, so the name is set first.
  analysisName = getClass.getSimpleName
  protected var config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName.toLowerCase), globalConfig)
  logger.debug("config passed for " + analysisName)

  @Input(doc="deps", required=false)
  var deps: List[File] = Nil

  /** Requested thread count; 0 means "take it from the config, falling back to `defaultThreads`". */
  @Argument(doc="Threads", required=false)
  var threads: Int = 0
  val defaultThreads: Int = 1

  /** Memory request; when null it is taken from the "vmem" config key or, if non-empty, `defaultVmem`. */
  @Argument(doc="Vmem", required=false)
  var vmem: String = _
  val defaultVmem: String = ""

  @Argument(doc="Executeble")
  var executeble: String = _

  /** Hook called from `preCmdInternal`, just before the version binding is recorded. No-op by default. */
  protected def beforeCmd: Unit = {}

  /** Hook called from `freezeFieldValues`, right after the executable check. No-op by default. */
  protected def afterGraph: Unit = {}

  /**
   * Renames the analysis and re-merges the configuration for the new (lower-cased) name
   * on top of the current `config`.
   *
   * @param name new analysis name
   */
  def setConfig(name: String): Unit = {
    analysisName = name
    config = Config.mergeConfigs(config.getAsConfig(analysisName.toLowerCase), config)
  }

  /**
   * Resolves the executable, runs the `afterGraph` hook and points `jobOutputFile` at a hidden
   * ".<output>.<analysis>.out" file next to the first output, then delegates to the parent.
   */
  override def freezeFieldValues(): Unit = {
    checkExecuteble
    afterGraph
    // Use the (parent, child) constructor: a null parent (output given without a directory)
    // then yields a plain relative file instead of a literal "null/..." path.
    jobOutputFile = new File(firstOutput.getParentFile, "." + firstOutput.getName + "." + analysisName + ".out")
    super.freezeFieldValues()
  }

  /**
   * Resolves `executeble` through `which` and replaces it with its canonical path.
   * Does nothing when `executeble` is null. An IOException (e.g. `which` cannot be started)
   * only produces a warning and leaves `executeble` untouched.
   *
   * @throws QException when `which` exits with a non-zero code
   */
  protected def checkExecuteble: Unit = {
    try if (executeble != null) {
      val resolvedPath = new StringBuffer()
      // Only stdout carries the resolved path; stderr is dropped so it cannot leak into it.
      val whichLogger = ProcessLogger(
        (line: String) => { resolvedPath.append(line); () },
        (_: String) => ())
      val process = Process(Seq("which", executeble)).run(whichLogger)
      if (process.exitValue == 0) {
        executeble = new File(resolvedPath.toString).getCanonicalPath
      } else {
        val message = "executeble: '" + executeble + "' not found, please check config"
        logger.error(message)
        throw new QException(message)
      }
    } catch {
      case ioe: java.io.IOException => logger.warn("Could not use 'which', check on executeble skipped: " + ioe)
    }
  }

  /**
   * Prepares the job right before its command line is produced: resolves the executable,
   * runs the `beforeCmd` hook, records the "version" and "cores" report bindings, applies
   * thread and vmem settings and sets the job name.
   */
  final protected def preCmdInternal: Unit = {
    checkExecuteble
    //for (input <- this.inputs) if (!input.exists) throw new IllegalStateException("Input: " + input + " for " + analysisName + " is missing")
    logger.debug("Config for " + analysisName + ": " + config)
    beforeCmd
    addJobReportBinding("version", getVersion)

    if (threads == 0) threads = config.getThreads(defaultThreads)
    if (threads > 1) nCoresRequest = Option(threads)
    // nCoresRequest is only assigned above when threads > 1, so it can be empty here:
    // report a single core instead of calling .get on it.
    addJobReportBinding("cores", nCoresRequest.map(_.toInt).filter(_ > 0).getOrElse(1))

    if (vmem == null) {
      if (config.contains("vmem")) vmem = config.getAsString("vmem")
      else if (!defaultVmem.isEmpty) vmem = defaultVmem
    }
    if (vmem != null) {
      val vmemRequest = "h_vmem=" + vmem
      // This method runs on every commandLine evaluation; do not pile up identical requests.
      if (!jobResourceRequests.contains(vmemRequest)) jobResourceRequests :+= vmemRequest
    }

    jobName = this.analysisName + ":" + firstOutput.getName
  }

  /** The actual command line of the tool, provided by the implementing class. */
  protected def cmdLine: String

  /**
   * Runs the pre-command preparation, builds the command line via `cmdLine` and records it
   * under the "command" report binding.
   *
   * @return the command line produced by `cmdLine`
   */
  final def commandLine: String = {
    preCmdInternal
    val cmd = cmdLine
    addJobReportBinding("command", cmd)
    cmd
  }

  /** Command whose standard output contains the tool version; null disables version detection. */
  protected var versionCommand: String = _
  /** Regex with a single capture group matched against whole output lines; null disables detection. */
  protected val versionRegex: Regex = null

  /**
   * Runs `versionCommand` once and returns the capture of the first stdout line that
   * `versionRegex` matches.
   *
   * @return the detected version, or "N/A" when no command/regex is configured, the command
   *         exits with a non-zero code, or no line matches
   */
  def getVersion : String = {
    if (versionCommand == null || versionRegex == null) "N/A"
    else {
      val stdoutLines = new scala.collection.mutable.ListBuffer[String]
      val versionLogger = ProcessLogger(
        (line: String) => { stdoutLines += line; () },
        (_: String) => ())
      val process = Process(versionCommand).run(versionLogger)
      // exitValue blocks until the process has finished, so the buffer is complete afterwards.
      val exitCode = process.exitValue
      if (exitCode != 0) {
        logger.warn("Version command: '" + versionCommand + "' give exit code " + exitCode + ", version not found")
        "N/A"
      } else {
        stdoutLines.collectFirst { case versionRegex(m) => m }.getOrElse {
          logger.warn("Version command: '" + versionCommand + "' give a exit code 0 but no version was found, executeble oke?")
          "N/A"
        }
      }
    }
  }
}
package nl.lumc.sasc.biopet.function
import java.io.FileOutputStream
import nl.lumc.sasc.biopet.core._
import org.broadinstitute.sting.commandline._
import java.io.File
import scala.collection.JavaConversions._
/**
 * Command-line function that runs a Python script shipped as a classpath resource.
 * The executable is read from the "python_exe" config key and defaults to "python".
 */
trait PythonCommandLineFunction extends BiopetCommandLineFunction {
  executeble = config.getAsString("python_exe", "python")

  /** Location the script resource is copied to; set by `setPythonScript`. */
  @Input(doc="Python script", required=false)
  var python_script: File = _

  protected var python_script_name : String = _

  /**
   * Extracts `script` from the classpath without a sub-package prefix.
   *
   * @param script resource name of the script
   */
  def setPythonScript(script:String): Unit = { setPythonScript(script,"") }

  /**
   * Copies the classpath resource `subpackage + script` to ".queue/tmp/<subpackage><script>"
   * and points `python_script` at the copy. Missing parent directories are created.
   *
   * @param script resource name of the script
   * @param subpackage prefix prepended to the script name, both for the resource lookup
   *                   and for the target path
   * @throws IllegalArgumentException when the resource cannot be found
   */
  def setPythonScript(script:String, subpackage:String): Unit = {
    python_script_name = script
    python_script = new File(".queue/tmp/" + subpackage + python_script_name)

    val resourceName = subpackage + python_script_name
    val is = getClass.getResourceAsStream(resourceName)
    // getResourceAsStream returns null for an unknown resource; fail with a clear message
    // before creating an empty target file.
    if (is == null) throw new IllegalArgumentException("Python script resource not found: " + resourceName)
    try {
      val targetDir = python_script.getParentFile
      if (!targetDir.exists) targetDir.mkdirs
      val os = new FileOutputStream(python_script)
      // Close both streams even when the copy fails.
      try org.apache.commons.io.IOUtils.copy(is, os)
      finally os.close()
    } finally {
      is.close()
    }
  }

  /**
   * @return the command prefix: the executable followed by the extracted script
   */
  def getPythonCommand() : String = {
    required(executeble) + required(python_script)
  }
}
package nl.lumc.sasc.biopet.function
import nl.lumc.sasc.biopet.core._
import org.broadinstitute.sting.queue.function.CommandLineFunction
import org.broadinstitute.sting.commandline._
import java.io.File
class Sha1sum(val globalConfig: Config) extends CommandLineFunction {
def this() = this(new Config(Map()))
this.analysisName = "sha1sum"
val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig)
logger.debug("Config for " + this.analysisName + ": " + config)
class Sha1sum(val globalConfig: Config) extends BiopetCommandLineFunction {
@Input(doc="Zipped file")
var input: File = _
@Input(doc="Zipped file") var in: File = _
@Output(doc="Unzipped file") var out: File = _
@Output(doc="Unzipped file")
var output: File = _
def commandLine = "sha1sum %s > %s".format(in, out)
executeble = config.getAsString("exe","sha1sum")
def cmdLine = required(executeble) + required(input) + " > " + required(output)
}
\ No newline at end of file
package nl.lumc.sasc.biopet.function
import nl.lumc.sasc.biopet.core._
import org.broadinstitute.sting.queue.function.CommandLineFunction
//import org.broadinstitute.sting.queue.function.CommandLineFunction
import org.broadinstitute.sting.commandline._
import java.io.File
class Zcat(val globalConfig: Config) extends CommandLineFunction {
def this() = this(new Config(Map()))
this.analysisName = "zcat"
val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig)
logger.debug("Config for " + this.analysisName + ": " + config)
class Zcat(val globalConfig: Config) extends BiopetCommandLineFunction {
@Input(doc="Zipped file")
var input: File = _
@Input(doc="Zipped file") var in: File = _
@Output(doc="Unzipped file") var out: File = _
@Output(doc="Unzipped file")
var output: File = _
def commandLine = "zcat %s > %s".format(in, out)
executeble = config.getAsString("exe", "zcat")
def cmdLine = required(executeble) + required(input) + " > " + required(output)
}
\ No newline at end of file
package nl.lumc.sasc.biopet.function.fastq
import nl.lumc.sasc.biopet.core._
import org.broadinstitute.sting.queue.function.CommandLineFunction
import org.broadinstitute.sting.commandline._
import java.io.File
import scala.io.Source._
import scala.sys.process._
class Cutadapt(val globalConfig: Config) extends CommandLineFunction {
def this() = this(new Config(Map()))
analysisName = "cutadapt"
val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig)
logger.debug("Config for " + this.analysisName + ": " + config)
class Cutadapt(val globalConfig: Config) extends BiopetCommandLineFunction {
@Input(doc="Input fastq file")
var fastq_input: File = _
@Input(doc="Cutadapt exe", required=false)
var cutadapt_exe: File = new File(config.getAsString("exe","/usr/local/bin/cutadapt"))
@Input(doc="Input fastq file") var fastq_input: File = _
@Input(doc="Fastq contams file", required=false) var contams_file: File = _
@Output(doc="Output fastq file") var fastq_output: File = _
@Input(doc="Fastq contams file", required=false)
var contams_file: File = _
@Output(doc="Output fastq file")
var fastq_output: File = _
executeble = config.getAsString("exe","cutadapt")
var default_clip_mode = config.getAsString("default_clip_mode", "3")
var opt_adapter: Set[String] = config.getAsListOfStrings("adapter", Nil).to[Set]
......@@ -28,15 +27,20 @@ class Cutadapt(val globalConfig: Config) extends CommandLineFunction {
var opt_minimum_length: String = config.getAsInt("minimum_length", 1).toString
var opt_maximum_length: String = config.getAsString("maximum_length", null)
def init() {
this.addJobReportBinding("version", getVersion)
override val versionRegex = """(.*)""".r
override def afterGraph() {
versionCommand = executeble + " --version"
}
override def beforeCmd() {
this.getContamsFromFile
}
def commandLine = {
init()
def cmdLine = {
if (!opt_adapter.isEmpty || !opt_anywhere.isEmpty || !opt_front.isEmpty) {
required(cutadapt_exe) +
analysisName = getClass.getName
required(executeble) +
// options
repeat("-a", opt_adapter) +
repeat("-b", opt_anywhere) +
......@@ -48,6 +52,7 @@ class Cutadapt(val globalConfig: Config) extends CommandLineFunction {
required(fastq_input) +
" > " + required(fastq_output)
} else {
analysisName = getClass.getName + "-ln"
"ln -sf " +
required(fastq_input) +
required(fastq_output)
......@@ -71,24 +76,4 @@ class Cutadapt(val globalConfig: Config) extends CommandLineFunction {
} else logger.warn("File : " + contams_file + " does not exist")
}
}
private var version: String = _
var versionCommand = cutadapt_exe + " --version"
var versionRegex = """(.*)"""
def getVersion: String = getVersion(versionCommand, versionRegex)
def getVersion(cmd:String, regex:String) : String = {
val REG = regex.r
if (cmd.! != 0) {
logger.warn("Version command: '" + cmd + "' give a none-zero exit code, version not found")
return "NA"
}
for (line <- cmd.!!.split("\n")) {
line match {
case REG(m) => return m
case _ =>
}
}
logger.warn("Version command: '" + cmd + "' give a exit code 0 but no version was found, executeble oke?")
return "NA"
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.function.fastq
import nl.lumc.sasc.biopet.core._
import org.broadinstitute.sting.queue.function.CommandLineFunction
import org.broadinstitute.sting.commandline._
import java.io.File
import scala.sys.process._
class Fastqc(val globalConfig: Config) extends CommandLineFunction {
def this() = this(new Config(Map()))
this.analysisName = "fastqc"
val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig)
logger.debug("Config for " + this.analysisName + ": " + config)
@Input(doc="fastqc executeble", shortName="Fastqc_Exe")
var fastqc_exe: File = new File(config.getAsString("exe","/usr/local/FastQC/FastQC_v0.10.1/fastqc"))
@Argument(doc="java vm executeble", shortName="Java_Exe", required=false)
var java_exe: String = globalConfig.getAsConfig("java").getAsString("exe", "java")
@Argument(doc="kmers", required=false) var kmers: Int = config.getAsInt("kmers", 5)
@Argument(doc="threads", required=false) var threads: Int = config.getAsInt("threads", 4)
@Argument(doc="quiet", required=false) var quiet: Boolean = config.getAsBoolean("quiet", false)
@Argument(doc="noextract", required=false) var noextract: Boolean = config.getAsBoolean("noextract", false)
@Argument(doc="nogroup", required=false) var nogroup: Boolean = config.getAsBoolean("nogroup", false)
class Fastqc(val globalConfig: Config) extends BiopetCommandLineFunction {
@Input(doc="Contaminants", required=false)
var contaminants: File = new File(config.getAsString("contaminants",fastqc_exe.getParent() + "/Contaminants/contaminant_list.txt"))
@Input(doc="Fastq file", shortName="FQ") var fastqfile: File = _
@Output(doc="Output", shortName="out") var output: File = _
var contaminants: File = _
@Input(doc="Fastq file", shortName="FQ")
var fastqfile: File = _
if (config.contains("vmem")) jobResourceRequests :+= "h_vmem=" + config.getAsString("vmem")
@Output(doc="Output", shortName="out")
var output: File = _
executeble = config.getAsString("exe","fastqc")
var java_exe: String = config.getAsConfig("java").getAsString("exe", "java")
var kmers: Int = config.getAsInt("kmers", 5)
var quiet: Boolean = config.getAsBoolean("quiet", false)
var noextract: Boolean = config.getAsBoolean("noextract", false)
var nogroup: Boolean = config.getAsBoolean("nogroup", false)
def init() {
this.addJobReportBinding("version", getVersion)
var maxThreads: Int = config.getAsInt("maxthreads", 24)
if (threads > maxThreads) threads = maxThreads
nCoresRequest = Option(threads)
this.jobNativeArgs :+= "-l h_vmem="+config.getAsString("vmem", "4G")
}
override val versionRegex = """FastQC (.*)""".r
override val defaultThreads = 4
def commandLine = {
init()
required(fastqc_exe) +
override def afterGraph {
this.checkExecuteble
val fastqcDir = executeble.substring(0, executeble.lastIndexOf("/"))
if (contaminants == null) contaminants = new File(fastqcDir + "/Contaminants/contaminant_list.txt")
versionCommand = executeble + " --version"
}
def cmdLine = {
required(executeble) +
optional("--java", java_exe) +
optional("--threads",threads) +
optional("--contaminants",contaminants) +
......@@ -50,24 +45,4 @@ class Fastqc(val globalConfig: Config) extends CommandLineFunction {
required(fastqfile) +
required(" > ", output, escape=false)
}
private var version: String = _
var versionCommand = fastqc_exe + " --version"
var versionRegex = """FastQC (.*)"""
def getVersion: String = getVersion(versionCommand, versionRegex)
def getVersion(cmd:String, regex:String) : String = {
val REG = regex.r
if (cmd.! != 0) {
logger.warn("Version command: '" + cmd + "' give a none-zero exit code, version not found")
return "NA"
}
for (line <- cmd.!!.split("\n")) {
line match {
case REG(m) => return m
case _ =>
}
}
logger.warn("Version command: '" + cmd + "' give a exit code 0 but no version was found, executeble oke?")
return "NA"
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.function.fastq
import nl.lumc.sasc.biopet.core._
import org.broadinstitute.sting.queue.function.CommandLineFunction
import org.broadinstitute.sting.commandline._
import java.io.File
import scala.io.Source._
import scala.sys.process._
class Sickle(val globalConfig: Config) extends CommandLineFunction {
def this() = this(new Config(Map()))
this.analysisName = "sickle"
val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig)
logger.debug("Config for " + this.analysisName + ": " + config)
class Sickle(val globalConfig: Config) extends BiopetCommandLineFunction {
@Input(doc="R1 input")
var input_R1: File = null
@Input(doc="Sickle exe", required=false) var sickle_exe: File = new File(config.getAsString("exe", "/usr/local/bin/sickle"))
@Input(doc="R1 input") var input_R1: File = null
@Input(doc="R2 input", required=false) var input_R2: File = null
@Output(doc="R1 output") var output_R1: File = null
@Output(doc="R2 output", required=false) var output_R2: File = null
@Output(doc="singles output", required=false) var output_singles: File = null
@Output(doc="stats output") var output_stats: File = null
@Input(doc="qualityType file", required=false) var qualityTypeFile: File = null
@Argument(doc="Quality Type", required=false) var qualityType: String = config.getAsString("qualitytype", null)
@Input(doc="deps", required=false) var deps: List[File] = Nil
@Input(doc="R2 input", required=false)
var input_R2: File = null
var defaultQualityType: String = config.getAsString("defaultqualitytype", "sanger")
@Input(doc="qualityType file", required=false)
var qualityTypeFile: File = _
def init() {
this.addJobReportBinding("version", getVersion)
this.getQualityTypeFromFile
@Output(doc="R1 output")
var output_R1: File = null
@Output(doc="R2 output", required=false)
var output_R2: File = null
@Output(doc="singles output", required=false)
var output_singles: File = null
@Output(doc="stats output")
var output_stats: File = null
executeble = config.getAsString("exe", "sickle")
var qualityType: String = config.getAsString("qualitytype", null)
var defaultQualityType: String = _
override val versionRegex = """sickle version (.*)""".r
override def afterGraph {
if (defaultQualityType == null) defaultQualityType = config.getAsString("defaultqualitytype", "sanger")
if (qualityType == null && defaultQualityType != null) qualityType = defaultQualityType
versionCommand = executeble + " --version"
}
def commandLine = {
init()
var cmd: String = required(sickle_exe)
override def beforeCmd {
qualityType = getQualityTypeFromFile
}
def cmdLine = {
var cmd: String = required(executeble)
if (input_R2 != null) {
cmd += required("pe") +
required("-r", input_R2) +
......@@ -49,34 +61,15 @@ class Sickle(val globalConfig: Config) extends CommandLineFunction {
" > " + required(output_stats)
}
def getQualityTypeFromFile {
def getQualityTypeFromFile: String = {
if (qualityType == null && qualityTypeFile != null) {
if (qualityTypeFile.exists()) {
for (line <- fromFile(qualityTypeFile).getLines) {
var s: String = line.substring(0,line.lastIndexOf("\t"))
qualityType = s
return s
}
} else logger.warn("File : " + qualityTypeFile + " does not exist")
}
}
private var version: String = _
var versionCommand = sickle_exe + " --version"
var versionRegex = """sickle version (.*)"""
def getVersion: String = getVersion(versionCommand, versionRegex)
def getVersion(cmd:String, regex:String) : String = {
val REG = regex.r
if (cmd.! != 0) {
logger.warn("Version command: '" + cmd + "' give a none-zero exit code, version not found")
return "NA"
}
for (line <- cmd.!!.split("\n")) {
line match {
case REG(m) => return m
case _ =>
}
}
logger.warn("Version command: '" + cmd + "' give a exit code 0 but no version was found, executeble oke?")
return "NA"
return null
}
}
\ No newline at end of file
......@@ -2,19 +2,13 @@ package nl.lumc.sasc.biopet.pipelines.flexiprep.scripts
//import java.io.FileOutputStream
import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.function._
import org.broadinstitute.sting.queue.function.CommandLineFunction
import nl.lumc.sasc.biopet.function.PythonCommandLineFunction
import org.broadinstitute.sting.commandline._
import java.io.File
class FastqSync(val globalConfig: Config) extends Python {
def this() = this(new Config(Map()))
analysisName = "fastqsync"
val config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName.toLowerCase), globalConfig)
logger.debug("Config for " + analysisName + ": " + config)
setPythonScript("__init__.py", "scripts/pyfastqc/")
setPythonScript("sync_paired_end_reads.py", "scripts/")
class FastqSync(val globalConfig: Config) extends PythonCommandLineFunction {
setPythonScript("__init__.py", "pyfastqc/")
setPythonScript("sync_paired_end_reads.py")
@Input(doc="Start fastq") var input_start_fastq: File = _
@Input(doc="R1 input") var input_R1: File = _
......@@ -23,7 +17,7 @@ class FastqSync(val globalConfig: Config) extends Python {
@Output(doc="R2 output") var output_R2: File = _
var output_stats: File = _
def commandLine = {
def cmdLine = {
getPythonCommand +
required(input_start_fastq) +
required(input_R1) +
......
package nl.lumc.sasc.biopet.pipelines.flexiprep.scripts
import nl.lumc.sasc.biopet.core._
import org.broadinstitute.sting.queue.function.CommandLineFunction
import nl.lumc.sasc.biopet.function.Python
import nl.lumc.sasc.biopet.function.PythonCommandLineFunction
import org.broadinstitute.sting.commandline._
import java.io.File
class FastqcToContams(val globalConfig: Config) extends CommandLineFunction with Python {
def this() = this(new Config(Map()))
analysisName = "getcontams"
val config: Config = Config.mergeConfigs(globalConfig.getAsConfig(analysisName), globalConfig)
logger.debug("Config for " + analysisName + ": " + config)
setPythonScript("__init__.py", "scripts/pyfastqc/")
setPythonScript("fastqc_contam.py", "scripts/")
class FastqcToContams(val globalConfig: Config) extends PythonCommandLineFunction {
setPythonScript("__init__.py", "pyfastqc/")
setPythonScript("fastqc_contam.py")
@Input(doc="Fa