Skip to content
Snippets Groups Projects
Commit f1441306 authored by bow's avatar bow
Browse files

Merge branch 'develop' into feature-gentrap

Conflicts:
	public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/Tophat.scala
	public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectAlignmentSummaryMetrics.scala
	public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectInsertSizeMetrics.scala
	public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/MarkDuplicates.scala
	public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/Picard.scala
parents 848279f4 5d854a9f
No related branches found
No related tags found
No related merge requests found
Showing
with 188 additions and 266 deletions
......@@ -133,7 +133,7 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
raxmlBoot.N = 1
raxmlBoot.n = outputName + "_boot_" + t
add(raxmlBoot)
raxmlBoot.getBootstrapFile
raxmlBoot.getBootstrapFile.get
}
val cat = Cat(this, bootList.toList, new File(outputDir, "/boot_list"))
......@@ -142,7 +142,7 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
val raxmlBi = new Raxml(this)
raxmlBi.input = concensusVariants
raxmlBi.t = raxmlMl.getBestTreeFile
raxmlBi.z = cat.output
raxmlBi.z = Some(cat.output)
raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlBi.p = seed
raxmlBi.f = "b"
......@@ -152,7 +152,7 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
val gubbins = new RunGubbins(this)
gubbins.fastafile = concensusVariants
gubbins.startingTree = Some(raxmlBi.getBipartitionsFile)
gubbins.startingTree = raxmlBi.getBipartitionsFile
gubbins.outputDirectory = dirSufixGubbins
add(gubbins)
}
......
......@@ -77,7 +77,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
}
List(bamFile)
} else {
val markDup = MarkDuplicates(this, files, new File(outputDir + outputName + ".dedup.bam"))
val markDup = MarkDuplicates(this, files, new File(outputDir, outputName + ".dedup.bam"))
markDup.isIntermediate = useIndelRealigner
add(markDup)
if (useIndelRealigner) {
......
<?xml version="1.0" encoding="UTF-8"?>
<!--
Biopet is built on top of GATK Queue for building bioinformatic
pipelines. It is mainly intended to support LUMC SHARK cluster which is running
SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
should also be able to execute Biopet tools and pipelines.
Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
-->
<project-shared-configuration>
<!--
This file contains additional configuration written by modules in the NetBeans IDE.
The configuration is intended to be shared among all the users of project and
therefore it is assumed to be part of version control checkout.
Without this configuration present, some functionality in the IDE may be limited or fail altogether.
-->
<config-data xmlns="http://www.netbeans.org/ns/maven-config-data/1">
<configurations>
<configuration id="yamsvp" profiles=""/>
</configurations>
</config-data>
<properties xmlns="http://www.netbeans.org/ns/maven-properties-data/1">
<!--
Properties that influence various parts of the IDE, especially code formatting and the like.
You can copy and paste the single properties, into the pom.xml file and the IDE will pick them up.
That way multiple projects can share the same settings (useful for formatting rules for example).
Any value defined here will override the pom.xml file value but is only applicable to the current project.
-->
<netbeans.hint.license>apache20</netbeans.hint.license>
<com-junichi11-netbeans-changelf.enable>true</com-junichi11-netbeans-changelf.enable>
<com-junichi11-netbeans-changelf.use-project>true</com-junichi11-netbeans-changelf.use-project>
<com-junichi11-netbeans-changelf.lf-kind>LF</com-junichi11-netbeans-changelf.lf-kind>
<com-junichi11-netbeans-changelf.use-global>false</com-junichi11-netbeans-changelf.use-global>
<com-junichi11-netbeans-changelf.show-dialog>true</com-junichi11-netbeans-changelf.show-dialog>
<org-netbeans-modules-javascript2-requirejs.enabled>true</org-netbeans-modules-javascript2-requirejs.enabled>
</properties>
</project-shared-configuration>
<?xml version="1.0" encoding="UTF-8"?>
<!--
Biopet is built on top of GATK Queue for building bioinformatic
pipelines. It is mainly intended to support LUMC SHARK cluster which is running
SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
should also be able to execute Biopet tools and pipelines.
Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
-->
<actions>
<action>
<actionName>run</actionName>
<preAction>build-with-dependencies</preAction>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp -outDir /usr/local/waiyi/ -config ../test/samples.json -run -l debug</exec.args>
<exec.executable>java</exec.executable>
<exec.workingdir>../test</exec.workingdir>
</properties>
</action>
<action>
<actionName>debug</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp -outDir /usr/local/waiyi/ -config ../test/samples.json -run -l debug</exec.args>
<exec.executable>java</exec.executable>
<jpda.listen>true</jpda.listen>
<exec.workingdir>../test</exec.workingdir>
</properties>
</action>
<action>
<actionName>profile</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp -outDir /usr/local/waiyi/ -config ../test/samples.json -run -l debug</exec.args>
<exec.executable>java</exec.executable>
<exec.workingdir>../test</exec.workingdir>
</properties>
</action>
</actions>
<?xml version="1.0" encoding="UTF-8"?>
<!--
Biopet is built on top of GATK Queue for building bioinformatic
pipelines. It is mainly intended to support LUMC SHARK cluster which is running
SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
should also be able to execute Biopet tools and pipelines.
Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
-->
<actions>
<action>
<actionName>run</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/compare_test/samples.json -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/config.json -outDir /home/pjvan_thof/pipelines/test</exec.args>
<exec.executable>java</exec.executable>
<exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir>
</properties>
</action>
<action>
<actionName>debug</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/compare_test/samples.json -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/config.json -outDir /home/pjvan_thof/pipelines/test</exec.args>
<exec.executable>java</exec.executable>
<jpda.listen>true</jpda.listen>
<exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir>
</properties>
</action>
<action>
<actionName>profile</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/compare_test/samples.json -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/config.json -outDir /home/pjvan_thof/pipelines/test</exec.args>
<exec.executable>java</exec.executable>
<exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir>
</properties>
</action>
</actions>
......@@ -25,9 +25,7 @@ import scala.util.matching.Regex
import java.io.FileInputStream
import java.security.MessageDigest
/**
* Biopet command line trait to auto check executable and cluster values
*/
/** Biopet command line trait to auto check executable and cluster values */
trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurable {
analysisName = configName
......@@ -47,14 +45,10 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
*/
protected[core] def beforeCmd {}
/**
* Can override this method. This is executed after the script is done en queue starts to generate the graph
*/
/** Can override this method. This is executed after the script is done and Queue starts to generate the graph */
protected[core] def beforeGraph {}
/**
* Set default output file, threads and vmem for current job
*/
/** Set default output file, threads and vmem for current job */
override def freezeFieldValues() {
preProcesExecutable
beforeGraph
......@@ -118,9 +112,7 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
addJobReportBinding("md5sum_exe", md5.getOrElse("None"))
}
/**
* executes checkExecutable method and fill job report
*/
/** executes checkExecutable method and fill job report */
final protected def preCmdInternal {
preProcesExecutable
......@@ -133,10 +125,7 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
addJobReportBinding("version", getVersion)
}
/**
* Command to get version of executable
* @return
*/
/** Command to get version of executable */
protected def versionCommand: String = null
/** Regex to get version from version command output */
......@@ -146,10 +135,10 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
protected val versionExitcode = List(0)
/** Executes the version command */
private def getVersionInternal: String = {
if (versionCommand == null || versionRegex == null) return "N/A"
private def getVersionInternal: Option[String] = {
if (versionCommand == null || versionRegex == null) return None
val exe = new File(versionCommand.trim.split(" ")(0))
if (!exe.exists()) return "N/A"
if (!exe.exists()) return None
val stdout = new StringBuffer()
val stderr = new StringBuffer()
def outputLog = "Version command: \n" + versionCommand +
......@@ -158,25 +147,28 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
val process = Process(versionCommand).run(ProcessLogger(stdout append _ + "\n", stderr append _ + "\n"))
if (!versionExitcode.contains(process.exitValue)) {
logger.warn("getVersion give exit code " + process.exitValue + ", version not found \n" + outputLog)
return "N/A"
return None
}
for (line <- stdout.toString.split("\n") ++ stderr.toString.split("\n")) {
line match {
case versionRegex(m) => return m
case versionRegex(m) => return Some(m)
case _ =>
}
}
logger.warn("getVersion give a exit code " + process.exitValue + " but no version was found, executable correct? \n" + outputLog)
return "N/A"
return None
}
/** Get version from cache otherwise execute the version command */
def getVersion: String = {
def getVersion: Option[String] = {
if (!BiopetCommandLineFunctionTrait.executableCache.contains(executable))
preProcesExecutable
if (!BiopetCommandLineFunctionTrait.versionCache.contains(executable))
BiopetCommandLineFunctionTrait.versionCache += executable -> getVersionInternal
return BiopetCommandLineFunctionTrait.versionCache(executable)
if (!BiopetCommandLineFunctionTrait.versionCache.contains(versionCommand))
getVersionInternal match {
case Some(version) => BiopetCommandLineFunctionTrait.versionCache += versionCommand -> version
case _ =>
}
BiopetCommandLineFunctionTrait.versionCache.get(versionCommand)
}
/**
......@@ -205,9 +197,7 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
}
}
/**
* stores global caches
*/
/** stores global caches */
object BiopetCommandLineFunctionTrait {
import scala.collection.mutable.Map
private val versionCache: Map[String, String] = Map()
......
......@@ -21,6 +21,11 @@ import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Extension for bowtie 1
*
* Based on version 1.1.1
*/
class Bowtie(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Fastq file R1", shortName = "R1")
var R1: File = null
......@@ -53,6 +58,7 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction {
var strata: Boolean = config("strata", default = false)
var maqerr: Option[Int] = config("maqerr")
/** return commandline to execute */
def cmdLine = {
required(executable) +
optional("--threads", nCoresRequest) +
......
......@@ -20,6 +20,9 @@ import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File
/**
* Extension for GNU cat
*/
class Cat(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Input file", required = true)
var input: List[File] = Nil
......@@ -29,10 +32,21 @@ class Cat(val root: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "cat")
/** return commandline to execute */
def cmdLine = required(executable) + repeat(input) + " > " + required(output)
}
/**
* Object for constructors for cat
*/
object Cat {
/**
* Basic constructor
* @param root root object for config
* @param input list of files to use
* @param output output File
* @return
*/
def apply(root: Configurable, input: List[File], output: File): Cat = {
val cat = new Cat(root)
cat.input = input
......
......@@ -24,6 +24,10 @@ import nl.lumc.sasc.biopet.core.config.Configurable
import scala.collection.mutable
import scala.io.Source
/**
* Extension for cutadapt
* Based on version 1.5
*/
class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Summarizable {
@Input(doc = "Input fastq file")
var fastq_input: File = _
......@@ -39,17 +43,15 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
override val versionRegex = """(.*)""".r
var default_clip_mode: String = config("default_clip_mode", default = "3")
var opt_adapter: Set[String] = Set()
if (config.contains("adapter")) for (adapter <- config("adapter").asList) opt_adapter += adapter.toString
var opt_anywhere: Set[String] = Set()
if (config.contains("anywhere")) for (adapter <- config("anywhere").asList) opt_anywhere += adapter.toString
var opt_front: Set[String] = Set()
if (config.contains("front")) for (adapter <- config("front").asList) opt_front += adapter.toString
var opt_adapter: Set[String] = config("adapter", default = Nil)
var opt_anywhere: Set[String] = config("anywhere", default = Nil)
var opt_front: Set[String] = config("front", default = Nil)
var opt_discard: Boolean = config("discard", default = false)
var opt_minimum_length: Int = config("minimum_length", 1)
var opt_maximum_length: Option[Int] = config("maximum_length")
/** return commandline to execute */
def cmdLine = required(executable) +
// options
repeat("-a", opt_adapter) +
......@@ -63,6 +65,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
required("--output", fastq_output) +
" > " + required(stats_output)
/** Output summary stats */
def summaryStats: Map[String, Any] = {
val trimR = """.*Trimmed reads: *(\d*) .*""".r
val tooShortR = """.*Too short reads: *(\d*) .*""".r
......@@ -89,6 +92,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
)
}
/** Merges values that can be merged for the summary */
override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = {
(v1, v2) match {
case (v1: Int, v2: Int) => v1 + v2
......
......@@ -22,6 +22,10 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
/**
* Extension for fastqc
* Based on version 0.10.1 and 0.11.2
*/
class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Contaminants", required = false)
......@@ -48,6 +52,7 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
override def versionCommand = executable + " --version"
override val defaultThreads = 4
/** Sets contaminants and adapters when not yet set */
override def beforeGraph {
this.preProcesExecutable
......@@ -59,8 +64,8 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
// otherwise, use default contaminants file (depending on FastQC version)
case None =>
val defaultContams = getVersion match {
case "v0.11.2" => new File(fastqcDir + "/Configuration/contaminant_list.txt")
case _ => new File(fastqcDir + "/Contaminants/contaminant_list.txt")
case Some("v0.11.2") => new File(fastqcDir + "/Configuration/contaminant_list.txt")
case _ => new File(fastqcDir + "/Contaminants/contaminant_list.txt")
}
config("contaminants", default = defaultContams)
}
......@@ -71,13 +76,14 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
// otherwise, check if adapters are already present (depending on FastQC version)
case None =>
val defaultAdapters = getVersion match {
case "v0.11.2" => Option(new File(fastqcDir + "/Configuration/adapter_list.txt"))
case _ => None
case Some("v0.11.2") => Option(new File(fastqcDir + "/Configuration/adapter_list.txt"))
case _ => None
}
defaultAdapters.collect { case adp => config("adapters", default = adp) }
}
}
/** return commandline to execute */
def cmdLine = required(executable) +
optional("--java", java_exe) +
optional("--threads", threads) +
......
......@@ -21,6 +21,9 @@ import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import nl.lumc.sasc.biopet.core.config.Configurable
/**
* This class can execute ln as an InProcessFunction or be used only to generate the ln command
*/
class Ln(val root: Configurable) extends InProcessFunction with Configurable {
this.analysisName = getClass.getSimpleName
......@@ -35,12 +38,14 @@ class Ln(val root: Configurable) extends InProcessFunction with Configurable {
var relative: Boolean = true
/**
 * Points jobOutputFile at a hidden ".<output name>.<analysis name>.out" file
 * in the directory of the output file, then delegates to the parent implementation.
 */
override def freezeFieldValues(): Unit = {
  val outputDirectory = output.getAbsoluteFile.getParentFile
  val logFileName = "." + output.getName + "." + analysisName + ".out"
  jobOutputFile = new File(outputDirectory, logFileName)
  super.freezeFieldValues()
}
/** return commandline to execute */
lazy val cmd: String = {
lazy val inCanonical: String = {
// need to remove "/~" to correctly expand path with tilde
......@@ -113,7 +118,17 @@ class Ln(val root: Configurable) extends InProcessFunction with Configurable {
}
}
/** Object for constructors for ln */
object Ln {
/**
* Basic constructor
* @param root root object for config
* @param input input file
* @param output output File
* @param relative make relative links (default true)
* @return
*/
def apply(root: Configurable, input: File, output: File, relative: Boolean = true): Ln = {
val ln = new Ln(root)
ln.input = input
......
......@@ -23,6 +23,7 @@ import argonaut._, Argonaut._
import scalaz._, Scalaz._
import scala.io.Source
/** Extension for md5sum */
class Md5sum(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Input")
var input: File = _
......@@ -32,17 +33,13 @@ class Md5sum(val root: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "md5sum")
/** return commandline to execute */
def cmdLine = required(executable) + required(input) + " > " + required(output)
def getSummary: Json = {
val data = Source.fromFile(output).mkString.split(" ")
return ("path" := output.getAbsolutePath) ->:
("md5sum" := data(0)) ->:
jEmptyObject
}
}
/** Object for constructors for md5sum */
object Md5sum {
/** Makes md5sum with md5 file in given dir */
def apply(root: Configurable, fastqfile: File, outDir: File): Md5sum = {
val md5sum = new Md5sum(root)
md5sum.input = fastqfile
......@@ -50,6 +47,7 @@ object Md5sum {
return md5sum
}
/** Makes md5sum with md5 file in same dir as input file */
def apply(root: Configurable, file: File): Md5sum = {
val md5sum = new Md5sum(root)
md5sum.input = file
......
......@@ -20,6 +20,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File
/** Extension for pbzip2 */
class Pbzip2(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Zipped file")
var input: File = _
......@@ -39,6 +40,7 @@ class Pbzip2(val root: Configurable) extends BiopetCommandLineFunction {
if (!memory.isEmpty) memory = Option(memory.get * threads)
}
/** return commandline to execute */
def cmdLine = required(executable) +
conditional(decomrpess, "-d") +
conditional(!decomrpess, "-z") +
......@@ -48,7 +50,9 @@ class Pbzip2(val root: Configurable) extends BiopetCommandLineFunction {
required(input)
}
/** Object for constructors for Pbzip2 */
object Pbzip2 {
/** Default constructor */
def apply(root: Configurable, input: File, output: File): Pbzip2 = {
val pbzip2 = new Pbzip2(root)
pbzip2.input = input
......
......@@ -28,6 +28,11 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction {
executable = config("exe", default = "python", submodule = "python")
protected var python_script_name: String = _
/**
* Checks if the script already exists as a file, otherwise tries to fetch it from the jar
* @param script name / location of script
*/
def setPythonScript(script: String) {
python_script = new File(script)
if (!python_script.exists()) {
......@@ -36,6 +41,12 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction {
python_script_name = script
}
}
/**
* Set and extract python script from jar file
* @param script name of script in jar
* @param subpackage location of script in jar
*/
def setPythonScript(script: String, subpackage: String) {
python_script_name = script
python_script = new File(".queue/tmp/" + subpackage + python_script_name)
......@@ -46,6 +57,7 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction {
os.close()
}
/**
 * Returns the basic command to prefix the complete command with:
 * the executable followed by the python script, both passed through `required`.
 */
def getPythonCommand(): String = {
  val interpreterPart = required(executable)
  val scriptPart = required(python_script)
  interpreterPart + scriptPart
}
......
......@@ -19,7 +19,12 @@ import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
import java.io.File
import scalaz.std.boolean.option
/**
* extension for raxml
* based on version 8.1.3
*/
class Raxml(val root: Configurable) extends BiopetCommandLineFunction {
override val defaultThreads = 1
......@@ -50,49 +55,51 @@ class Raxml(val root: Configurable) extends BiopetCommandLineFunction {
@Argument(doc = "Output directory", required = true)
var w: File = null
var noBfgs: Boolean = config("no_bfgs", default = false)
@Input(required = false)
var t: File = _
var t: Option[File] = _
@Input(required = false)
var z: File = _
var z: Option[File] = _
@Output(doc = "Output files", required = false)
private var out: List[File] = Nil
var noBfgs: Boolean = config("no_bfgs", default = false)
var executableNonThreads: String = config("exe", default = "raxmlHPC")
var executableThreads: Option[String] = config("exe_pthreads")
/** Sets correct output files to job */
override def beforeGraph {
require(w != null)
if (threads == 0) threads = getThreads(defaultThreads)
executable = if (threads > 1 && executableThreads.isDefined) executableThreads.get else executableNonThreads
super.beforeGraph
out +:= getInfoFile
out :::= List(Some(getInfoFile), getBestTreeFile, getBootstrapFile, getBipartitionsFile).flatten
f match {
case "d" if b.isEmpty => {
out +:= getBestTreeFile
for (t <- 0 until N.getOrElse(1)) {
out +:= new File(w, "RAxML_log." + n + ".RUN." + t)
out +:= new File(w, "RAxML_parsimonyTree." + n + ".RUN." + t)
out +:= new File(w, "RAxML_result." + n + ".RUN." + t)
}
}
case "d" if b.isDefined => out +:= getBootstrapFile
case "b" => {
out +:= new File(w, "RAxML_bipartitionsBranchLabels." + n)
out +:= new File(w, "RAxML_bipartitions." + n)
case "d" if b.isEmpty => for (t <- 0 until N.getOrElse(1)) {
out +:= new File(w, "RAxML_log." + n + ".RUN." + t)
out +:= new File(w, "RAxML_parsimonyTree." + n + ".RUN." + t)
out +:= new File(w, "RAxML_result." + n + ".RUN." + t)
}
case _ =>
case "b" => out +:= new File(w, "RAxML_bipartitionsBranchLabels." + n)
case _ =>
}
}
def getBestTreeFile: File = new File(w, "RAxML_bestTree." + n)
def getBootstrapFile: File = new File(w, "RAxML_bootstrap." + n)
def getBipartitionsFile: File = new File(w, "RAxML_bipartitions." + n)
def getInfoFile: File = new File(w, "RAxML_info." + n)
/** Returns bestTree file */
def getBestTreeFile = option(f == "d" && b.isEmpty, new File(w, "RAxML_bestTree." + n))
/** Returns bootstrap file */
def getBootstrapFile = option(f == "d" && b.isDefined, new File(w, "RAxML_bootstrap." + n))
/** Returns bipartitions file */
def getBipartitionsFile = option(f == "b", new File(w, "RAxML_bipartitions." + n))
/** Returns info file */
def getInfoFile = new File(w, "RAxML_info." + n)
/** return commandline to execute */
def cmdLine = required(executable) +
required("-m", m) +
required("-s", input) +
......
......@@ -21,6 +21,11 @@ import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
/**
* Extension for gubbins
* See: https://github.com/sanger-pathogens/gubbins
* No version known
*/
class RunGubbins(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Contaminants", required = false)
......@@ -47,6 +52,7 @@ class RunGubbins(val root: Configurable) extends BiopetCommandLineFunction {
var verbose: Boolean = config("verbose", default = false)
var noCleanup: Boolean = config("no_cleanup", default = false)
/** Set correct output files */
override def beforeGraph: Unit = {
super.beforeGraph
require(outputDirectory != null)
......@@ -63,6 +69,7 @@ class RunGubbins(val root: Configurable) extends BiopetCommandLineFunction {
for (t <- out) outputFiles ::= new File(outputDirectory + File.separator + prefix.getOrElse("gubbins") + t)
}
/** Return command to execute */
def cmdLine = required("cd", outputDirectory) + " && " + required(executable) +
optional("--outgroup", outgroup) +
optional("--starting_tree", startingTree) +
......
......@@ -23,6 +23,7 @@ import argonaut._, Argonaut._
import scalaz._, Scalaz._
import scala.io.Source
/** Extension for sha1sum */
class Sha1sum(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Input file")
var input: File = _
......@@ -32,21 +33,16 @@ class Sha1sum(val root: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "sha1sum")
/** return commandline to execute */
def cmdLine = required(executable) + required(input) + " > " + required(output)
def getSummary: Json = {
val data = Source.fromFile(output).mkString.split(" ")
return ("path" := output.getAbsolutePath) ->:
("sha1sum" := data(0)) ->:
jEmptyObject
}
}
object Sha1sum {
def apply(root: Configurable, fastqfile: File, outDir: String): Sha1sum = {
/** Create default sha1sum */
def apply(root: Configurable, input: File, outDir: File): Sha1sum = {
val sha1sum = new Sha1sum(root)
sha1sum.input = fastqfile
sha1sum.output = new File(outDir + fastqfile.getName + ".sha1")
sha1sum.input = input
sha1sum.output = new File(outDir, input.getName + ".sha1")
return sha1sum
}
}
......@@ -25,6 +25,10 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.collection.mutable
import scala.io.Source
/**
* Extension for sickle
* Based on version 1.33
*/
class Sickle(val root: Configurable) extends BiopetCommandLineFunction with Summarizable {
@Input(doc = "R1 input")
var input_R1: File = _
......@@ -57,10 +61,12 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction with Summ
override val versionRegex = """sickle version (.*)""".r
override def versionCommand = executable + " --version"
/** Sets qualityType to the default quality type if it is still empty */
override def beforeGraph {
if (qualityType.isEmpty) qualityType = Some(defaultQualityType)
}
/** Return command to execute */
def cmdLine = {
var cmd: String = required(executable)
if (input_R2 != null) {
......@@ -81,6 +87,7 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction with Summ
" > " + required(output_stats)
}
/** returns stats map for summary */
def summaryStats: Map[String, Any] = {
// regex for single run
val sKept = """FastQ records kept: (\d+)""".r
......@@ -123,6 +130,7 @@ class Sickle(val root: Configurable) extends BiopetCommandLineFunction with Summ
stats.toMap
}
/** Merge stats in case of chunking */
override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = {
(v1, v2) match {
case (v1: Int, v2: Int) => v1 + v2
......
......@@ -21,6 +21,7 @@ import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for stampy */
class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "FastQ file R1", shortName = "R1")
var R1: File = _
......@@ -68,12 +69,14 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
override def versionCommand = executable + " --help"
/** Checks that readgroup is set (fails when it is still null) */
override def beforeGraph: Unit = {
super.beforeGraph
require(readgroup != null)
}
def cmdLine: String = {
/** Returns command to execute */
def cmdLine = {
var cmd: String = required(executable) +
optional("-t", nCoresRequest) +
conditional(solexa, "--solexa") +
......@@ -99,6 +102,6 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
" -h " + required(hash) +
" -o " + required(output) +
" -M " + required(R1) + optional(R2)
return cmd
cmd
}
}
......@@ -21,6 +21,9 @@ import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
/**
* Extension for STAR
*/
class Star(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "The reference file for the bam files.", required = false)
var reference: File = new File(config("reference"))
......@@ -62,6 +65,7 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction {
override val defaultVmem = "6G"
override val defaultThreads = 8
/** Sets output files for the graph */
override def beforeGraph() {
if (outFileNamePrefix != null && !outFileNamePrefix.endsWith(".")) outFileNamePrefix += "."
val prefix = if (outFileNamePrefix != null) outputDir + outFileNamePrefix else outputDir
......@@ -77,7 +81,8 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction {
}
}
def cmdLine: String = {
/** Returns command to execute */
def cmdLine = {
var cmd: String = required("cd", outputDir) + "&&" + required(executable)
if (runmode != null && runmode == "genomeGenerate") { // Create index
cmd += required("--runMode", runmode) +
......@@ -91,11 +96,22 @@ class Star(val root: Configurable) extends BiopetCommandLineFunction {
optional("--outFileNamePrefix", outFileNamePrefix)
if (sjdbOverhang > 0) cmd += optional("--sjdbOverhang", sjdbOverhang)
return cmd
cmd
}
}
object Star {
/**
* Create default star
* @param configurable root object
* @param R1 R1 fastq file
* @param R2 R2 fastq file
* @param outputDir Outputdir for Star
* @param isIntermediate
* @param deps Deps to add to wait on run
* @return Return Star
*
*/
def apply(configurable: Configurable, R1: File, R2: File, outputDir: File, isIntermediate: Boolean = false, deps: List[File] = Nil): Star = {
val star = new Star(configurable)
star.R1 = R1
......@@ -107,7 +123,22 @@ object Star {
return star
}
def _2pass(configurable: Configurable, R1: File, R2: File, outputDir: File, isIntermediate: Boolean = false, deps: List[File] = Nil): (File, List[Star]) = {
/**
* returns Star with 2pass star method
* @param configurable root object
* @param R1 R1 fastq file
* @param R2 R2 fastq file
* @param outputDir Outputdir for Star
* @param isIntermediate
* @param deps Deps to add to wait on run
* @return Tuple of a File and the list of Star objects
*/
def _2pass(configurable: Configurable,
R1: File,
R2: File,
outputDir: File,
isIntermediate: Boolean = false,
deps: List[File] = Nil): (File, List[Star]) = {
val starCommand_pass1 = Star(configurable, R1, if (R2 != null) R2 else null, new File(outputDir, "aln-pass1"))
starCommand_pass1.isIntermediate = isIntermediate
starCommand_pass1.deps = deps
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment