Commit b9117b9d authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch 'develop' into feature-shiva

Conflicts:
	public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/summary/WriteSummary.scala
	public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala
parents 9451debe 7339ef7b
......@@ -77,7 +77,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
}
List(bamFile)
} else {
val markDup = MarkDuplicates(this, files, new File(outputDir + outputName + ".dedup.bam"))
val markDup = MarkDuplicates(this, files, new File(outputDir, outputName + ".dedup.bam"))
markDup.isIntermediate = useIndelRealigner
add(markDup)
if (useIndelRealigner) {
......
......@@ -129,7 +129,7 @@ trait BastyTrait extends MultiSampleQScript {
raxmlBoot.N = Some(1)
raxmlBoot.n = outputName + "_boot_" + t
add(raxmlBoot)
raxmlBoot.getBootstrapFile
raxmlBoot.getBootstrapFile.get
}
val cat = Cat(this, bootList.toList, new File(outputDir, "/boot_list"))
......@@ -138,7 +138,7 @@ trait BastyTrait extends MultiSampleQScript {
val raxmlBi = new Raxml(this)
raxmlBi.input = concensusVariants
raxmlBi.t = raxmlMl.getBestTreeFile
raxmlBi.z = cat.output
raxmlBi.z = Some(cat.output)
raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlBi.p = Some(seed)
raxmlBi.f = "b"
......@@ -148,7 +148,7 @@ trait BastyTrait extends MultiSampleQScript {
val gubbins = new RunGubbins(this)
gubbins.fastafile = concensusVariants
gubbins.startingTree = Some(raxmlBi.getBipartitionsFile)
gubbins.startingTree = raxmlBi.getBipartitionsFile
gubbins.outputDirectory = dirSufixGubbins
add(gubbins)
}
......
<?xml version="1.0" encoding="UTF-8"?>
<!--
Biopet is built on top of GATK Queue for building bioinformatic
pipelines. It is mainly intended to support LUMC SHARK cluster which is running
SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
should also be able to execute Biopet tools and pipelines.
Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
-->
<project-shared-configuration>
<!--
This file contains additional configuration written by modules in the NetBeans IDE.
The configuration is intended to be shared among all the users of project and
therefore it is assumed to be part of version control checkout.
Without this configuration present, some functionality in the IDE may be limited or fail altogether.
-->
<config-data xmlns="http://www.netbeans.org/ns/maven-config-data/1">
<configurations>
<configuration id="yamsvp" profiles=""/>
</configurations>
</config-data>
<properties xmlns="http://www.netbeans.org/ns/maven-properties-data/1">
<!--
Properties that influence various parts of the IDE, especially code formatting and the like.
You can copy and paste the single properties, into the pom.xml file and the IDE will pick them up.
That way multiple projects can share the same settings (useful for formatting rules for example).
Any value defined here will override the pom.xml file value but is only applicable to the current project.
-->
<netbeans.hint.license>apache20</netbeans.hint.license>
<com-junichi11-netbeans-changelf.enable>true</com-junichi11-netbeans-changelf.enable>
<com-junichi11-netbeans-changelf.use-project>true</com-junichi11-netbeans-changelf.use-project>
<com-junichi11-netbeans-changelf.lf-kind>LF</com-junichi11-netbeans-changelf.lf-kind>
<com-junichi11-netbeans-changelf.use-global>false</com-junichi11-netbeans-changelf.use-global>
<com-junichi11-netbeans-changelf.show-dialog>true</com-junichi11-netbeans-changelf.show-dialog>
<org-netbeans-modules-javascript2-requirejs.enabled>true</org-netbeans-modules-javascript2-requirejs.enabled>
</properties>
</project-shared-configuration>
<?xml version="1.0" encoding="UTF-8"?>
<!--
Biopet is built on top of GATK Queue for building bioinformatic
pipelines. It is mainly intended to support LUMC SHARK cluster which is running
SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
should also be able to execute Biopet tools and pipelines.
Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
-->
<actions>
<action>
<actionName>run</actionName>
<preAction>build-with-dependencies</preAction>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp -outDir /usr/local/waiyi/ -config ../test/samples.json -run -l debug</exec.args>
<exec.executable>java</exec.executable>
<exec.workingdir>../test</exec.workingdir>
</properties>
</action>
<action>
<actionName>debug</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp -outDir /usr/local/waiyi/ -config ../test/samples.json -run -l debug</exec.args>
<exec.executable>java</exec.executable>
<jpda.listen>true</jpda.listen>
<exec.workingdir>../test</exec.workingdir>
</properties>
</action>
<action>
<actionName>profile</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp -outDir /usr/local/waiyi/ -config ../test/samples.json -run -l debug</exec.args>
<exec.executable>java</exec.executable>
<exec.workingdir>../test</exec.workingdir>
</properties>
</action>
</actions>
<?xml version="1.0" encoding="UTF-8"?>
<!--
Biopet is built on top of GATK Queue for building bioinformatic
pipelines. It is mainly intended to support LUMC SHARK cluster which is running
SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
should also be able to execute Biopet tools and pipelines.
Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
-->
<actions>
<action>
<actionName>run</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/compare_test/samples.json -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/config.json -outDir /home/pjvan_thof/pipelines/test</exec.args>
<exec.executable>java</exec.executable>
<exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir>
</properties>
</action>
<action>
<actionName>debug</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/compare_test/samples.json -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/config.json -outDir /home/pjvan_thof/pipelines/test</exec.args>
<exec.executable>java</exec.executable>
<jpda.listen>true</jpda.listen>
<exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir>
</properties>
</action>
<action>
<actionName>profile</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/compare_test/samples.json -config /data/DIV5/SASC/project-062-snake/analysis/scripts/biopet/config.json -outDir /home/pjvan_thof/pipelines/test</exec.args>
<exec.executable>java</exec.executable>
<exec.workingdir>/home/pjvan_thof/pipelines/test</exec.workingdir>
</properties>
</action>
</actions>
......@@ -25,9 +25,7 @@ import scala.util.matching.Regex
import java.io.FileInputStream
import java.security.MessageDigest
/**
* Biopet command line trait to auto check executable and cluster values
*/
/** Biopet command line trait to auto check executable and cluster values */
trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurable {
analysisName = configName
......@@ -47,14 +45,10 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
*/
protected[core] def beforeCmd {}
/**
* Can override this method. This is executed after the script is done en queue starts to generate the graph
*/
/** Can override this method. This is executed after the script is done and Queue starts to generate the graph */
protected[core] def beforeGraph {}
/**
* Set default output file, threads and vmem for current job
*/
/** Set default output file, threads and vmem for current job */
override def freezeFieldValues() {
preProcesExecutable
beforeGraph
......@@ -118,9 +112,7 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
addJobReportBinding("md5sum_exe", md5.getOrElse("None"))
}
/**
* executes checkExecutable method and fill job report
*/
/** Executes the checkExecutable method and fills the job report */
final protected def preCmdInternal {
preProcesExecutable
......@@ -133,10 +125,7 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
addJobReportBinding("version", getVersion)
}
/**
* Command to get version of executable
* @return
*/
/** Command to get version of executable */
protected def versionCommand: String = null
/** Regex to get version from version command output */
......@@ -146,10 +135,10 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
protected val versionExitcode = List(0)
/** Executes the version command */
private def getVersionInternal: String = {
if (versionCommand == null || versionRegex == null) return "N/A"
private def getVersionInternal: Option[String] = {
if (versionCommand == null || versionRegex == null) return None
val exe = new File(versionCommand.trim.split(" ")(0))
if (!exe.exists()) return "N/A"
if (!exe.exists()) return None
val stdout = new StringBuffer()
val stderr = new StringBuffer()
def outputLog = "Version command: \n" + versionCommand +
......@@ -158,25 +147,28 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
val process = Process(versionCommand).run(ProcessLogger(stdout append _ + "\n", stderr append _ + "\n"))
if (!versionExitcode.contains(process.exitValue)) {
logger.warn("getVersion give exit code " + process.exitValue + ", version not found \n" + outputLog)
return "N/A"
return None
}
for (line <- stdout.toString.split("\n") ++ stderr.toString.split("\n")) {
line match {
case versionRegex(m) => return m
case versionRegex(m) => return Some(m)
case _ =>
}
}
logger.warn("getVersion give a exit code " + process.exitValue + " but no version was found, executable correct? \n" + outputLog)
return "N/A"
return None
}
/** Get version from cache otherwise execute the version command */
def getVersion: String = {
def getVersion: Option[String] = {
if (!BiopetCommandLineFunctionTrait.executableCache.contains(executable))
preProcesExecutable
if (!BiopetCommandLineFunctionTrait.versionCache.contains(executable))
BiopetCommandLineFunctionTrait.versionCache += executable -> getVersionInternal
return BiopetCommandLineFunctionTrait.versionCache(executable)
if (!BiopetCommandLineFunctionTrait.versionCache.contains(versionCommand))
getVersionInternal match {
case Some(version) => BiopetCommandLineFunctionTrait.versionCache += versionCommand -> version
case _ =>
}
BiopetCommandLineFunctionTrait.versionCache.get(versionCommand)
}
/**
......@@ -205,9 +197,7 @@ trait BiopetCommandLineFunctionTrait extends CommandLineFunction with Configurab
}
}
/**
* stores global caches
*/
/** stores global caches */
object BiopetCommandLineFunctionTrait {
import scala.collection.mutable.Map
private val versionCache: Map[String, String] = Map()
......
......@@ -57,12 +57,12 @@ class WriteSummary(val root: Configurable) extends InProcessFunction with Config
(for (f <- qscript.functions if f.isInstanceOf[BiopetCommandLineFunctionTrait]) yield {
f match {
case f: BiopetJavaCommandLineFunction => {
f.configName -> Map("version" -> f.getVersion,
f.configName -> Map("version" -> f.getVersion.getOrElse(None),
"java_md5" -> BiopetCommandLineFunctionTrait.executableMd5Cache.getOrElse(f.executable, None),
"jar_md5" -> SummaryQScript.md5sumCache.getOrElse(f.jarFile, None))
}
case f: BiopetCommandLineFunction => {
f.configName -> Map("version" -> f.getVersion,
f.configName -> Map("version" -> f.getVersion.getOrElse(None),
"md5" -> BiopetCommandLineFunctionTrait.executableMd5Cache.getOrElse(f.executable, None))
}
case _ => throw new IllegalStateException("This should not be possible")
......
......@@ -21,6 +21,11 @@ import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Extension for bowtie 1
*
* Based on version 1.1.1
*/
class Bowtie(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Fastq file R1", shortName = "R1")
var R1: File = null
......@@ -53,6 +58,7 @@ class Bowtie(val root: Configurable) extends BiopetCommandLineFunction {
var strata: Boolean = config("strata", default = false)
var maqerr: Option[Int] = config("maqerr")
/** return commandline to execute */
def cmdLine = {
required(executable) +
optional("--threads", nCoresRequest) +
......
......@@ -20,6 +20,9 @@ import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File
/**
* Extension for GNU cat
*/
class Cat(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Input file", required = true)
var input: List[File] = Nil
......@@ -29,10 +32,21 @@ class Cat(val root: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "cat")
/** Builds the command line: the executable followed by every input file, redirected into the output file */
def cmdLine =
  required(executable) +
    repeat(input) +
    " > " +
    required(output)
}
/**
* Object for constructors for cat
*/
object Cat {
/**
* Basic constructor
* @param root root object for config
* @param input list of files to use
* @param output output File
* @return
*/
def apply(root: Configurable, input: List[File], output: File): Cat = {
val cat = new Cat(root)
cat.input = input
......
......@@ -24,6 +24,10 @@ import nl.lumc.sasc.biopet.core.config.Configurable
import scala.collection.mutable
import scala.io.Source
/**
* Extension for cutadapt
* Based on version 1.5
*/
class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Summarizable {
@Input(doc = "Input fastq file")
var fastq_input: File = _
......@@ -39,17 +43,15 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
override val versionRegex = """(.*)""".r
var default_clip_mode: String = config("default_clip_mode", default = "3")
var opt_adapter: Set[String] = Set()
if (config.contains("adapter")) for (adapter <- config("adapter").asList) opt_adapter += adapter.toString
var opt_anywhere: Set[String] = Set()
if (config.contains("anywhere")) for (adapter <- config("anywhere").asList) opt_anywhere += adapter.toString
var opt_front: Set[String] = Set()
if (config.contains("front")) for (adapter <- config("front").asList) opt_front += adapter.toString
var opt_adapter: Set[String] = config("adapter", default = Nil)
var opt_anywhere: Set[String] = config("anywhere", default = Nil)
var opt_front: Set[String] = config("front", default = Nil)
var opt_discard: Boolean = config("discard", default = false)
var opt_minimum_length: Int = config("minimum_length", 1)
var opt_maximum_length: Option[Int] = config("maximum_length")
/** return commandline to execute */
def cmdLine = required(executable) +
// options
repeat("-a", opt_adapter) +
......@@ -63,6 +65,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
required("--output", fastq_output) +
" > " + required(stats_output)
/** Output summary stats */
def summaryStats: Map[String, Any] = {
val trimR = """.*Trimmed reads: *(\d*) .*""".r
val tooShortR = """.*Too short reads: *(\d*) .*""".r
......@@ -89,6 +92,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su
)
}
/** Merges values that can be merged for the summary */
override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = {
(v1, v2) match {
case (v1: Int, v2: Int) => v1 + v2
......
......@@ -22,6 +22,10 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
/**
* Extension for fastqc
* Based on version 0.10.1 and 0.11.2
*/
class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Contaminants", required = false)
......@@ -48,6 +52,7 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
override def versionCommand = executable + " --version"
override val defaultThreads = 4
/** Sets contaminants and adapters when not yet set */
override def beforeGraph {
this.preProcesExecutable
......@@ -59,8 +64,8 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
// otherwise, use default contaminants file (depending on FastQC version)
case None =>
val defaultContams = getVersion match {
case "v0.11.2" => new File(fastqcDir + "/Configuration/contaminant_list.txt")
case _ => new File(fastqcDir + "/Contaminants/contaminant_list.txt")
case Some("v0.11.2") => new File(fastqcDir + "/Configuration/contaminant_list.txt")
case _ => new File(fastqcDir + "/Contaminants/contaminant_list.txt")
}
config("contaminants", default = defaultContams)
}
......@@ -71,13 +76,14 @@ class Fastqc(val root: Configurable) extends BiopetCommandLineFunction {
// otherwise, check if adapters are already present (depending on FastQC version)
case None =>
val defaultAdapters = getVersion match {
case "v0.11.2" => Option(new File(fastqcDir + "/Configuration/adapter_list.txt"))
case _ => None
case Some("v0.11.2") => Option(new File(fastqcDir + "/Configuration/adapter_list.txt"))
case _ => None
}
defaultAdapters.collect { case adp => config("adapters", default = adp) }
}
}
/** return commandline to execute */
def cmdLine = required(executable) +
optional("--java", java_exe) +
optional("--threads", threads) +
......
......@@ -21,6 +21,9 @@ import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import nl.lumc.sasc.biopet.core.config.Configurable
/**
* This class can execute ln as an InProcessFunction or be used only to generate the ln command
*/
class Ln(val root: Configurable) extends InProcessFunction with Configurable {
this.analysisName = getClass.getSimpleName
......@@ -35,12 +38,14 @@ class Ln(val root: Configurable) extends InProcessFunction with Configurable {
var relative: Boolean = true
/**
 * Sets the job output log before the field values are frozen.
 * The log is a hidden file named ".<out name>.<analysisName>.out" in the parent directory of `out`.
 */
override def freezeFieldValues(): Unit = {
  val logDir = out.getAbsoluteFile.getParentFile
  jobOutputFile = new File(logDir, s".${out.getName}.$analysisName.out")
  super.freezeFieldValues()
}
/** return commandline to execute */
lazy val cmd: String = {
lazy val inCanonical: String = {
// need to remove "/~" to correctly expand path with tilde
......@@ -113,7 +118,17 @@ class Ln(val root: Configurable) extends InProcessFunction with Configurable {
}
}
/** Object for constructors for ln */
object Ln {
/**
* Basic constructor
* @param root root object for config
* @param input input file
* @param output output File
* @param relative make relative links (default true)
* @return
*/
def apply(root: Configurable, input: File, output: File, relative: Boolean = true): Ln = {
val ln = new Ln(root)
ln.in = input
......
......@@ -23,6 +23,7 @@ import argonaut._, Argonaut._
import scalaz._, Scalaz._
import scala.io.Source
/** Extension for md5sum */
class Md5sum(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Input")
var input: File = _
......@@ -32,17 +33,13 @@ class Md5sum(val root: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "md5sum")
/** Builds the command line: the executable applied to the input file, with stdout redirected into the output file */
def cmdLine =
  required(executable) +
    required(input) +
    " > " +
    required(output)
/**
 * Builds a JSON summary from the output file of this job.
 *
 * The content of `output` is split on single spaces and the first field is
 * reported as the checksum.
 *
 * @return JSON object with the absolute "path" of the output file and its "md5sum"
 */
def getSummary: Json = {
  // Source.fromFile holds an open file handle until it is closed, so release it
  // explicitly even when reading fails.
  val source = Source.fromFile(output)
  val data = try source.mkString.split(" ") finally source.close()
  ("path" := output.getAbsolutePath) ->:
    ("md5sum" := data(0)) ->:
    jEmptyObject
}
}
/** Object for constructors for md5sum */
object Md5sum {
/** Makes md5sum with md5 file in given dir */
def apply(root: Configurable, fastqfile: File, outDir: File): Md5sum = {
val md5sum = new Md5sum(root)
md5sum.input = fastqfile
......@@ -50,6 +47,7 @@ object Md5sum {
return md5sum
}
/** Makes md5sum with md5 file in same dir as input file */
def apply(root: Configurable, file: File): Md5sum = {
val md5sum = new Md5sum(root)
md5sum.input = file
......
......@@ -20,6 +20,7 @@ import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File
/** Extension for pbzip2 */
class Pbzip2(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Zipped file")
var input: File = _
......@@ -39,6 +40,7 @@ class Pbzip2(val root: Configurable) extends BiopetCommandLineFunction {
if (!memory.isEmpty) memory = Option(memory.get * threads)
}
/** return commandline to execute */