Commit a4687aab authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'waiyi' into 'master'

Merging branch waiyi back to master

See merge request !6
parents 9cde95b2 6a83f9d3
dependency-reduced-pom.xml
*~
*.swp
/Biopet/nbproject/private/
/Biopet/build/
\ No newline at end of file
test/
<?xml version="1.0" encoding="UTF-8"?>
<project-shared-configuration>
<!--
This file contains additional configuration written by modules in the NetBeans IDE.
The configuration is intended to be shared among all the users of project and
therefore it is assumed to be part of version control checkout.
Without this configuration present, some functionality in the IDE may be limited or fail altogether.
-->
<config-data xmlns="http://www.netbeans.org/ns/maven-config-data/1">
<!-- Declares a single configuration with id "yamsvp" and an empty profiles list. -->
<configurations>
<configuration id="yamsvp" profiles=""/>
</configurations>
</config-data>
<properties xmlns="http://www.netbeans.org/ns/maven-properties-data/1">
<!--
Properties that influence various parts of the IDE, especially code formatting and the like.
You can copy and paste the single properties, into the pom.xml file and the IDE will pick them up.
That way multiple projects can share the same settings (useful for formatting rules for example).
Any value defined here will override the pom.xml file value but is only applicable to the current project.
-->
<netbeans.hint.license>apache20</netbeans.hint.license>
<!--
NOTE(review): the com-junichi11-netbeans-changelf.* keys presumably configure a line-ending
plugin; lf-kind is set to CRLF with the project-level setting enabled and the global one
disabled. Confirm CRLF is really the line ending wanted for this repository.
-->
<com-junichi11-netbeans-changelf.enable>true</com-junichi11-netbeans-changelf.enable>
<com-junichi11-netbeans-changelf.use-project>true</com-junichi11-netbeans-changelf.use-project>
<com-junichi11-netbeans-changelf.lf-kind>CRLF</com-junichi11-netbeans-changelf.lf-kind>
<com-junichi11-netbeans-changelf.use-global>false</com-junichi11-netbeans-changelf.use-global>
<netbeans.checkstyle.format>true</netbeans.checkstyle.format>
</properties>
</project-shared-configuration>
<?xml version="1.0" encoding="UTF-8"?>
<!--
IDE action mappings. Each <action> binds an action name (run, debug, profile) to the Maven
goals and exec properties used to launch nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp.
All three actions run process-classes followed by exec-maven-plugin 1.2.1, start the
"java" executable and use ../test as working directory.
NOTE(review): the -outDir argument is a developer-specific absolute path (/usr/local/waiyi/);
confirm it is meant to be shared through version control.
-->
<actions>
<!-- run: launches the Yamsvp pipeline with the test sample config at debug log level. -->
<action>
<actionName>run</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp -outDir /usr/local/waiyi/ -config ../test/samples.json -run -l debug</exec.args>
<exec.executable>java</exec.executable>
<exec.workingdir>../test</exec.workingdir>
</properties>
</action>
<!--
debug: same launch as "run", with JDWP arguments prepended (socket transport, address taken
from ${jpda.address}) and jpda.listen set to true.
-->
<action>
<actionName>debug</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp -outDir /usr/local/waiyi/ -config ../test/samples.json -run -l debug</exec.args>
<exec.executable>java</exec.executable>
<jpda.listen>true</jpda.listen>
<exec.workingdir>../test</exec.workingdir>
</properties>
</action>
<!-- profile: currently uses exactly the same goals and arguments as "run". -->
<action>
<actionName>profile</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-classpath %classpath nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp -outDir /usr/local/waiyi/ -config ../test/samples.json -run -l debug</exec.args>
<exec.executable>java</exec.executable>
<exec.workingdir>../test</exec.workingdir>
</properties>
</action>
</actions>
package nl.lumc.sasc.biopet.core
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.pipelines.basty.Basty
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
import nl.lumc.sasc.biopet.pipelines.gatk.GatkBenchmarkGenotyping
import nl.lumc.sasc.biopet.pipelines.gatk.GatkGenotyping
import nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline
import nl.lumc.sasc.biopet.pipelines.gatk.GatkVariantRecalibration
import nl.lumc.sasc.biopet.pipelines.gatk.GatkVariantcalling
import nl.lumc.sasc.biopet.pipelines.gatk.GatkVcfSampleCompare
import nl.lumc.sasc.biopet.pipelines.gentrap.Gentrap
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import nl.lumc.sasc.biopet.pipelines.sage.Sage
import nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp
object BiopetExecutable {
val modules: Map[String, List[MainCommand]] = Map(
......@@ -18,9 +32,8 @@ object BiopetExecutable {
nl.lumc.sasc.biopet.pipelines.basty.Basty),
"tool" -> List(
nl.lumc.sasc.biopet.core.apps.WipeReads,
nl.lumc.sasc.biopet.core.apps.BiopetFlagstat)
)
nl.lumc.sasc.biopet.core.apps.BiopetFlagstat))
/**
* @param args the command line arguments
*/
......@@ -29,7 +42,7 @@ object BiopetExecutable {
def toBulletedList(m: List[MainCommand], kind: String = "", bullet: String = "-") =
"Available %ss:\n ".format(kind) + bullet + " " + m.map(x => x.commandName).sorted.mkString("\n " + bullet + " ")
def usage(module:String = null): String = {
def usage(module: String = null): String = {
if (module != null) checkModule(module)
val usage: String = {
val set = if (module == null) modules.keySet else Set(module)
......@@ -42,17 +55,17 @@ object BiopetExecutable {
|%s
|
|Questions or comments? Email sasc@lumc.nl or check out the project page at https://git.lumc.nl/biopet/biopet.git
""".stripMargin.format(modules.keys.mkString(","),usage)
""".stripMargin.format(modules.keys.mkString(","), usage)
}
def checkModule(module:String) {
def checkModule(module: String) {
if (!modules.contains(module)) {
System.err.println(s"ERROR: module '$module' does not exist\n" + usage())
System.exit(1)
System.err.println(s"ERROR: module '$module' does not exist\n" + usage())
System.exit(1)
}
}
def getCommand(module:String, name:String) : MainCommand = {
def getCommand(module: String, name: String): MainCommand = {
checkModule(module)
val command = modules(module).find(p => p.commandName.toLowerCase == name.toLowerCase)
if (command == None) {
......@@ -61,18 +74,18 @@ object BiopetExecutable {
}
return command.get
}
args match {
case Array(module, name, passArgs @ _*) => {
getCommand(module, name).main(passArgs.toArray)
getCommand(module, name).main(passArgs.toArray)
}
case Array(module) => {
System.err.println(usage(module))
System.exit(1)
System.err.println(usage(module))
System.exit(1)
}
case _ => {
System.err.println(usage())
System.exit(1)
System.err.println(usage())
System.exit(1)
}
}
}
......
......@@ -45,7 +45,7 @@ class Bwa(val root: Configurable) extends BiopetCommandLineFunction {
var a: Boolean = config("a")
var C: Boolean = config("C")
var Y: Boolean = config("Y")
var I: String = config("I")
var I: String = config("I")
executable = config("exe", default = "bwa", freeVar = false)
override val versionRegex = """Version: (.*)""".r
......@@ -80,14 +80,14 @@ class Bwa(val root: Configurable) extends BiopetCommandLineFunction {
optional("-L", L) +
optional("-U", U) +
optional("-x", x) +
conditional(p, "-p") +
conditional(p, "-p") +
optional("-v", v) +
optional("-T", T) +
optional("-h", h) +
conditional(a, "-a") +
conditional(C, "-C") +
conditional(Y, "-Y") +
optional("-I", I) +
optional("-I", I) +
required(reference) +
required(R1) +
optional(R2) +
......
package nl.lumc.sasc.biopet.extensions.aligners
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import java.io.File
/**
 * Command line wrapper for the Stampy aligner.
 *
 * Inputs and the output file are set by the caller; every other option is read from the
 * configuration through `config(...)`, using the key that matches the field name.
 */
class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
  @Input(doc = "FastQ file R1", shortName = "R1")
  var R1: File = _

  @Input(doc = "FastQ file R2", shortName = "R2", required = false)
  var R2: File = _

  @Input(doc = "The reference file for the bam files.", shortName = "ref")
  var reference: File = config("reference", required = true)

  @Input(doc = "The genome prefix.")
  var genome: File = config("genome", required = true)

  @Input(doc = "The hash prefix")
  var hash: File = config("hash", required = true)

  @Output(doc = "Output file SAM", shortName = "output")
  var output: File = _

  // options set via API or config
  // var numrecords: String = config("numrecords", default = "all")

  // Base quality encoding switches
  var solexa: Boolean = config("solexa", default = false)
  var solexaold: Boolean = config("solexaold", default = false)
  var sanger: Boolean = config("sanger", default = false)

  // Insert size settings; the "2" variants are only emitted when insertsd2 differs from -1
  var insertsize: Option[Int] = config("insertsize", default = 250)
  var insertsd: Option[Int] = config("insertsd", default = 60)
  var insertsize2: Option[Int] = config("insertsize2", default = -2000)
  var insertsd2: Option[Int] = config("insertsd2", default = -1)

  var sensitive: Boolean = config("sensitive", default = false)
  var fast: Boolean = config("fast", default = false)

  var readgroup: String = config("readgroup")
  var verbosity: Option[Int] = config("verbosity", default = 2)
  var logfile: String = config("logfile")

  executable = config("exe", default = "stampy.py", freeVar = false)

  // The version is parsed from the output of "<executable> --help"; exit codes 0 and 1 are accepted
  override val versionRegex = """stampy v(.*) \(.*\), .*""".r
  override val versionExitcode = List(0, 1)

  /// Stampy uses approx factor 1.1 times the size of the genome in memory.
  override val defaultVmem = "4G"
  override val defaultThreads = 8

  override def versionCommand = executable + " --help"

  /**
   * Assembles the Stampy command line from three consecutive parts: the leading options,
   * the optional mate-pair options and the trailing options with the input/output files.
   *
   * @return the full command line as a single string
   */
  def cmdLine: String = {
    val leadingOptions: String = required(executable) +
      optional("-t", nCoresRequest) +
      conditional(solexa, "--solexa") +
      conditional(solexaold, "--solexaold") +
      conditional(sanger, "--sanger") +
      optional("--insertsize", insertsize) +
      optional("--insertsd", insertsd)

    // Optionally start Mate Pair alignment, if set, the aligner will
    // assign MP reads as MP, otherwise in PE mode, these reads will
    // be aligned with the bits RR or FF showing a False Inversion event
    val matePairOptions: String = insertsd2.getOrElse(-1) match {
      case -1 => ""
      case _ =>
        optional("--insertsize2", insertsize2) +
          optional("--insertsd2", insertsd2)
    }

    val trailingOptions: String = conditional(sensitive, "--sensitive") +
      conditional(fast, "--fast") +
      optional("--readgroup", readgroup) +
      optional("-v", verbosity) +
      optional("--logfile", logfile) +
      " -g " + required(genome) +
      " -h " + required(hash) +
      " -o " + required(output) +
      " -M " + required(R1) + optional(R2)

    leadingOptions + matePairOptions + trailingOptions
  }
}
package nl.lumc.sasc.biopet.extensions.aligners
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
import java.io.File
/**
 * Command line wrapper for the TopHat spliced aligner.
 *
 * The caller sets the FastQ inputs and `outputDir`; `output` is derived from `outputDir`
 * in `afterGraph()`. The bowtie index and the executable are read from the configuration.
 */
class TopHat(val root: Configurable) extends BiopetCommandLineFunction {
  @Input(doc = "FastQ file R1", shortName = "R1")
  var R1: File = _

  @Input(doc = "FastQ file R2", shortName = "R2", required = false)
  var R2: File = _

  @Input(doc = "Bowtie index", shortName = "bti")
  var bowtie_index: File = config("bowtie_index", required = true)

  @Argument(doc = "Output Directory")
  var outputDir: String = _

  @Output(doc = "Output file SAM", shortName = "output")
  var output: File = _

  // options set via API or config
  // var numrecords: String = config("numrecords", default = "all")
  // var solexa: Boolean = config("solexa", default = false)
  // var solexaold: Boolean = config("solexaold", default = false)
  // var sanger: Boolean = config("sanger", default = false)
  //
  // var insertsize: Option[Int] = config("insertsize", default = 250)
  // var insertsd: Option[Int] = config("insertsd", default = 60)
  // var insertsize2: Option[Int] = config("insertsize2", default = -2000)
  // var insertsd2: Option[Int] = config("insertsd2", default = -1)
  //
  // var sensitive: Boolean = config("sensitive", default = false)
  // var fast: Boolean = config("fast", default = false)
  //
  // var readgroup: String = config("readgroup")
  // var verbosity: Option[Int] = config("verbosity", default = 2)
  // var logfile: String = config("logfile")

  executable = config("exe", default = "tophat", freeVar = false)

  // The version is parsed from the output of "<executable> --version"; exit codes 0 and 1 are accepted
  override val versionRegex = """TopHat v(.*)""".r
  override val versionExitcode = List(0, 1)

  override val defaultVmem = "4G"
  override val defaultThreads = 8

  override def versionCommand = executable + " --version"

  /**
   * Normalises `outputDir` to end with a slash and derives `output` from it.
   *
   * The command line always passes --no-convert-bam, so the alignment is written as SAM;
   * the declared output therefore has to be accepted_hits.sam and not accepted_hits.bam.
   */
  override def afterGraph() {
    if (!outputDir.endsWith("/")) outputDir += "/"
    output = new File(outputDir + "accepted_hits.sam")
  }

  /**
   * Assembles the TopHat command line.
   *
   * The output directory is passed explicitly with -o; without it TopHat writes to its own
   * default directory and the file declared in `output` would never be created.
   *
   * @return the full command line as a single string
   */
  def cmdLine: String =
    required(executable) +
      optional("-p", nCoresRequest) +
      " -o " + required(outputDir) +
      // explicit spaces so the flag stays separated whatever the neighbouring helpers emit
      " --no-convert-bam " +
      required(bowtie_index) +
      required(R1) + optional(R2)
}
package nl.lumc.sasc.biopet.extensions.svcallers
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import org.broadinstitute.gatk.queue.QScript
import nl.lumc.sasc.biopet.core.BiopetQScript
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
import java.io.File
/**
 * Command line wrapper for bam2cfg.pl, the script that produces the Breakdancer
 * configuration file from a BAM file. The script output is redirected into `output`.
 */
class BreakdancerConfig(val root: Configurable) extends BiopetCommandLineFunction {
  executable = config("exe", default = "bam2cfg.pl", freeVar = false)

  @Input(doc = "Bam File")
  var input: File = _

  @Output(doc = "Output File")
  var output: File = _

  var MIN_MQ: Option[Int] = config("min_mq", default = 20) // minimum of MQ to consider for taking read into histogram
  var USE_MQ: Boolean = config("use_mq", default = false)
  var MIN_INSERTSIZE: Option[Int] = config("min_insertsize", default = 450)
  var SOLID_DATA: Boolean = config("solid", default = false)
  var SD_CUTOFF: Option[Int] = config("sd_cutoff", default = 4) // Cutoff in unit of standard deviation [4]

  // we set this to a higher number to avoid biases in small numbers in sorted bams
  var MIN_OBSERVATIONS: Option[Int] = config("min_observations", default = 1000000) // Number of observation required to estimate mean and s.d. insert size [10_000]
  var COEFVAR_CUTOFF: Option[Int] = config("coef_cutoff", default = 1) // Cutoff on coefficients of variation [1]
  var HISTOGRAM_BINS: Option[Int] = config("histogram_bins", default = 50) // Number of bins in the histogram [50]

  /**
   * Assembles the bam2cfg.pl command line, ending with the input BAM and a shell
   * redirect into the output file.
   *
   * Two corrections compared to the previous version:
   *  - the input/redirect part is now actually concatenated to the command (a missing
   *    `+` left it as a stray statement, so the command ended after the -b option);
   *  - the SOLiD switch is emitted as -C; it used to be -s, which is the flag already
   *    used here for the minimal insert size.
   *
   * @return the full command line as a single string
   */
  def cmdLine: String = required(executable) +
    optional("-q", MIN_MQ) +
    conditional(USE_MQ, "-m") +
    optional("-s", MIN_INSERTSIZE) +
    conditional(SOLID_DATA, "-C") +
    optional("-c", SD_CUTOFF) +
    optional("-n", MIN_OBSERVATIONS) +
    optional("-v", COEFVAR_CUTOFF) +
    optional("-b", HISTOGRAM_BINS) +
    required(input) + " > " + required(output)
}
/** Factory methods for [[BreakdancerConfig]] jobs. */
object BreakdancerConfig {
  /**
   * Creates a job with an explicit output file.
   *
   * @param root   configuration parent of the new job
   * @param input  BAM file to analyse
   * @param output file the generated configuration is written to
   */
  def apply(root: Configurable, input: File, output: File): BreakdancerConfig = {
    val bdconf = new BreakdancerConfig(root)
    bdconf.input = input
    bdconf.output = output
    bdconf
  }

  /**
   * Creates a job writing into `outputDir`; the file name is derived from the input name.
   *
   * @param root      configuration parent of the new job
   * @param input     BAM file to analyse
   * @param outputDir target directory, with or without a trailing slash
   */
  def apply(root: Configurable, input: File, outputDir: String): BreakdancerConfig = {
    val dir = if (outputDir.endsWith("/")) outputDir else outputDir + "/"
    apply(root, input, new File(dir + swapExtension(input.getName)))
  }

  /**
   * Creates a job writing next to the input file (based on its absolute path).
   *
   * @param root  configuration parent of the new job
   * @param input BAM file to analyse
   */
  def apply(root: Configurable, input: File): BreakdancerConfig =
    apply(root, input, new File(swapExtension(input.getAbsolutePath)))

  /**
   * Replaces everything from the last ".bam" onwards with ".breakdancer.cfg".
   *
   * When the name contains no ".bam" the suffix is appended to the whole name; the
   * previous version failed with a StringIndexOutOfBoundsException in that case.
   */
  private def swapExtension(inputFile: String): String = {
    val bamIndex = inputFile.lastIndexOf(".bam")
    val stem = if (bamIndex >= 0) inputFile.substring(0, bamIndex) else inputFile
    stem + ".breakdancer.cfg"
  }
}
/**
 * Command line wrapper for breakdancer-max; the tool output is redirected into `output`.
 */
class BreakdancerCaller(val root: Configurable) extends BiopetCommandLineFunction {
  executable = config("exe", default = "breakdancer-max", freeVar = false)

  override val defaultVmem = "4G"
  override val defaultThreads = 8

  // The version is parsed from the output of the bare executable
  override val versionRegex = """.*[Vv]ersion:? (.*)""".r
  // override val versionExitcode = List(0, 1)
  override def versionCommand = executable

  @Input(doc = "Input file (bam)")
  var input: File = _

  @Argument(doc = "Work directory")
  var workdir: String = _

  @Output(doc = "Breakdancer VCF output")
  var output: File = _

  // NOTE(review): the switches below are read from the config but are not used by cmdLine
  // var T: Option[Int] = config("T", default = defaultThreads)
  var f: Boolean = config("f", default = true) // delete work directory before running
  // var w: String = config("w", default = workdir + "/work")
  var a: Boolean = config("a", default = false) // don't recompute AS tags
  var k: Boolean = config("k", default = false) // keep working directory
  var r: Boolean = config("r", default = false) // take read groups into account

  /** Fails when no work directory has been assigned to this job. */
  override def beforeCmd: Unit = workdir match {
    case null => throw new Exception("Breakdancer :: Workdirectory is not defined")
    // if (input.getName.endsWith(".sort.bam")) sorted = true
    case _ =>
  }

  /** Executable, then the input file, then a shell redirect into the output file. */
  def cmdLine = {
    val invocation = required(executable) + required(input)
    val redirectTarget = required(output)
    invocation + ">" + redirectTarget
  }
}
/** Factory for [[BreakdancerCaller]] jobs. */
object BreakdancerCaller {
  /**
   * Builds a caller job with its input and output assigned.
   * The work directory is not set here and has to be assigned by the caller.
   *
   * @param root   configuration parent of the new job
   * @param input  file handed to the tool
   * @param output file the tool output is redirected to
   */
  def apply(root: Configurable, input: File, output: File): BreakdancerCaller = {
    val job = new BreakdancerCaller(root)
    job.input = input
    job.output = output
    job
  }
}
/**
 * Breakdancer is actually a mini pipeline executing binaries from the breakdancer package:
 * first the configuration is generated from the BAM file, then the caller is run on it.
 */
class Breakdancer(val root: Configurable) extends QScript with BiopetQScript {
  def this() = this(null)

  @Input(doc = "Input file (bam)")
  var input: File = _

  @Input(doc = "Reference Fasta file")
  var reference: File = _

  @Argument(doc = "Work directory")
  var workdir: String = _

  @Output(doc = "Breakdancer VCF output")
  var output: File = _

  /** Nothing to initialise for this pipeline. */
  override def init(): Unit = {
  }

  /**
   * Adds the two Breakdancer jobs to the pipeline and registers their outputs.
   *
   * Corrections compared to the previous version:
   *  - the caller job was constructed but never added, so it never ran;
   *  - the caller now receives the generated configuration file rather than the BAM,
   *    since breakdancer-max operates on the bam2cfg output;
   *  - the caller's work directory is assigned, which its beforeCmd check requires.
   */
  def biopetScript(): Unit = {
    // write the pipeline here
    // start with QC, alignment, call sambamba, call sv callers, reporting

    // read config and set all parameters for the pipeline
    logger.info("Starting Breakdancer")

    val bdcfg = BreakdancerConfig(this, input, workdir)
    outputFiles += ("breakdancer_cfg" -> bdcfg.output)
    add(bdcfg)

    val outputTsv: File = new File(workdir + "/" + swapExtension(input.getName))
    val breakdancer = BreakdancerCaller(this, bdcfg.output, outputTsv)
    breakdancer.workdir = workdir
    outputFiles += ("breakdancer_tsv" -> breakdancer.output)
    add(breakdancer)

    // TODO: convert this tsv to vcf using the python script
  }

  /**
   * Replaces everything from the last ".bam" onwards with ".breakdancer.tsv"; when the
   * name contains no ".bam" the suffix is appended to the whole name.
   */
  private def swapExtension(inputFile: String): String = {
    val bamIndex = inputFile.lastIndexOf(".bam")
    val stem = if (bamIndex >= 0) inputFile.substring(0, bamIndex) else inputFile
    stem + ".breakdancer.tsv"
  }
}
/** Factory for the [[Breakdancer]] mini pipeline. */
object Breakdancer {
  /**
   * Builds a pipeline instance with input, reference and work directory assigned.
   *
   * @param root      configuration parent of the new pipeline
   * @param input     BAM file to analyse
   * @param reference reference fasta file
   * @param runDir    directory stored as the pipeline's work directory
   */
  def apply(root: Configurable, input: File, reference: File, runDir: String): Breakdancer = {
    val pipeline = new Breakdancer(root)
    pipeline.input = input
    pipeline.reference = reference
    pipeline.workdir = runDir
    pipeline
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.svcallers
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import org.broadinstitute.gatk.queue.QScript
import nl.lumc.sasc.biopet.core.BiopetQScript
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.PipelineCommand
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
import java.io.File
class Clever(val root: Configurable) extends BiopetCommandLineFunction {
executable = config("exe", default = "clever")
private lazy val versionexecutable: File = config("version_exe", default = (new File(executable).getParent + "/ctk-version"))
override val defaultVmem = "4G"
override val defaultThreads = 8
override def versionCommand = versionexecutable.getAbsolutePath
override val versionRegex = """(.*)""".r
@Input(doc = "Input file (bam)")
var input: File = _
@Input(doc = "Reference")
var reference: File = _
@Argument(doc = "Work directory")
var workdir: String = _
@Output(doc = "Clever VCF output")
lazy val outputvcf: File = {
new File(workdir + "predictions.vcf")
}
@Output(doc = "Clever raw output")
lazy val outputraw: File = {
new File(workdir + "predictions.raw.txt")
}
// var T: Option[Int] = config("T", default = defaultThreads)
var f: Boolean = config("f", default = true) // delete work directory before running
// var w: String = config("w", default = workdir + "/work")
var a: Boolean = config("a", default = false) // don't recompute AS tags
var k: Boolean = config("k", default = false) // keep working directory
var r: Boolean = config("r", default = false) // take read groups into account
override def beforeCmd {
if (workdir == null) throw new Exception("Clever :: Workdirectory is not defined")
// if (input.getName.endsWith(".sort.bam")) sorted = true
}