Commit d980652c authored by wyleung's avatar wyleung
Browse files

Parts for the SV calling pipeline, formatting and adding breakdancer as subpipeline

parent 60c0ac17
......@@ -24,25 +24,24 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
@Output(doc = "Output file SAM", shortName = "output")
var output: File = _
// options set via API or config
// var numrecords: String = config("numrecords", default = "all")
// var numrecords: String = config("numrecords", default = "all")
var solexa: Boolean = config("solexa", default = false)
var solexaold: Boolean = config("solexaold", default = false)
var sanger: Boolean = config("sanger", default = false)
var insertsize: Option[Int] = config("insertsize", default = 250)
var insertsd: Option[Int] = config("insertsd", default = 60)
var insertsize2: Option[Int] = config("insertsize2", default = -2000)
var insertsd2: Option[Int] = config("insertsd2", default = -1)
var sensitive: Boolean = config("sensitive", default = false)
var fast: Boolean = config("fast", default = false)
var readgroup: String = config("readgroup")
var verbosity: Option[Int] = config("verbosity", default = 2)
var logfile: String = config("logfile")
executable = config("exe", default = "stampy.py", freeVar = false)
override val versionRegex = """stampy v(.*) \(.*\), .*""".r
override val versionExitcode = List(0, 1)
......@@ -52,33 +51,33 @@ class Stampy(val root: Configurable) extends BiopetCommandLineFunction {
override val defaultThreads = 8
override def versionCommand = executable + " --help"
def cmdLine : String = {
def cmdLine: String = {
var cmd: String = required(executable) +
optional("-t", nCoresRequest) +
conditional(solexa, "--solexa") +
conditional(solexaold, "--solexaold") +
conditional(sanger, "--sanger") +
optional("--insertsize", insertsize) +
optional("--insertsd", insertsd)
// Optionally start Mate Pair alignment, if set, the aligner will
// assign MP reads as MP, otherwise in PE mode, these reads will
optional("-t", nCoresRequest) +
conditional(solexa, "--solexa") +
conditional(solexaold, "--solexaold") +
conditional(sanger, "--sanger") +
optional("--insertsize", insertsize) +
optional("--insertsd", insertsd)
// Optionally start Mate Pair alignment, if set, the aligner will
// assign MP reads as MP, otherwise in PE mode, these reads will
// be aligned with the bits RR or FF showing a False Inversion event
if ( insertsd2.getOrElse(-1) != -1 ) {
if (insertsd2.getOrElse(-1) != -1) {
cmd += optional("--insertsize2", insertsize2) +
optional("--insertsd2", insertsd2)
optional("--insertsd2", insertsd2)
}
cmd += conditional(sensitive, "--sensitive") +
conditional(fast, "--fast") +
optional("--readgroup", readgroup) +
optional("-v", verbosity) +
optional("--logfile", logfile) +
" -g " + required(genome) +
" -h " + required(hash) +
" -o " + required(output) +
" -M " + required(R1) + optional(R2)
conditional(fast, "--fast") +
optional("--readgroup", readgroup) +
optional("-v", verbosity) +
optional("--logfile", logfile) +
" -g " + required(genome) +
" -h " + required(hash) +
" -o " + required(output) +
" -M " + required(R1) + optional(R2)
return cmd
}
}
......@@ -3,6 +3,7 @@ package nl.lumc.sasc.biopet.extensions.svcallers
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import org.broadinstitute.gatk.queue.QScript
import nl.lumc.sasc.biopet.core.BiopetQScript
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
import java.io.File
......@@ -63,7 +64,10 @@ object BreakdancerConfig {
/*
* The caller
*
**/
class BreakdancerCaller(val root: Configurable) extends BiopetCommandLineFunction {
......@@ -77,7 +81,7 @@ class BreakdancerCaller(val root: Configurable) extends BiopetCommandLineFunctio
override def versionCommand = executable
@Input(doc = "Input file (bam)")
@Input(doc = "The breakdancer configuration file")
var input: File = _
@Argument(doc = "Work directory")
......@@ -86,12 +90,39 @@ class BreakdancerCaller(val root: Configurable) extends BiopetCommandLineFunctio
@Output(doc = "Breakdancer VCF output")
var output: File = _
// var T: Option[Int] = config("T", default = defaultThreads)
var f: Boolean = config("f", default = true) // delete work directory before running
// var w: String = config("w", default = workdir + "/work")
var a: Boolean = config("a", default = false) // don't recompute AS tags
var k: Boolean = config("k", default = false) // keep working directory
var r: Boolean = config("r", default = false) // take read groups into account
/*
Options:
-o STRING operate on a single chromosome [all chromosome]
-s INT minimum length of a region [7]
-c INT cutoff in unit of standard deviation [3]
-m INT maximum SV size [1000000000]
-q INT minimum alternative mapping quality [35]
-r INT minimum number of read pairs required to establish a connection [2]
-x INT maximum threshold of haploid sequence coverage for regions to be ignored [1000]
-b INT buffer size for building connection [100]
-t only detect transchromosomal rearrangement, by default off
-d STRING prefix of fastq files that SV supporting reads will be saved by library
-g STRING dump SVs and supporting reads in BED format for GBrowse
-l analyze Illumina long insert (mate-pair) library
-a print out copy number and support reads per library rather than per bam, by default off
-h print out Allele Frequency column, by default off
-y INT output score filter [30]
*/
var s: Option[Int] = config("s", default = 7)
var c: Option[Int] = config("c", default = 3)
var m: Option[Int] = config("m", default = 1000000000)
var q: Option[Int] = config("qs", default = 35)
var r: Option[Int] = config("r", default = 2)
var x: Option[Int] = config("x", default = 1000)
var b: Option[Int] = config("b", default = 100)
var t: Boolean = config("t", default = false)
var d: String = config("d")
var g: String = config("g")
var l: Boolean = config("l", default = false)
var a: Boolean = config("a", default = false)
var h: Boolean = config("h", default = false)
var y: Option[Int] = config("y", default = 30)
override def beforeCmd {
if (workdir == null) throw new Exception("Breakdancer :: Workdirectory is not defined")
......@@ -99,7 +130,23 @@ class BreakdancerCaller(val root: Configurable) extends BiopetCommandLineFunctio
}
def cmdLine = required(executable) +
required(input) + ">" + required(output)
optional("-s", s) +
optional("-c", c) +
optional("-m", m) +
optional("-q", q) +
optional("-r", r) +
optional("-x", x) +
optional("-b", b) +
conditional(t ,"-t") +
optional("-d", d) +
optional("-g", g) +
conditional(l ,"-l") +
conditional(a ,"-a") +
conditional(h ,"-h") +
optional("-y", y) +
required(input) +
">" +
required(output)
}
object BreakdancerCaller {
......@@ -124,40 +171,49 @@ class Breakdancer(val root: Configurable) extends QScript with BiopetQScript {
@Argument(doc = "Work directory")
var workdir: String = _
@Output(doc = "Breakdancer VCF output")
var output: File = _
// @Output(doc = "Breakdancer VCF output")
// lazy val outputvcf: File = {
// new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.vcf")
// }
@Output(doc = "Breakdancer raw output")
lazy val outputraw: File = {
new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.tsv")
}
override def init() {
}
def biopetScript() {
// write the pipeline here
// start with QC, alignment, call sambamba, call sv callers, reporting
// read config and set all parameters for the pipeline
logger.info("Starting Breakdancer")
logger.info("Starting Breakdancer configuration")
val bdcfg = BreakdancerConfig(this, input, workdir)
outputFiles += ("breakdancer_cfg" -> bdcfg.output )
add( bdcfg )
val output_vcf: File = new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.tsv")
val breakdancer = BreakdancerCaller( this, input, output_vcf )
val output_tsv: File = this.outputraw
val breakdancer = BreakdancerCaller( this, bdcfg.output, output_tsv )
add( breakdancer )
outputFiles += ("breakdancer_tsv" -> breakdancer.output )
// val output_vcf: File = this.outputvcf
// convert this tsv to vcf using the python script
}
private def swapExtension(inputFile: String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".breakdancer.tsv"
// private def swapExtension(inputFile: String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".breakdancer.tsv"
}
object Breakdancer {
def apply(root: Configurable, input: File, reference: File, runDir: String): Breakdancer = {
object Breakdancer extends PipelineCommand {
def apply(root: Configurable, input: File, reference: File, runDir: String): Breakdancer = {
val breakdancer = new Breakdancer(root)
breakdancer.input = input
breakdancer.reference = reference
breakdancer.workdir = runDir
breakdancer.init
breakdancer.biopetScript
return breakdancer
}
}
\ No newline at end of file
......@@ -29,10 +29,12 @@ class Clever(val root: Configurable) extends BiopetCommandLineFunction {
@Argument(doc = "Work directory")
var workdir: String = _
var cwd: String = _
@Output(doc = "Clever VCF output")
lazy val outputvcf: File = {
new File(workdir + "predictions.vcf")
new File(cwd + "predictions.vcf")
}
@Output(doc = "Clever raw output")
......@@ -66,10 +68,11 @@ class Clever(val root: Configurable) extends BiopetCommandLineFunction {
}
object Clever {
def apply(root: Configurable, input: File, reference: File, runDir: String): Clever = {
def apply(root: Configurable, input: File, reference: File, svDir: String, runDir: String): Clever = {
val clever = new Clever(root)
clever.input = input
clever.reference = reference
clever.cwd = svDir
clever.workdir = runDir
return clever
}
......@@ -89,6 +92,10 @@ class CleverPipeline(val root: Configurable) extends QScript with BiopetQScript
@Argument(doc = "Work directory")
var workdir: String = _
@Argument(doc = "Current working directory")
var cwd: String = _
override def init() {
}
......@@ -97,7 +104,7 @@ class CleverPipeline(val root: Configurable) extends QScript with BiopetQScript
logger.info("Starting Clever Pipeline")
/// start clever and then copy the vcf into the root directory "<sample>.clever/"
val clever = Clever(this, input, reference, workdir )
val clever = Clever(this, input, reference, cwd, workdir )
outputFiles += ("clever_vcf" -> clever.outputvcf )
add( clever )
}
......
......@@ -18,15 +18,12 @@ import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.function._
import org.broadinstitute.gatk.queue.engine.JobRunInfo
class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
def this() = this(null)
var reference: File = _
var finalBamFiles: List[File] = Nil
class LibraryOutput extends AbstractLibraryOutput {
var mappedBamFile: File = _
}
......@@ -36,7 +33,6 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
var mappedBamFile: File = _
}
override def init() {
for (file <- configfiles) globalConfig.loadConfigFile(file)
reference = config("reference", required = true)
......@@ -53,14 +49,14 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
// read config and set all parameters for the pipeline
logger.info("Starting YAM SV Pipeline")
runSamplesJobs
//
//
}
override def onExecutionDone(jobs: Map[QFunction, JobRunInfo], success: Boolean) {
logger.info("YAM SV Pipeline has run ...............................................................")
}
def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
val sampleOutput = new SampleOutput
var libraryBamfiles: List[File] = List()
......@@ -88,20 +84,20 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
// val cleverVCF : File = sampleDir + "/" + sampleID + ".clever.vcf"
val cleverDir = svcallingDir + sampleID + ".clever/"
val clever = Clever(this, bamFile, this.reference, cleverDir)
val clever = Clever(this, bamFile, this.reference, svcallingDir, cleverDir)
sampleOutput.vcf += ("clever" -> List(clever.outputvcf))
add(clever)
// val breakdancerDir = sampleDir + sampleID + ".breakdancer/"
// val breakdancer = Breakdancer(this, bamFile, this.reference, breakdancerDir )
// outputFiles += ("breakdancer_vcf" -> List(breakdancer.output) )
// addAll(breakdancer.functions)
val breakdancerDir = sampleDir + sampleID + ".breakdancer/"
val breakdancer = Breakdancer(this, bamFile, this.reference, breakdancerDir)
sampleOutput.vcf += ("breakdancer" -> List(breakdancer.outputraw))
addAll(breakdancer.functions)
return sampleOutput
}
// Called for each run from a sample
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
val libraryOutput = new LibraryOutput
var outputFiles: Map[String, File] = Map()
......@@ -111,13 +107,13 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
val runDir: String = alignmentDir + "run_" + runID + "/"
if (runConfig.contains("R1")) {
// val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir)
// val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir)
val mapping = new Mapping(this)
mapping.defaultAligner = "stampy"
mapping.skipFlexiprep = false
mapping.skipMarkduplicates = true // we do the dedup marking using Sambamba
if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString)
if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
mapping.paired = (mapping.input_R2 != null)
......@@ -133,16 +129,14 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
addAll(mapping.functions)
// start sambamba dedup
// outputFiles += ("final_bam" -> mapping.outputFiles("finalBamFile"))
// outputFiles += ("final_bam" -> mapping.outputFiles("finalBamFile"))
libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
} else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
return libraryOutput
// logger.debug(outputFiles)
// return outputFiles
// logger.debug(outputFiles)
// return outputFiles
}
}
object Yamsvp extends PipelineCommand {
override val pipeline = "/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.class"
}
object Yamsvp extends PipelineCommand
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment