Commit 8337024e authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch 'feature-mergesvcalls' into 'develop'

Feature mergesvcalls

This branch will introduce the SV-merging tool for the VCF files generated by SV-callers in the ShivaSVcalling pipeline.

We now use the PySVtools made by @wyleung 

closes #236

See merge request !301
parents 54aeb54f c34c6160
package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline._
/**
* Created by wyleung on 8-1-16.
*/
class Pysvtools(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Input file", required = true)
var input: List[File] = Nil
@Argument(doc = "Set flanking amount")
var flanking: Option[Int] = config("flanking")
var exclusionRegions: List[File] = config("exclusion_regions")
var translocationsOnly: Boolean = config("translocations_only", default = false)
@Output(doc = "Unzipped file", required = true)
var output: File = _
var tsvoutput: File = _
var bedoutput: File = _
var regionsoutput: File = _
executable = config("exe", default = "vcf_merge_sv_events")
def versionRegex = """PySVtools (.*)""".r
def versionCommand = executable + " --version"
override def defaultThreads = 2
override def beforeGraph(): Unit = {
// TODO: we might want to validate the VCF before we start to tool? or is this a responsibility of the tool itself?
if (input.isEmpty) {
Logging.addError("No input VCF is given")
}
// redefine the tsv, bed and regions output
val outputNamePrefix = output.getAbsolutePath.stripSuffix(".vcf")
tsvoutput = new File(outputNamePrefix + ".tsv")
bedoutput = new File(outputNamePrefix + ".bed")
regionsoutput = new File(outputNamePrefix + ".regions.bed")
}
/** return commandline to execute */
def cmdLine = required(executable) +
repeat("-c", input) +
optional("-f", flanking) +
"-i " + repeat(input) +
"-o " + required(tsvoutput) +
"-b " + required(bedoutput) +
"-v " + required(output) +
"-r " + required(regionsoutput)
}
\ No newline at end of file
......@@ -17,9 +17,10 @@ package nl.lumc.sasc.biopet.pipelines.shiva
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.Pysvtools
import nl.lumc.sasc.biopet.pipelines.shiva.svcallers._
import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging }
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging }
import org.broadinstitute.gatk.queue.QScript
/**
......@@ -32,6 +33,9 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
def this() = this(null)
var outputMergedVCFbySample: Map[String, File] = Map()
var outputMergedVCF: File = _
@Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
protected[shiva] var inputBamsArg: List[File] = Nil
......@@ -40,6 +44,7 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
/** Executed before script */
def init(): Unit = {
if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
outputMergedVCF = new File(outputDir, "allsamples.merged.vcf")
}
/** Variantcallers requested by the config */
......@@ -55,7 +60,7 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
val callers = callersList.filter(x => configCallers.contains(x.name))
require(inputBams.nonEmpty, "No input bams found")
require(callers.nonEmpty, "must select at least 1 SV caller, choices are: " + callersList.map(_.name).mkString(", "))
require(callers.nonEmpty, "Please select at least 1 SV caller, choices are: " + callersList.map(_.name).mkString(", "))
callers.foreach { caller =>
caller.inputBams = inputBams
......@@ -63,6 +68,30 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
add(caller)
}
// merge VCF by sample
for ((sample, bamFile) <- inputBams) {
var sampleVCFS: List[Option[File]] = List.empty
callers.foreach { caller =>
sampleVCFS ::= caller.outputVCF(sample)
}
val mergeSVcalls = new Pysvtools(this)
mergeSVcalls.input = sampleVCFS.flatten
mergeSVcalls.output = new File(outputDir, sample + ".merged.vcf")
add(mergeSVcalls)
outputMergedVCFbySample += (sample -> mergeSVcalls.output)
}
// merge all files from all samples in project
val mergeSVcallsProject = new Pysvtools(this)
mergeSVcallsProject.input = outputMergedVCFbySample.values.toList
mergeSVcallsProject.output = outputMergedVCF
add(mergeSVcallsProject)
// merging the VCF calls by project
// basicly this will do all samples from this pipeline run
// group by "tags"
// sample tagging is however not available within this pipeline
addSummaryJobs()
}
......@@ -79,7 +108,10 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript
def summaryFiles: Map[String, File] = {
val callers: Set[String] = configCallers
//callersList.filter(x => callers.contains(x.name)).map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile)
Map()
Map(
"final_mergedvcf" -> outputMergedVCF
)
}
}
......
......@@ -18,6 +18,8 @@ class Breakdancer(val root: Configurable) extends SvCaller {
val breakdancer = BreakdancerCaller(this, bdcfg.output, new File(breakdancerSampleDir, sample + ".breakdancer.tsv"))
val bdvcf = BreakdancerVCF(this, breakdancer.output, new File(breakdancerSampleDir, sample + ".breakdancer.vcf"))
add(bdcfg, breakdancer, bdvcf)
addVCF(sample, bdvcf.output)
}
}
}
......@@ -13,6 +13,8 @@ class Clever(val root: Configurable) extends SvCaller {
val cleverDir = new File(outputDir, sample)
val clever = CleverCaller(this, bamFile, cleverDir)
add(clever)
addVCF(sample, clever.outputvcf)
}
}
}
......@@ -4,7 +4,7 @@ import nl.lumc.sasc.biopet.extensions.delly.DellyCaller
import nl.lumc.sasc.biopet.extensions.gatk.CatVariants
import nl.lumc.sasc.biopet.utils.config.Configurable
/** Script for sv caler delly */
/** Script for sv caller delly */
class Delly(val root: Configurable) extends SvCaller {
def name = "delly"
......@@ -20,7 +20,6 @@ class Delly(val root: Configurable) extends SvCaller {
val catVariants = new CatVariants(this)
catVariants.outputFile = new File(dellyDir, sample + ".delly.vcf.gz")
/// start delly and then copy the vcf into the root directory "<sample>.delly/"
if (del) {
val delly = new DellyCaller(this)
delly.input = bamFile
......@@ -57,6 +56,7 @@ class Delly(val root: Configurable) extends SvCaller {
require(catVariants.inputFiles.nonEmpty, "Must atleast 1 SV-type be selected for Delly")
add(catVariants)
addVCF(sample, catVariants.outputFile)
}
}
}
......@@ -57,6 +57,8 @@ class Pindel(val root: Configurable) extends SvCaller {
pindelVcf.rDate = todayformat.format(today) // officially, we should enter the date of the genome here
pindelVcf.outputVCF = new File(pindelDir, s"${sample}.pindel.vcf")
add(pindelVcf)
addVCF(sample, pindelVcf.outputVCF)
}
}
......
......@@ -13,7 +13,20 @@ trait SvCaller extends QScript with BiopetQScript with Reference {
var namePrefix: String = _
var inputBams: Map[String, File] = _
var inputBams: Map[String, File] = Map.empty
def outputVCF(sample: String): Option[File] = {
outputVCFs.get(sample) match {
case Some(file) => Some(file)
case _ => None
}
}
protected var outputVCFs: Map[String, File] = Map.empty
protected def addVCF(sampleId: String, outputVCF: File) = {
outputVCFs += (sampleId -> outputVCF)
}
def init() = {}
}
......@@ -217,6 +217,10 @@ object ShivaSvCallingTest {
"pindelvcf" -> Map("exe" -> "test"),
"clever" -> Map("exe" -> "test"),
"delly" -> Map("exe" -> "test"),
"varscan_jar" -> "test"
"varscan_jar" -> "test",
"pysvtools" -> Map(
"exe" -> "test",
"exclusion_regions" -> "test",
"translocations_only" -> false)
)
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment