Commit 9b41313c authored by bow's avatar bow
Browse files

Merge branch 'feature-yamsvp' into 'develop'

Feature yamsvp

Convert yamsvp to also work from a bam file and be included in Shiva

See merge request !201
parents 7a2b2219 05a50457
......@@ -179,7 +179,7 @@ trait BastyTrait extends MultiSampleQScript {
snpsOnly: Boolean = false): FastaOutput = {
val bastyGenerateFasta = new BastyGenerateFasta(this)
bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName
bastyGenerateFasta.inputVcf = shiva.variantcalling.get.finalFile
bastyGenerateFasta.inputVcf = shiva.variantCalling.get.finalFile
if (shiva.samples.contains(sampleName)) {
bastyGenerateFasta.bamFile = shiva.samples(sampleName).preProcessBam.get
}
......
......@@ -18,38 +18,35 @@ package nl.lumc.sasc.biopet.extensions.breakdancer
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.core.{ Reference, BiopetQScript, PipelineCommand }
import org.broadinstitute.gatk.queue.QScript
/// Breakdancer is actually a mini pipeline executing binaries from the breakdancer package
class Breakdancer(val root: Configurable) extends QScript with BiopetQScript {
class Breakdancer(val root: Configurable) extends QScript with BiopetQScript with Reference {
def this() = this(null)
@Input(doc = "Input file (bam)")
var input: File = _
@Input(doc = "Reference Fasta file")
var reference: File = _
@Argument(doc = "Work directory")
var workdir: String = _
var workDir: File = _
var deps: List[File] = Nil
@Output(doc = "Breakdancer config")
lazy val configfile: File = {
new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.cfg")
new File(workDir, input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.cfg")
}
@Output(doc = "Breakdancer raw output")
lazy val outputraw: File = {
new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.tsv")
new File(workDir, input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.tsv")
}
@Output(doc = "Breakdancer VCF output")
lazy val outputvcf: File = {
new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.vcf")
new File(workDir, input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.vcf")
}
override def init() {
override def init(): Unit = {
}
def biopetScript() {
......@@ -72,11 +69,10 @@ class Breakdancer(val root: Configurable) extends QScript with BiopetQScript {
}
object Breakdancer extends PipelineCommand {
def apply(root: Configurable, input: File, reference: File, runDir: String): Breakdancer = {
def apply(root: Configurable, input: File, runDir: File): Breakdancer = {
val breakdancer = new Breakdancer(root)
breakdancer.input = input
breakdancer.reference = reference
breakdancer.workdir = runDir
breakdancer.workDir = runDir
breakdancer.init()
breakdancer.biopetScript()
breakdancer
......
......@@ -51,8 +51,8 @@ class BreakdancerCaller(val root: Configurable) extends BiopetCommandLineFunctio
var x: Option[Int] = config("x", default = 1000)
var b: Option[Int] = config("b", default = 100)
var t: Boolean = config("t", default = false)
var d: String = config("d")
var g: String = config("g")
var d: Option[String] = config("d")
var g: Option[String] = config("g")
var l: Boolean = config("l", default = false)
var a: Boolean = config("a", default = false)
var h: Boolean = config("h", default = false)
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.clever
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import org.broadinstitute.gatk.queue.QScript
class Clever(val root: Configurable) extends QScript with BiopetQScript {
def this() = this(null)
@Input(doc = "Input file (bam)")
var input: File = _
@Input(doc = "Reference")
var reference: File = _
@Argument(doc = "Work directory")
var workdir: String = _
@Argument(doc = "Current working directory")
var cwd: String = _
override def init() {
}
def biopetScript() {
// write the pipeline here
logger.info("Starting Clever Pipeline")
/// start clever and then copy the vcf into the root directory "<sample>.clever/"
val clever = CleverCaller(this, input, reference, cwd, workdir)
outputFiles += ("clever_vcf" -> clever.outputvcf)
add(clever)
}
}
object Clever extends PipelineCommand {
override val pipeline = "/nl/lumc/sasc/biopet/extensions/svcallers/Clever/Clever.class"
def apply(root: Configurable, input: File, runDir: String): Clever = {
val cleverpipeline = new Clever(root)
cleverpipeline.input = input
cleverpipeline.workdir = runDir
cleverpipeline.init()
cleverpipeline.biopetScript()
cleverpipeline
}
}
\ No newline at end of file
......@@ -2,18 +2,19 @@ package nl.lumc.sasc.biopet.extensions.clever
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.{ Reference, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
class CleverCaller(val root: Configurable) extends BiopetCommandLineFunction {
class CleverCaller(val root: Configurable) extends BiopetCommandLineFunction with Reference {
executable = config("exe", default = "clever")
private lazy val versionexecutable: File = config("version_exe", default = new File(executable).getParent + "/ctk-version")
private lazy val versionExecutable: File = config("version_exe", default = new File(executable).getParent + "/ctk-version")
override def defaultThreads = 8
override def defaultCoreMemory = 3.0
override def versionCommand = versionexecutable.getAbsolutePath
override def versionCommand = versionExecutable.getAbsolutePath
override def versionRegex = """(.*)""".r
override def versionExitcode = List(0, 1)
......@@ -23,19 +24,17 @@ class CleverCaller(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Reference")
var reference: File = _
@Argument(doc = "Work directory")
var workdir: String = _
var cwd: String = _
protected def workDir: File = new File(cwd, "work")
var cwd: File = _
@Output(doc = "Clever VCF output")
lazy val outputvcf: File = {
new File(cwd + "predictions.vcf")
new File(cwd, "predictions.vcf")
}
@Output(doc = "Clever raw output")
lazy val outputraw: File = {
new File(workdir + "predictions.raw.txt")
new File(workDir, "predictions.raw.txt")
}
// var T: Option[Int] = config("T", default = defaultThreads)
......@@ -45,31 +44,30 @@ class CleverCaller(val root: Configurable) extends BiopetCommandLineFunction {
var k: Boolean = config("k", default = false) // keep working directory
var r: Boolean = config("r", default = false) // take read groups into account
override def beforeCmd() {
if (workdir == null) throw new Exception("Clever :: Workdirectory is not defined")
// if (input.getName.endsWith(".sort.bam")) sorted = true
override def beforeGraph() {
super.beforeGraph()
if (workDir == null) throw new Exception("Clever :: Workdirectory is not defined")
if (reference == null) reference = referenceFasta()
}
def cmdLine = required(executable) +
" --sorted " +
" --use_xa " +
optional("-T", nCoresRequest) +
optional("-T", threads) +
conditional(f, "-f") +
conditional(a, "-a") +
conditional(k, "-k") +
conditional(r, "-r") +
required(this.input) +
required(this.reference) +
required(this.workdir)
required(input) +
required(reference) +
required(workDir)
}
object CleverCaller {
def apply(root: Configurable, input: File, reference: File, svDir: String, runDir: String): CleverCaller = {
def apply(root: Configurable, input: File, svDir: File): CleverCaller = {
val clever = new CleverCaller(root)
clever.input = input
clever.reference = reference
clever.cwd = svDir
clever.workdir = runDir
clever
}
}
\ No newline at end of file
......@@ -2,33 +2,34 @@ package nl.lumc.sasc.biopet.extensions.delly
import java.io.File
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.core.{ Reference, BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.Ln
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.extensions.gatk.CatVariants
class Delly(val root: Configurable) extends QScript with BiopetQScript {
class Delly(val root: Configurable) extends QScript with BiopetQScript with Reference {
def this() = this(null)
@Input(doc = "Input file (bam)")
var input: File = _
@Argument(doc = "Work directory")
var workdir: String = _
var workDir: File = _
@Output(doc = "Delly result VCF")
var outputvcf: File = _
var outputVcf: File = _
var outputBaseName: String = _
var outputName: String = _
// select the analysis types DEL,DUP,INV,TRA
var del: Boolean = config("DEL", default = false)
var dup: Boolean = config("DUP", default = false)
var inv: Boolean = config("INV", default = false)
var tra: Boolean = config("TRA", default = false)
override def init() {
var del: Boolean = config("DEL", default = true)
var dup: Boolean = config("DUP", default = true)
var inv: Boolean = config("INV", default = true)
var tra: Boolean = config("TRA", default = true)
override def init(): Unit = {
if (outputName == null) outputName = input.getName.stripSuffix(".bam")
if (outputVcf == null) outputVcf = new File(workDir, outputName + ".delly.vcf")
}
def biopetScript() {
......@@ -37,15 +38,12 @@ class Delly(val root: Configurable) extends QScript with BiopetQScript {
var outputFiles: Map[String, File] = Map()
var vcfFiles: Map[String, File] = Map()
this.outputBaseName = workdir + input.getName.substring(0, input.getName.lastIndexOf(".bam"))
this.outputvcf = outputBaseName + ".delly.vcf"
/// start delly and then copy the vcf into the root directory "<sample>.delly/"
if (del) {
val delly = new DellyCaller(this)
delly.input = input
delly.analysistype = "DEL"
delly.outputvcf = outputBaseName + ".delly.del.vcf"
delly.outputvcf = new File(workDir, outputName + ".delly.del.vcf")
add(delly)
vcfFiles += ("DEL" -> delly.outputvcf)
}
......@@ -53,7 +51,7 @@ class Delly(val root: Configurable) extends QScript with BiopetQScript {
val delly = new DellyCaller(this)
delly.input = input
delly.analysistype = "DUP"
delly.outputvcf = outputBaseName + ".delly.dup.vcf"
delly.outputvcf = new File(workDir, outputName + ".delly.dup.vcf")
add(delly)
vcfFiles += ("DUP" -> delly.outputvcf)
}
......@@ -61,7 +59,7 @@ class Delly(val root: Configurable) extends QScript with BiopetQScript {
val delly = new DellyCaller(this)
delly.input = input
delly.analysistype = "INV"
delly.outputvcf = outputBaseName + ".delly.inv.vcf"
delly.outputvcf = new File(workDir, outputName + ".delly.inv.vcf")
add(delly)
vcfFiles += ("INV" -> delly.outputvcf)
}
......@@ -69,47 +67,43 @@ class Delly(val root: Configurable) extends QScript with BiopetQScript {
val delly = new DellyCaller(this)
delly.input = input
delly.analysistype = "TRA"
delly.outputvcf = outputBaseName + ".delly.tra.vcf"
delly.outputvcf = new File(workDir, outputName + ".delly.tra.vcf")
// vcfFiles += ("TRA" -> delly.outputvcf)
add(delly)
}
// we need to merge the vcf's
var finalVCF = if (vcfFiles.size > 1) {
val finalVCF = if (vcfFiles.size > 1) {
// do merging
// CatVariants is a $org.broadinstitute.gatk.utils.commandline.CommandLineProgram$;
//TODO: convert to biopet extension
val variants = new CatVariants()
variants.jobResourceRequests :+= "h_vmem=4G"
variants.nCoresRequest = 1
variants.memoryLimit = Option(2.0)
variants.variant = vcfFiles.values.toList
variants.reference = config("reference")
variants.outputFile = this.outputvcf
variants.outputFile = this.outputVcf
variants.reference = referenceFasta()
// add the job
add(variants)
this.outputvcf
} else {
//add(variants)
Some(outputVcf)
} else if (vcfFiles.size == 1) {
// TODO: pretify this
val ln = Ln(this, vcfFiles.head._2, this.outputvcf, relative = true)
add(ln)
ln.output
}
val ln = Ln(this, vcfFiles.head._2, this.outputVcf, relative = true)
//add(ln)
Some(ln.output)
} else None
outputFiles += ("vcf" -> this.outputvcf)
finalVCF.foreach(file => outputFiles += ("vcf" -> file))
}
private def swapExtension(inputFile: String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".delly.vcf"
}
object Delly extends PipelineCommand {
override val pipeline = "/nl/lumc/sasc/biopet/extensions/svcallers/Delly/Delly.class"
def apply(root: Configurable, input: File, runDir: String): Delly = {
val dellypipeline = new Delly(root)
dellypipeline.input = input
dellypipeline.workdir = runDir
dellypipeline.init()
dellypipeline.biopetScript()
dellypipeline
def apply(root: Configurable, input: File, workDir: File): Delly = {
val dellyPipeline = new Delly(root)
dellyPipeline.input = input
dellyPipeline.workDir = workDir
dellyPipeline.init()
dellyPipeline.biopetScript()
dellyPipeline
}
}
\ No newline at end of file
......@@ -12,6 +12,7 @@ class DellyCaller(val root: Configurable) extends BiopetCommandLineFunction {
private lazy val versionexecutable: File = new File(executable)
override def defaultThreads = 1
override def defaultCoreMemory = 4.0
override def versionCommand = versionexecutable.getAbsolutePath
override def versionRegex = """DELLY \(Version: (.*)\)""".r
......
......@@ -42,7 +42,7 @@ class VcfFilter(val root: Configurable) extends ToolCommandFuntion {
var minSamplesPass: Option[Int] = config("min_samples_pass")
var filterRefCalls: Boolean = config("filter_ref_calls", default = false)
override def defaultCoreMemory = 1.0
override def defaultCoreMemory = 3.0
override def commandLine = super.commandLine +
required("-I", inputVcf) +
......
......@@ -267,6 +267,22 @@ object ConfigUtils extends Logging {
any2list(any).map(_.toString)
}
/** Convert Any to List[Any], fallback on list with 1 value */
def any2set(any: Any): Set[Any] = {
if (any == null) return null
any match {
case s: Set[_] => s.toSet
case l: List[_] => l.toSet
case _ => Set(any)
}
}
/** Convert Any to List[String] */
def any2stringSet(any: Any): Set[String] = {
if (any == null) return null
any2set(any).map(_.toString)
}
/** Convert Any to List[File] */
def any2fileList(any: Any): List[File] = {
if (any == null) return null
......@@ -414,7 +430,7 @@ object ConfigUtils extends Logging {
/** Convert ConfigValue to Set[String] */
implicit def configValue2stringSet(value: ConfigValue): Set[String] = {
if (requiredValue(value)) any2stringList(value.value).toSet
if (requiredValue(value)) any2stringSet(value.value)
else Set()
}
......
......@@ -24,7 +24,8 @@ object BiopetExecutablePublic extends BiopetExecutable {
nl.lumc.sasc.biopet.pipelines.sage.Sage,
nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig,
nl.lumc.sasc.biopet.pipelines.carp.Carp,
nl.lumc.sasc.biopet.pipelines.toucan.Toucan
nl.lumc.sasc.biopet.pipelines.toucan.Toucan,
nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling
)
def pipelines: List[MainCommand] = List(
......
......@@ -33,7 +33,7 @@
<module>mapping</module>
<module>sage</module>
<module>kopisu</module>
<module>yamsvp</module>
<!-- <module>yamsvp</module> -->
<module>gears</module>
<module>bam2wig</module>
<module>carp</module>
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.shiva
import java.io.File
import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ PipelineCommand, BiopetQScript, Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.breakdancer.Breakdancer
import nl.lumc.sasc.biopet.extensions.clever.CleverCaller
import nl.lumc.sasc.biopet.extensions.delly.Delly
import nl.lumc.sasc.biopet.tools.VcfStats
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.Input
import scala.collection.JavaConversions._
/**
* Common trait for ShivaVariantcalling
*
* Created by pjvan_thof on 2/26/15.
*/
class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
qscript =>
def this() = this(null)
@Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
protected var inputBamsArg: List[File] = Nil
protected var inputBams: Map[String, File] = Map()
def addBamFile(file: File, sampleId: Option[String] = None): Unit = {
sampleId match {
case Some(sample) => inputBams += sample -> file
case _ if !file.exists() => throw new IllegalArgumentException("Bam file does not exits: " + file)
case _ => {
val inputSam = SamReaderFactory.makeDefault.open(file)
val samples = inputSam.getFileHeader.getReadGroups.map(_.getSample).distinct
if (samples.size == 1) {
inputBams += samples.head -> file
} else throw new IllegalArgumentException("Bam contains multiple sample IDs: " + file)
}
}
}
/** Executed before script */
def init(): Unit = {
inputBamsArg.foreach(addBamFile(_))
}
/** Variantcallers requested by the config */
protected val configCallers: Set[String] = config("sv_callers", default = Set("breakdancer", "clever", "delly"))
/** This will add jobs for this pipeline */
def biopetScript(): Unit = {
for (cal <- configCallers) {
if (!callersList.exists(_.name == cal))
BiopetQScript.addError("variantcaller '" + cal + "' does not exist, possible to use: " + callersList.map(_.name).mkString(", "))
}
val callers = callersList.filter(x => configCallers.contains(x.name))
require(inputBams.nonEmpty, "No input bams found")
require(callers.nonEmpty, "must select at least 1 SV caller, choices are: " + callersList.map(_.name).mkString(", "))
callers.foreach(_.addJobs())
addSummaryJobs()
}
/** Will generate all available variantcallers */
protected def callersList: List[SvCaller] = List(new Breakdancer, new Clever, new Delly)
/** General trait for a variantcaller mode */
trait SvCaller {
/** Name of mode, this should also be used in the config */
val name: String
/** Output dir for this mode */
def outputDir = new File(qscript.outputDir, name)