Commit bdaab956 authored by Peter van 't Hof's avatar Peter van 't Hof

Merge remote-tracking branch 'remotes/origin/develop' into feature-scatter_annotation

# Conflicts:
#	toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
parents 819de8e1 766d624d
package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Created by pjvanthof on 30/03/16.
*/
class Grep(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Input file", required = true)
var input: File = _
@Output(doc = "Output file", required = true)
var output: File = _
executable = config("exe", default = "grep")
var grepFor: String = null
var invertMatch: Boolean = false
var regex: Boolean = false
var perlRegexp: Boolean = false
/** return commandline to execute */
def cmdLine = required(executable) +
conditional(invertMatch, "-v") +
conditional(regex, "-e") +
conditional(perlRegexp, "-P") +
required(grepFor) +
(if (inputAsStdin) "" else required(input)) +
(if (outputAsStsout) "" else " > " + required(output))
}
object Grep {
def apply(root: Configurable,
grepFor: String,
regex: Boolean = false,
invertMatch: Boolean = false,
perlRegexp: Boolean = false): Grep = {
val grep = new Grep(root)
grep.grepFor = grepFor
grep.regex = regex
grep.perlRegexp = perlRegexp
grep.invertMatch = invertMatch
grep
}
}
......@@ -156,9 +156,9 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
override def beforeGraph(): Unit = {
super.beforeGraph()
if (!cache && !database) {
Logging.addError("Must supply either cache or database for VariantEffectPredictor")
Logging.addError("Must either set 'cache' or 'database' to true for VariantEffectPredictor")
} else if (cache && dir.isEmpty) {
Logging.addError("Must supply dir to cache for VariantEffectPredictor")
Logging.addError("Must supply 'dir_cache' to cache for VariantEffectPredictor")
}
if (statsText) _summary = new File(output.getAbsolutePath + "_summary.txt")
}
......
......@@ -83,6 +83,25 @@ The following config values are optional:
Annotation queries can be set by the `annotation_queries` config value in the `manwe` config namespace.
By default, a global query is returned.
###Groups
In case you want to add your samples to a specific group in your varda database, you can use the tagging system in your sample config.
Specifically, the `varda_group` tag should be a list of strings pointing to group.
E.g. :
```json
{
"samples": {
"sample1": {
"tags": {
"varda_group": ["group1", "group2"]
}
}
}
}
```
Running the pipeline
---------------
The command to run the pipeline is:
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.toucan
import java.io.File
......@@ -31,10 +31,10 @@ import nl.lumc.sasc.biopet.utils.intervals.BedRecordList
import org.broadinstitute.gatk.queue.QScript
/**
* Pipeline to annotate a vcf file with VEP
*
* Created by ahbbollen on 15-1-15.
*/
* Pipeline to annotate a vcf file with VEP
*
* Created by ahbbollen on 15-1-15.
*/
class Toucan(val root: Configurable) extends QScript with BiopetQScript with SummaryQScript with Reference {
def this() = this(null)
......@@ -44,20 +44,22 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
@Input(doc = "Input GVCF file", shortName = "gvcf", required = false)
var inputGvcf: Option[File] = None
var outputVcf: Option[File] = None
def sampleInfo: Map[String, Map[String, Any]] = root match {
case m: MultiSampleQScript => m.samples.map { case (sampleId, sample) => sampleId -> sample.sampleTags }
case null => VcfUtils.getSampleIds(inputVcf).map(x => x -> Map[String, Any]()).toMap
case s: SampleLibraryTag => s.sampleId.map(x => x -> Map[String, Any]()).toMap
case _ => throw new IllegalArgumentException("")
}
lazy val gonlVcfFile: Option[File] = config("gonl_vcf")
lazy val exacVcfFile: Option[File] = config("exac_vcf")
lazy val doVarda: Boolean = config("use_varda", default = false)
var sampleIds: List[String] = Nil
def init(): Unit = {
require(inputVcf != null, "No Input vcf given")
inputFiles :+= new InputFile(inputVcf)
sampleIds = root match {
case m: MultiSampleQScript => m.samples.keys.toList
case null => VcfUtils.getSampleIds(inputVcf)
case s: SampleLibraryTag => s.sampleId.toList
case _ => throw new IllegalArgumentException("You don't have any samples")
}
}
override def defaults = Map(
......@@ -76,58 +78,58 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
.scatter(config("bin_size", default = 50000000))
.allRecords.map { region =>
val chunkName = s"${region.chr}-${region.start}-${region.end}"
val chunkDir = new File(outputDir, "chunk" + File.separator + chunkName)
val sv = new SelectVariants(this)
sv.inputFiles :+= useVcf
sv.outputFile = new File(chunkDir, chunkName + ".vcf.gz")
sv.isIntermediate = true
add(sv)
val vep = new VariantEffectPredictor(this)
vep.input = sv.outputFile
vep.output = new File(chunkDir, chunkName + ".vep.vcf")
vep.isIntermediate = true
add(vep)
addSummarizable(vep, "variant_effect_predictor")
val normalizer = new VepNormalizer(this)
normalizer.inputVCF = vep.output
normalizer.outputVcf = new File(chunkDir, chunkName + ".normalized.vcf.gz")
normalizer.isIntermediate = true
add(normalizer)
var outputFile = normalizer.outputVcf
gonlVcfFile match {
case Some(gonlFile) =>
val vcfWithVcf = new VcfWithVcf(this)
vcfWithVcf.input = outputFile
vcfWithVcf.secondaryVcf = gonlFile
vcfWithVcf.output = swapExt(chunkDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz")
vcfWithVcf.fields ::= ("AF", "AF_gonl", None)
vcfWithVcf.isIntermediate = true
add(vcfWithVcf)
outputFile = vcfWithVcf.output
case _ =>
}
exacVcfFile match {
case Some(exacFile) =>
val vcfWithVcf = new VcfWithVcf(this)
vcfWithVcf.input = outputFile
vcfWithVcf.secondaryVcf = exacFile
vcfWithVcf.output = swapExt(chunkDir, outputFile, ".vcf.gz", ".exac.vcf.gz")
vcfWithVcf.fields ::= ("AF", "AF_exac", None)
vcfWithVcf.isIntermediate = true
add(vcfWithVcf)
outputFile = vcfWithVcf.output
case _ =>
}
outputFile
val chunkName = s"${region.chr}-${region.start}-${region.end}"
val chunkDir = new File(outputDir, "chunk" + File.separator + chunkName)
val sv = new SelectVariants(this)
sv.inputFiles :+= useVcf
sv.outputFile = new File(chunkDir, chunkName + ".vcf.gz")
sv.isIntermediate = true
add(sv)
val vep = new VariantEffectPredictor(this)
vep.input = sv.outputFile
vep.output = new File(chunkDir, chunkName + ".vep.vcf")
vep.isIntermediate = true
add(vep)
addSummarizable(vep, "variant_effect_predictor")
val normalizer = new VepNormalizer(this)
normalizer.inputVCF = vep.output
normalizer.outputVcf = new File(chunkDir, chunkName + ".normalized.vcf.gz")
normalizer.isIntermediate = true
add(normalizer)
var outputFile = normalizer.outputVcf
gonlVcfFile match {
case Some(gonlFile) =>
val vcfWithVcf = new VcfWithVcf(this)
vcfWithVcf.input = outputFile
vcfWithVcf.secondaryVcf = gonlFile
vcfWithVcf.output = swapExt(chunkDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz")
vcfWithVcf.fields ::= ("AF", "AF_gonl", None)
vcfWithVcf.isIntermediate = true
add(vcfWithVcf)
outputFile = vcfWithVcf.output
case _ =>
}
exacVcfFile match {
case Some(exacFile) =>
val vcfWithVcf = new VcfWithVcf(this)
vcfWithVcf.input = outputFile
vcfWithVcf.secondaryVcf = exacFile
vcfWithVcf.output = swapExt(chunkDir, outputFile, ".vcf.gz", ".exac.vcf.gz")
vcfWithVcf.fields ::= ("AF", "AF_exac", None)
vcfWithVcf.isIntermediate = true
add(vcfWithVcf)
outputFile = vcfWithVcf.output
case _ =>
}
outputFile
}
val cv = new CatVariants(this)
cv.inputFiles = outputVcfFiles.toList
cv.outputFile = (gonlVcfFile, exacVcfFile) match {
......@@ -142,15 +144,15 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
}
/**
* Performs the varda import and activate for one sample
*
* @param sampleID the sampleID to be used
* @param inputVcf the input VCF
* @param gVCF the gVCF for coverage
* @param annotation: ManweDownloadAnnotateVcf object of annotated vcf
* @return
*/
def importAndActivateSample(sampleID: String, inputVcf: File,
* Performs the varda import and activate for one sample
*
* @param sampleID the sampleID to be used
* @param inputVcf the input VCF
* @param gVCF the gVCF for coverage
* @param annotation: ManweDownloadAnnotateVcf object of annotated vcf
* @return
*/
def importAndActivateSample(sampleID: String, sampleGroups: List[String], inputVcf: File,
gVCF: File, annotation: ManweAnnotateVcf): ManweActivateAfterAnnotImport = {
val minGQ: Int = config("minimum_genome_quality", default = 20, namespace = "manwe")
......@@ -199,6 +201,7 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
imported.beds = List(bgzippedBed.output)
imported.name = Some(sampleID)
imported.public = isPublic
imported.group = sampleGroups
imported.waitToComplete = false
imported.isIntermediate = true
imported.output = swapExt(outputDir, intersected.output, ".vcf.gz", ".manwe.import")
......@@ -212,16 +215,15 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
}
/**
* Perform varda analysis
*
* @param vcf input vcf
* @param gVcf The gVCF to be used for coverage calculations
* @return return vcf
*/
* Perform varda analysis
*
* @param vcf input vcf
* @param gVcf The gVCF to be used for coverage calculations
* @return return vcf
*/
def varda(vcf: File, gVcf: File): File = {
val annotationQueries: List[String] = config("annotation_queries", default = List("GLOBAL *"), namespace = "manwe")
//TODO: add groups!!! Need sample-specific group tags for this
val annotate = new ManweAnnotateVcf(this)
annotate.vcf = vcf
......@@ -237,7 +239,14 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
annotatedVcf.output = swapExt(outputDir, annotate.output, ".manwe.annot", "manwe.annot.vcf.gz")
add(annotatedVcf)
val activates = sampleIds map { x => importAndActivateSample(x, vcf, gVcf, annotate) }
val activates = sampleInfo map { x =>
val sampleGroup = x._2.getOrElse("varda_group", Nil) match {
case x: List[String] => x
case Nil => Nil
case _ => throw new IllegalArgumentException("Sample tag 'varda_group' is not a list of strings")
}
importAndActivateSample(x._1, sampleGroup, vcf, gVcf, annotate)
}
val finalLn = new Ln(this)
activates.foreach(x => finalLn.deps :+= x.output)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment