Commit 7278bcf2 authored by Peter van 't Hof's avatar Peter van 't Hof

Added scattering on annotation

parent 180bb97e
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Extension for CombineVariants from GATK
*
* Created by pjvan_thof on 2/26/15.
*/
class SelectVariants(val root: Configurable) extends Gatk {
val analysisType = "SelectVariants"
@Input(doc = "", required = true)
var inputFiles: List[File] = Nil
@Output(doc = "", required = true)
var outputFile: File = null
var excludeNonVariants: Boolean = false
var inputMap: Map[File, String] = Map()
def addInput(file: File, name: String): Unit = {
inputFiles :+= file
inputMap += file -> name
}
override def beforeGraph(): Unit = {
super.beforeGraph()
if (outputFile.getName.endsWith(".vcf.gz")) outputFiles :+= new File(outputFile.getAbsolutePath + ".tbi")
deps :::= inputFiles.filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi"))
deps = deps.distinct
}
override def cmdLine = super.cmdLine +
(for (file <- inputFiles) yield {
inputMap.get(file) match {
case Some(name) => required("-V:" + name, file)
case _ => required("-V", file)
}
}).mkString +
required("-o", outputFile) +
conditional(excludeNonVariants, "--excludeNonVariants")
}
......@@ -15,15 +15,19 @@
*/
package nl.lumc.sasc.biopet.pipelines.toucan
import java.io.File
import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsView
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsIntersect, BedtoolsMerge }
import nl.lumc.sasc.biopet.extensions.manwe.{ ManweAnnotateVcf, ManweSamplesImport }
import nl.lumc.sasc.biopet.extensions.tools.{ GvcfToBed, VcfWithVcf, VepNormalizer }
import nl.lumc.sasc.biopet.extensions.{ Bgzip, Ln, VariantEffectPredictor }
import nl.lumc.sasc.biopet.extensions.bedtools.{BedtoolsIntersect, BedtoolsMerge}
import nl.lumc.sasc.biopet.extensions.gatk.{CatVariants, SelectVariants}
import nl.lumc.sasc.biopet.extensions.manwe.{ManweAnnotateVcf, ManweSamplesImport}
import nl.lumc.sasc.biopet.extensions.tools.{GvcfToBed, VcfWithVcf, VepNormalizer}
import nl.lumc.sasc.biopet.extensions.{Bgzip, Ln, VariantEffectPredictor}
import nl.lumc.sasc.biopet.utils.VcfUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.intervals.BedRecordList
import org.broadinstitute.gatk.queue.QScript
/**
......@@ -40,6 +44,9 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
@Input(doc = "Input GVCF file", shortName = "gvcf", required = false)
var inputGvcf: Option[File] = None
lazy val gonlVcfFile: Option[File] = config("gonl_vcf")
lazy val exacVcfFile: Option[File] = config("exac_vcf")
var sampleIds: List[String] = Nil
def init(): Unit = {
inputFiles :+= new InputFile(inputVCF)
......@@ -63,47 +70,69 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
case _ => throw new IllegalArgumentException("You have not specified a GVCF file")
}
} else inputVCF
val vep = new VariantEffectPredictor(this)
vep.input = useVcf
vep.output = new File(outputDir, inputVCF.getName.stripSuffix(".gz").stripSuffix(".vcf") + ".vep.vcf")
vep.isIntermediate = true
add(vep)
addSummarizable(vep, "variant_effect_predictor")
val normalizer = new VepNormalizer(this)
normalizer.inputVCF = vep.output
normalizer.outputVcf = swapExt(outputDir, vep.output, ".vcf", ".normalized.vcf.gz")
add(normalizer)
// Optional annotation steps, depend is some files existing in the config
val gonlVcfFile: Option[File] = config("gonl_vcf")
val exacVcfFile: Option[File] = config("exac_vcf")
var outputFile = normalizer.outputVcf
gonlVcfFile match {
case Some(gonlFile) =>
val vcfWithVcf = new VcfWithVcf(this)
vcfWithVcf.input = outputFile
vcfWithVcf.secondaryVcf = gonlFile
vcfWithVcf.output = swapExt(outputDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz")
vcfWithVcf.fields ::= ("AF", "AF_gonl", None)
add(vcfWithVcf)
outputFile = vcfWithVcf.output
case _ =>
val outputVcfFiles = BedRecordList.fromReference(referenceFasta())
.scatter(config("bin_size", default = 50000000))
.allRecords.map { region =>
val chunkName = s"${region.chr}-${region.start}-${region.end}"
val chunkDir = new File(outputDir, "chunk" + File.separator + chunkName)
val sv = new SelectVariants(this)
sv.inputFiles :+= useVcf
sv.outputFile = new File(chunkDir, chunkName + ".vcf.gz")
sv.isIntermediate = true
add(sv)
val vep = new VariantEffectPredictor(this)
vep.input = sv.outputFile
vep.output = new File(chunkDir, chunkName + ".vep.vcf")
vep.isIntermediate = true
add(vep)
addSummarizable(vep, "variant_effect_predictor")
val normalizer = new VepNormalizer(this)
normalizer.inputVCF = vep.output
normalizer.outputVcf = new File(chunkDir, chunkName + ".normalized.vcf.gz")
add(normalizer)
var outputFile = normalizer.outputVcf
gonlVcfFile match {
case Some(gonlFile) =>
val vcfWithVcf = new VcfWithVcf(this)
vcfWithVcf.input = outputFile
vcfWithVcf.secondaryVcf = gonlFile
vcfWithVcf.output = swapExt(chunkDir, normalizer.outputVcf, ".vcf.gz", ".gonl.vcf.gz")
vcfWithVcf.fields ::= ("AF", "AF_gonl", None)
add(vcfWithVcf)
outputFile = vcfWithVcf.output
case _ =>
}
exacVcfFile match {
case Some(exacFile) =>
val vcfWithVcf = new VcfWithVcf(this)
vcfWithVcf.input = outputFile
vcfWithVcf.secondaryVcf = exacFile
vcfWithVcf.output = swapExt(chunkDir, outputFile, ".vcf.gz", ".exac.vcf.gz")
vcfWithVcf.fields ::= ("AF", "AF_exac", None)
add(vcfWithVcf)
outputFile = vcfWithVcf.output
case _ =>
}
outputFile
}
exacVcfFile match {
case Some(exacFile) =>
val vcfWithVcf = new VcfWithVcf(this)
vcfWithVcf.input = outputFile
vcfWithVcf.secondaryVcf = exacFile
vcfWithVcf.output = swapExt(outputDir, outputFile, ".vcf.gz", ".exac.vcf.gz")
vcfWithVcf.fields ::= ("AF", "AF_exac", None)
add(vcfWithVcf)
outputFile = vcfWithVcf.output
case _ =>
val cv = new CatVariants(this)
cv.inputFiles = outputVcfFiles.toList
cv.outputFile = (gonlVcfFile, exacVcfFile) match {
case (Some(_), Some(_)) => swapExt(outputDir, inputVCF, ".vcf.gz", ".vep.normalized.gonl.exac.vcf.gz")
case (Some(_), _) => swapExt(outputDir, inputVCF, ".vcf.gz", ".vep.normalized.gonl.vcf.gz")
case (_, Some(_)) => swapExt(outputDir, inputVCF, ".vcf.gz", ".vep.normalized.exac.vcf.gz")
case _ => swapExt(outputDir, inputVCF, ".vcf.gz", ".vep.normalized.vcf.gz")
}
add(cv)
addSummaryJobs()
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment