Commit a827c3b4 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added variant recalibration as separated pipeline

parent 8685e51a
......@@ -5,6 +5,7 @@ import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
import nl.lumc.sasc.biopet.pipelines.gatk.GatkBenchmarkGenotyping
import nl.lumc.sasc.biopet.pipelines.gatk.GatkGenotyping
import nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline
import nl.lumc.sasc.biopet.pipelines.gatk.GatkVariantRecalibration
import nl.lumc.sasc.biopet.pipelines.gatk.GatkVariantcalling
import nl.lumc.sasc.biopet.pipelines.gatk.GatkVcfSampleCompare
import nl.lumc.sasc.biopet.pipelines.gentrap.Gentrap
......@@ -22,6 +23,7 @@ object BiopetExecutable {
"gatk-genotyping" -> GatkGenotyping,
"gatk-variantcalling" -> GatkVariantcalling,
"gatk-pipeline" -> GatkPipeline,
"gatk-variant-recalibration" -> GatkVariantRecalibration,
"gatk-vcf-sample-compare" -> GatkVcfSampleCompare,
"sage" -> Sage
)
......
......@@ -4,7 +4,7 @@ import nl.lumc.sasc.biopet.core.MultiSampleQScript
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import java.io.File
import nl.lumc.sasc.biopet.extensions.gatk.{ApplyRecalibration, CombineGVCFs, VariantAnnotator, VariantRecalibrator}
import nl.lumc.sasc.biopet.extensions.gatk.CombineGVCFs
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.{ Argument }
......@@ -58,9 +58,13 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
var vcfFile = gatkGenotyping.outputFile
if (config("inputtype", default = "dna").getString != "rna") {
vcfFile = addVariantAnnotator(vcfFile, finalBamFiles, outputDir + "recalibration/")
vcfFile = addSnpVariantRecalibrator(vcfFile, outputDir + "recalibration/")
vcfFile = addIndelVariantRecalibrator(vcfFile, outputDir + "recalibration/")
val recalibration = new GatkVariantRecalibration(this)
recalibration.inputVcf = vcfFile
recalibration.bamFiles = finalBamFiles
recalibration.outputDir = outputDir + "recalibration/"
recalibration.init
recalibration.biopetScript
vcfFile = recalibration.outputVcf
}
} else logger.warn("No gVCFs to genotype")
} else runSingleSampleJobs(onlySample)
......@@ -106,46 +110,6 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
} else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
return outputFiles
}
def addSnpVariantRecalibrator(inputVcf: File, dir: String): File = {
val snpRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"),
swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = false)
if (!snpRecal.resource.isEmpty) {
add(snpRecal)
val snpApply = ApplyRecalibration(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf"),
snpRecal.recal_file, snpRecal.tranches_file, indel = false)
add(snpApply)
return snpApply.out
} else {
logger.warn("Skipped snp Recalibration, resource is missing")
return inputVcf
}
}
def addIndelVariantRecalibrator(inputVcf: File, dir: String): File = {
val indelRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"),
swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = true)
if (!indelRecal.resource.isEmpty) {
add(indelRecal)
val indelApply = ApplyRecalibration(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf"),
indelRecal.recal_file, indelRecal.tranches_file, indel = true)
add(indelApply)
return indelApply.out
} else {
logger.warn("Skipped indel Recalibration, resource is missing")
return inputVcf
}
}
def addVariantAnnotator(inputvcf: File, bamfiles: List[File], dir: String): File = {
val variantAnnotator = VariantAnnotator(this, inputvcf, bamfiles, swapExt(dir, inputvcf, ".vcf", ".anotated.vcf"))
add(variantAnnotator)
return variantAnnotator.out
}
}
object GatkPipeline extends PipelineCommand {
......
package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.BiopetQScript
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.gatk.ApplyRecalibration
import nl.lumc.sasc.biopet.extensions.gatk.VariantAnnotator
import nl.lumc.sasc.biopet.extensions.gatk.VariantRecalibrator
import org.broadinstitute.gatk.queue.QScript
class GatkVariantRecalibration(val root: Configurable) extends QScript with BiopetQScript {
def this() = this(null)
@Input(doc = "input vcf file", shortName = "I")
var inputVcf: File = _
@Input(doc = "input vcf file", shortName = "BAM", required = false)
var bamFiles: List[File] = Nil
@Output(doc = "output vcf file", shortName = "out")
var outputVcf: File = _
def init() {
if (inputVcf == null) throw new IllegalStateException("Missing Output directory on gatk module")
if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
else if (!outputDir.endsWith("/")) outputDir += "/"
}
def biopetScript() {
var vcfFile: File = if (!bamFiles.isEmpty) addVariantAnnotator(inputVcf, bamFiles, outputDir) else inputVcf
vcfFile = addSnpVariantRecalibrator(vcfFile, outputDir)
vcfFile = addIndelVariantRecalibrator(vcfFile, outputDir)
}
def addSnpVariantRecalibrator(inputVcf: File, dir: String): File = {
val snpRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"),
swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = false)
if (!snpRecal.resource.isEmpty) {
add(snpRecal)
val snpApply = ApplyRecalibration(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf"),
snpRecal.recal_file, snpRecal.tranches_file, indel = false)
add(snpApply)
return snpApply.out
} else {
logger.warn("Skipped snp Recalibration, resource is missing")
return inputVcf
}
}
def addIndelVariantRecalibrator(inputVcf: File, dir: String): File = {
val indelRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"),
swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = true)
if (!indelRecal.resource.isEmpty) {
add(indelRecal)
val indelApply = ApplyRecalibration(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf"),
indelRecal.recal_file, indelRecal.tranches_file, indel = true)
add(indelApply)
return indelApply.out
} else {
logger.warn("Skipped indel Recalibration, resource is missing")
return inputVcf
}
}
def addVariantAnnotator(inputvcf: File, bamfiles: List[File], dir: String): File = {
val variantAnnotator = VariantAnnotator(this, inputvcf, bamfiles, swapExt(dir, inputvcf, ".vcf", ".anotated.vcf"))
add(variantAnnotator)
return variantAnnotator.out
}
}
object GatkVariantRecalibration extends PipelineCommand {
override val pipeline = "/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.class"
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment