Commit ba60c6d4 authored by Peter van 't Hof
Browse files

Adding validateVcf to shiva on dbsnp

parent 831f9ae4
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.tools
import java.io.File
import nl.lumc.sasc.biopet.core.{Reference, ToolCommandFunction}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Input
/**
 * Command line function that wraps the ValidateVcf tool.
 *
 * The command line is the inherited command line followed by "-i" with the
 * input vcf, "-R" with the reference and, when enabled, "--disableFail".
 *
 * @param root Configurable passed in as the root of this function
 */
class ValidateVcf(val root: Configurable) extends ToolCommandFunction with Reference {

  /** Points at the ValidateVcf tool object this function wraps */
  def toolObject = nl.lumc.sasc.biopet.tools.ValidateVcf

  /** Vcf file handed to the tool with "-i" */
  @Input(required = true)
  var inputVcf: File = _

  /** Reference file handed to the tool with "-R"; filled in by beforeGraph() when still null */
  @Input(required = true)
  var reference: File = _

  /** Adds the "--disableFail" flag to the command line when true */
  var disableFail: Boolean = false

  override def defaultCoreMemory = 4.0

  /** Falls back to referenceFasta() when no reference was set explicitly */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    reference = Option(reference).getOrElse(referenceFasta())
  }

  /** Appends the tool specific arguments to the inherited command line */
  override def cmdLine = {
    val toolArguments = List(
      required("-i", inputVcf),
      required("-R", reference),
      conditional(disableFail, "--disableFail")
    )
    super.cmdLine + toolArguments.mkString
  }
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.shiva
import java.io.File
import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.commandline.Input
import scala.io.Source
/**
 * This class scans a log file for lines starting with "ERROR" and halts the
 * JVM with exit code 130 when such a line is found.
 *
 * Created by pjvanthof on 16/08/15.
 */
class CheckValidateVcf extends InProcessFunction {

  /** Log file that is scanned for lines starting with "ERROR" */
  @Input(required = true)
  var inputLogFile: File = _

  /**
   * Reads the log file line by line; on the first line starting with "ERROR"
   * an error is logged and the JVM is halted. The reader is closed afterwards,
   * also when reading the file throws an exception.
   */
  def run: Unit = {
    val reader = Source.fromFile(inputLogFile)
    try {
      // exists stops at the first matching line, halting happens only once anyway
      if (reader.getLines().exists(_.startsWith("ERROR"))) {
        logger.error("Corrupt vcf file found, aborting pipeline")
        // 130 simulates a ctrl-C
        Runtime.getRuntime.halt(130)
      }
    } finally {
      reader.close()
    }
  }
}
\ No newline at end of file
......@@ -14,15 +14,19 @@
*/
package nl.lumc.sasc.biopet.pipelines.shiva
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
import java.io.File
import nl.lumc.sasc.biopet.core.{PipelineCommand, Reference}
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
import nl.lumc.sasc.biopet.extensions.gatk._
import nl.lumc.sasc.biopet.extensions.tools.ValidateVcf
import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions
import nl.lumc.sasc.biopet.pipelines.kopisu.Kopisu
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
import nl.lumc.sasc.biopet.pipelines.toucan.Toucan
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.function.QFunction
/**
* This is a trait for the Shiva pipeline
......@@ -175,6 +179,8 @@ class Shiva(val root: Configurable) extends QScript with MultisampleMappingTrait
override def addMultiSampleJobs() = {
super.addMultiSampleJobs()
addAll(dbsnpVcfFile.map(Shiva.makeValidateVcfJobs(this, _, referenceFasta())).getOrElse(Nil))
multisampleVariantCalling.foreach(vc => {
vc.outputDir = new File(outputDir, "variantcalling")
vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) }
......@@ -251,4 +257,23 @@ class Shiva(val root: Configurable) extends QScript with MultisampleMappingTrait
}
/** This object gives a default main method to the pipelines */
object Shiva extends PipelineCommand
\ No newline at end of file
object Shiva extends PipelineCommand {

  // Holds every (vcf, reference) pair seen so far, this is used to only execute 1 validation per vcf file
  private var validateVcfSeen: Set[(File, File)] = Set()

  /**
   * Creates the jobs that validate a vcf file against a reference.
   *
   * The first call for a given (vcfFile, referenceFile) pair returns a
   * ValidateVcf job followed by a CheckValidateVcf job that reads the job
   * output file of the ValidateVcf job. Every later call with the same pair
   * returns an empty list.
   *
   * @param root Configurable handed to the ValidateVcf job
   * @param vcfFile Vcf file to validate
   * @param referenceFile Reference file to validate against
   * @return The jobs to add, or Nil when this pair was already requested
   */
  def makeValidateVcfJobs(root: Configurable, vcfFile: File, referenceFile: File): List[QFunction] = {
    val key = (vcfFile, referenceFile)
    if (!validateVcfSeen(key)) {
      validateVcfSeen += key

      val validation = new ValidateVcf(root)
      validation.inputVcf = vcfFile
      validation.reference = referenceFile

      val logCheck = new CheckValidateVcf
      logCheck.inputLogFile = validation.jobOutputFile

      List(validation, logCheck)
    } else Nil
  }
}
\ No newline at end of file
......@@ -85,6 +85,8 @@ class ShivaVariantcalling(val root: Configurable) extends QScript
require(inputBams.nonEmpty, "No input bams found")
require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + callersList.map(_.name).mkString(", "))
addAll(dbsnpVcfFile.map(Shiva.makeValidateVcfJobs(this, _, referenceFasta())).getOrElse(Nil))
val cv = new CombineVariants(qscript)
cv.out = finalFile
cv.setKey = Some("VariantCaller")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment