Commit 22b4b757 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added filter step

parent f0fd2b99
package nl.lumc.sasc.biopet.core.apps
import htsjdk.variant.variantcontext.writer.AsyncVariantContextWriter
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder
import htsjdk.variant.vcf.VCFFileReader
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
import scala.collection.JavaConversions._
class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction {
javaMainClass = getClass.getName
@Input(doc = "Input vcf", shortName = "I", required = true)
var inputVcf: File = _
@Output(doc = "Output vcf", shortName = "o", required = false)
var outputVcf: File = _
var minSampleDepth: Option[Int] = _
var minTotalDepth: Option[Int] = _
var minAlternateDepth: Option[Int] = _
var minSamplesPass: Option[Int] = _
override val defaultVmem = "8G"
memoryLimit = Option(4.0)
override def afterGraph {
minSampleDepth = config("min_sample_depth")
minTotalDepth = config("min_total_depth")
minAlternateDepth = config("min_alternate_depth")
minSamplesPass = config("min_samples_pass")
}
override def commandLine = super.commandLine +
required("-I", inputVcf) +
required("-o", outputVcf) +
optional("-minSampleDepth", minSampleDepth) +
optional("-minTotalDepth", minTotalDepth) +
optional("-minAlternateDepth", minAlternateDepth) +
optional("-minSamplesPass", minSamplesPass)
}
object VcfFilter {
var inputVcf: File = _
var outputVcf: File = _
var minSampleDepth = -1
var minTotalDepth = -1
var minAlternateDepth = -1
var minSamplesPass = 0
/**
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
for (t <- 0 until args.size) {
args(t) match {
case "-I" => inputVcf = new File(args(t+1))
case "-o" => outputVcf = new File(args(t+1))
case "-minSampleDepth" => minSampleDepth = args(t+1).toInt
case "-minTotalDepth" => minTotalDepth = args(t+1).toInt
case "-minAlternateDepth" => minAlternateDepth = args(t+1).toInt
case "-minSamplesPass" => minSamplesPass = args(t+1).toInt
case _ =>
}
}
if (inputVcf == null) throw new IllegalStateException("No inputVcf, use -I")
if (outputVcf == null) throw new IllegalStateException("No outputVcf, use -o")
val reader = new VCFFileReader(inputVcf)
val header = reader.getFileHeader
val builder = new VariantContextWriterBuilder().setOutputFile(outputVcf)
val writer = new AsyncVariantContextWriter(builder.build)
writer.writeHeader(header)
for (record <- reader) {
val chr = record.getChr
val pos = record.getStart
val genotypes = for (genotype <- record.getGenotypes) yield {
genotype.getDP >= minSampleDepth &&
List(genotype.getAD:_*).tail.count(_ >= minAlternateDepth) > 0
}
if (record.getAttributeAsInt("DP", -1) >= minTotalDepth && genotypes.count(_ == true) >= minSamplesPass) {
writer.add(record)
}
}
reader.close
writer.close
}
}
\ No newline at end of file
......@@ -199,7 +199,6 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
gatkVariantcalling.biopetScript
addAll(gatkVariantcalling.functions)
libraryOutput.variantcalling = gatkVariantcalling.scriptOutput
//libraryOutput.preProcesBamFile = gatkVariantcalling.outputFiles("final_bam")
return libraryOutput
}
......
......@@ -2,7 +2,7 @@ package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import java.io.File
import nl.lumc.sasc.biopet.core.apps.MpileupToVcf
import nl.lumc.sasc.biopet.core.apps.{ MpileupToVcf, VcfFilter }
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.gatk.{AnalyzeCovariates,BaseRecalibrator,GenotypeGVCFs,HaplotypeCaller,IndelRealigner,PrintReads,RealignerTargetCreator}
import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
......@@ -90,10 +90,16 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr
add(m2v)
scriptOutput.rawVcfFile = m2v.output
val vcfFilter = new VcfFilter(this)
vcfFilter.defaults ++= Map("min_sample_depth" -> 8, "min_alternate_depth" -> 4, "min_samples_pass" -> 1)
vcfFilter.inputVcf = m2v.output
vcfFilter.outputVcf = this.swapExt(outputDir, m2v.output, ".vcf", ".filter.vcf.gz")
add(vcfFilter)
val hcAlleles = new HaplotypeCaller(this)
hcAlleles.input_file = scriptOutput.bamFiles
hcAlleles.out = outputDir + outputName + ".genotype_raw_alleles.vcf.gz"
hcAlleles.alleles = m2v.output
hcAlleles.alleles = vcfFilter.outputVcf
hcAlleles.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
add(hcAlleles)
scriptOutput.rawGenotypeVcf = hcAlleles.out
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment