Skip to content
Snippets Groups Projects
Commit ecff240c authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added 4 new options for vcfFilter

parent d057ab01
No related branches found
No related tags found
No related merge requests found
......@@ -2,7 +2,8 @@ package nl.lumc.sasc.biopet.tools
import htsjdk.variant.variantcontext.writer.AsyncVariantContextWriter
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder
import htsjdk.variant.vcf.VCFFileReader
import htsjdk.variant.vcf.{ VCFHeader, VCFFileReader }
import htsjdk.variant.variantcontext.VariantContext
import java.io.File
import java.util.ArrayList
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
......@@ -48,8 +49,18 @@ class VcfFilter(val root: Configurable) extends BiopetJavaCommandLineFunction {
}
object VcfFilter extends ToolCommand {
case class Args(inputVcf: File = null, outputVcf: File = null, minSampleDepth: Int = -1, minTotalDepth: Int = -1,
minAlternateDepth: Int = -1, minSamplesPass: Int = 0, minBamAlternateDepth: Int = 0, filterRefCalls: Boolean = false) extends AbstractArgs
case class Args(inputVcf: File = null,
outputVcf: File = null,
minSampleDepth: Int = -1,
minTotalDepth: Int = -1,
minAlternateDepth: Int = -1,
minSamplesPass: Int = 0,
minBamAlternateDepth: Int = 0,
mustHaveVariant: List[String] = Nil,
denovoInSample: String = null,
notSameGenotype: List[(String, String)] = Nil,
filterHetVarToHomVar: List[(String, String)] = Nil,
filterRefCalls: Boolean = false) extends AbstractArgs
class OptParser extends AbstractOptParser {
opt[File]('I', "inputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) =>
......@@ -73,24 +84,37 @@ object VcfFilter extends ToolCommand {
opt[Int]("minBamAlternateDepth") unbounded () action { (x, c) =>
c.copy(minBamAlternateDepth = x)
}
opt[String]("denovoInSample") maxOccurs (1) unbounded () action { (x, c) =>
c.copy(denovoInSample = x)
}
opt[String]("mustHaveVariant") unbounded () action { (x, c) =>
c.copy(mustHaveVariant = x :: c.mustHaveVariant)
}
opt[String]("notSameGenotype") unbounded () action { (x, c) =>
c.copy(notSameGenotype = (x.split(":")(0), x.split(":")(1)) :: c.notSameGenotype)
} validate { x => if (x.split(":").length == 2) success else failure("--notSameGenotype should be in this format: sample:sample") }
opt[String]("filterHetVarToHomVar") unbounded () action { (x, c) =>
c.copy(filterHetVarToHomVar = (x.split(":")(0), x.split(":")(1)) :: c.filterHetVarToHomVar)
} validate { x => if (x.split(":").length == 2) success else failure("--filterHetVarToHomVar should be in this format: sample:sample") }
opt[Unit]("filterRefCalls") unbounded () action { (x, c) =>
c.copy(filterRefCalls = true)
}
}
var commandArgs: Args = _
/**
* @param args the command line arguments
*/
def main(args: Array[String]): Unit = {
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
commandArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1)
val reader = new VCFFileReader(commandArgs.inputVcf, false)
val header = reader.getFileHeader
val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputVcf).build)
writer.writeHeader(header)
val bamADFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-AD-")) yield line.getID).toList
val bamDPFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-DP-")) yield line.getID).toList
for (record <- reader) {
......@@ -102,27 +126,74 @@ object VcfFilter extends ToolCommand {
!(commandArgs.filterRefCalls && genotype.isHomRef)
}
val bamADvalues = (for (field <- bamADFields) yield {
record.getAttribute(field, new ArrayList) match {
case t: ArrayList[_] if t.length > 1 => {
for (i <- 1 until t.size) yield {
t(i) match {
case a: Int => a > commandArgs.minBamAlternateDepth
case a: String => a.toInt > commandArgs.minBamAlternateDepth
case _ => false
}
}
}
case _ => List(false)
}
}).flatten
if (record.getAttributeAsInt("DP", -1) >= commandArgs.minTotalDepth &&
genotypes.count(_ == true) >= commandArgs.minSamplesPass &&
(commandArgs.minBamAlternateDepth <= 0 || bamADvalues.count(_ == true) >= commandArgs.minSamplesPass))
minBamAlternateDepth(record, header) &&
mustHaveVariant(record) &&
notSameGenotype(record) &&
filterHetVarToHomVar(record) &&
denovoInSample(record))
writer.add(record)
}
reader.close
writer.close
}
def minBamAlternateDepth(record: VariantContext, header: VCFHeader): Boolean = {
val bamADFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-AD-")) yield line.getID).toList
val bamADvalues = (for (field <- bamADFields) yield {
record.getAttribute(field, new ArrayList) match {
case t: ArrayList[_] if t.length > 1 => {
for (i <- 1 until t.size) yield {
t(i) match {
case a: Int => a > commandArgs.minBamAlternateDepth
case a: String => a.toInt > commandArgs.minBamAlternateDepth
case _ => false
}
}
}
case _ => List(false)
}
}).flatten
return commandArgs.minBamAlternateDepth <= 0 || bamADvalues.count(_ == true) >= commandArgs.minSamplesPass
}
def mustHaveVariant(record: VariantContext): Boolean = {
return !commandArgs.mustHaveVariant.map(record.getGenotype(_)).exists(a => a.isHomRef || a.isNoCall)
}
def notSameGenotype(record: VariantContext): Boolean = {
for ((sample1, sample2) <- commandArgs.notSameGenotype) {
val genotype1 = record.getGenotype(sample1)
val genotype2 = record.getGenotype(sample2)
if (genotype1.sameGenotype(genotype2)) return false
}
return true
}
def filterHetVarToHomVar(record: VariantContext): Boolean = {
for ((sample1, sample2) <- commandArgs.filterHetVarToHomVar) {
val genotype1 = record.getGenotype(sample1)
val genotype2 = record.getGenotype(sample2)
if (genotype1.isHet && !genotype1.getAlleles.forall(_.isNonReference)) {
for (allele <- genotype1.getAlleles if allele.isNonReference) {
if (genotype2.getAlleles.forall(_.basesMatch(allele))) return false
}
}
}
return true
}
def denovoInSample(record: VariantContext): Boolean = {
if (commandArgs.denovoInSample == null) return true
val genotype = record.getGenotype(commandArgs.denovoInSample)
for (allele <- genotype.getAlleles if allele.isNonReference) {
for (g <- record.getGenotypes if g.getSampleName != commandArgs.denovoInSample) {
if (g.getAlleles.exists(_.basesMatch(allele))) return false
}
}
return true
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment