diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala index aacc32bb1aff5fa5644ebab4e54cc109e649698f..85b2e236f77aab5e3f40d8de3c367caab989d98e 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala @@ -60,7 +60,8 @@ object VcfFilter extends ToolCommand { denovoInSample: String = null, diffGenotype: List[(String, String)] = Nil, filterHetVarToHomVar: List[(String, String)] = Nil, - filterRefCalls: Boolean = false) extends AbstractArgs + filterRefCalls: Boolean = false, + filterNoCalls: Boolean = false) extends AbstractArgs class OptParser extends AbstractOptParser { opt[File]('I', "inputVcf") required () maxOccurs (1) valueName ("<file>") action { (x, c) => @@ -101,6 +102,9 @@ object VcfFilter extends ToolCommand { opt[Unit]("filterRefCalls") unbounded () action { (x, c) => c.copy(filterRefCalls = true) } text ("Filter when there are only ref calls") + opt[Unit]("filterNoCalls") unbounded () action { (x, c) => + c.copy(filterNoCalls = true) + } text ("Filter when there are only no calls") } var commandArgs: Args = _ @@ -117,19 +121,12 @@ object VcfFilter extends ToolCommand { val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().setOutputFile(commandArgs.outputVcf).build) writer.writeHeader(header) - val bamDPFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-DP-")) yield line.getID).toList - for (record <- reader) { - val genotypes = for (genotype <- record.getGenotypes) yield { - val DP = if (genotype.hasDP) genotype.getDP else -1 - val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil - DP >= commandArgs.minSampleDepth && - (if (!AD.isEmpty) AD.tail.count(_ >= commandArgs.minAlternateDepth) > 0 else true) && - !(commandArgs.filterRefCalls && genotype.isHomRef) - } - - if (record.getAttributeAsInt("DP", -1) >= commandArgs.minTotalDepth && - genotypes.count(_ == true) >= commandArgs.minSamplesPass && + if (filterRefCalls(record) && + filterNoCalls(record) && + minTotalDepth(record) && + minSampleDepth(record) && + minAlternateDepth(record) && minBamAlternateDepth(record, header) && mustHaveVariant(record) && notSameGenotype(record) && @@ -142,6 +139,34 @@ object VcfFilter extends ToolCommand { writer.close } + def filterRefCalls(record: VariantContext): Boolean = { + if (commandArgs.filterNoCalls) record.getGenotypes.exists(g => !g.isHomRef) + else true + } + + def filterNoCalls(record: VariantContext): Boolean = { + if (commandArgs.filterNoCalls) record.getGenotypes.exists(g => !g.isNoCall) + else true + } + + def minTotalDepth(record: VariantContext): Boolean = { + record.getAttributeAsInt("DP", -1) >= commandArgs.minTotalDepth + } + + def minSampleDepth(record: VariantContext): Boolean = { + record.getGenotypes.count(genotype => { + val DP = if (genotype.hasDP) genotype.getDP else -1 + DP >= commandArgs.minSampleDepth + }) >= commandArgs.minSamplesPass + } + + def minAlternateDepth(record: VariantContext): Boolean = { + record.getGenotypes.count(genotype => { + val AD = if (genotype.hasAD) List(genotype.getAD: _*) else Nil + if (!AD.isEmpty) AD.tail.count(_ >= commandArgs.minAlternateDepth) > 0 else true + }) >= commandArgs.minSamplesPass + } + def minBamAlternateDepth(record: VariantContext, header: VCFHeader): Boolean = { val bamADFields = (for (line <- header.getInfoHeaderLines if line.getID.startsWith("BAM-AD-")) yield line.getID).toList