From 1bbf15aaedc3f3c2e75889888fa8278d9cee62d4 Mon Sep 17 00:00:00 2001 From: Sander Bollen <a.h.b.bollen@lumc.nl> Date: Thu, 6 Aug 2015 17:40:38 +0200 Subject: [PATCH] VcfFilter addition for musthavegenotypes. See #186. Ended up using HTSJDK nomenclature for genotype types I.e: HET, HOM_REF, HOM_VAR, MIXED, NO_CALL and UNAVAILABLE --- .../nl/lumc/sasc/biopet/tools/VcfFilter.scala | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala index 92e82b968..0ddfc864a 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala @@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.tools import java.io.File -import htsjdk.variant.variantcontext.VariantContext +import htsjdk.variant.variantcontext.{ GenotypeType, VariantContext } import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder } import htsjdk.variant.vcf.VCFFileReader import nl.lumc.sasc.biopet.core.config.Configurable @@ -72,6 +72,7 @@ object VcfFilter extends ToolCommand { minSamplesPass: Int = 1, mustHaveVariant: List[String] = Nil, calledIn: List[String] = Nil, + mustHaveGenotype: List[String] = Nil, deNovoInSample: String = null, resToDom: List[Trio] = Nil, trioCompound: List[Trio] = Nil, @@ -126,6 +127,10 @@ object VcfFilter extends ToolCommand { opt[String]("calledIn") unbounded () valueName "<sample>" action { (x, c) => c.copy(calledIn = x :: c.calledIn) } text "Must be called in this sample" + opt[String]("mustHaveGenotype") unbounded () valueName "<sample:genotype>" action { (x, c) => + c.copy(mustHaveGenotype = x :: c.mustHaveGenotype) + } validate { x => if (x.split(":").length == 2 && Set("HET", "HOM_REF", "HOM_VAR", "MIXED", "NO_CALL", "UNAVAILABLE").contains(x.split(":")(1))) success else failure("--mustHaveGenotype should be in this format: sample:genotype") + } text "Must have genotoype <genotype> for this sample. Genotype can be HET, HOM_REF, HOM_VAR, MIXED, NO_CALL and UNAVAILABLE" opt[String]("diffGenotype") unbounded () valueName "<sample:sample>" action { (x, c) => c.copy(diffGenotype = (x.split(":")(0), x.split(":")(1)) :: c.diffGenotype) } validate { x => if (x.split(":").length == 2) success else failure("--notSameGenotype should be in this format: sample:sample") @@ -183,7 +188,7 @@ object VcfFilter extends ToolCommand { hasMinSampleDepth(record, cmdArgs.minSampleDepth, cmdArgs.minSamplesPass) && minAlternateDepth(record, cmdArgs.minAlternateDepth, cmdArgs.minSamplesPass) && (cmdArgs.mustHaveVariant.isEmpty || mustHaveVariant(record, cmdArgs.mustHaveVariant)) && - calledIn(record, cmdArgs.calledIn) && + calledIn(record, cmdArgs.calledIn) && hasGenotype(record, cmdArgs.mustHaveGenotype) && (cmdArgs.diffGenotype.isEmpty || cmdArgs.diffGenotype.forall(x => notSameGenotype(record, x._1, x._2))) && ( cmdArgs.filterHetVarToHomVar.isEmpty || @@ -220,6 +225,17 @@ object VcfFilter extends ToolCommand { else true } + /** + * Checks if given genotypes for given samples are there + * @param record VCF record + * @param samplesGenotypes samples and their associated genotypes to be checked (of format sample:genotype) + * @return false when filter fails + */ + def hasGenotype(record: VariantContext, samplesGenotypes: List[String]): Boolean = { + if (!samplesGenotypes.forall(x => record.getGenotype(x.split(":")(0)).getType == GenotypeType.valueOf(x.split(":")(1)))) false + else true + } + /** * Checks if record has atleast minQualScore * @param record VCF record -- GitLab