From d55c268446ca2d371cfc54a6cb56efc9fb2428ce Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Wed, 12 Aug 2015 17:15:17 +0200 Subject: [PATCH] Review refactor --- .../nl/lumc/sasc/biopet/tools/VcfFilter.scala | 18 +++++++------ .../sasc/biopet/tools/VcfFilterTest.scala | 25 ++++++++++--------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala index 0ddfc864a..981203de4 100644 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala +++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/VcfFilter.scala @@ -72,7 +72,7 @@ object VcfFilter extends ToolCommand { minSamplesPass: Int = 1, mustHaveVariant: List[String] = Nil, calledIn: List[String] = Nil, - mustHaveGenotype: List[String] = Nil, + mustHaveGenotype: List[(String, GenotypeType)] = Nil, deNovoInSample: String = null, resToDom: List[Trio] = Nil, trioCompound: List[Trio] = Nil, @@ -128,9 +128,11 @@ object VcfFilter extends ToolCommand { c.copy(calledIn = x :: c.calledIn) } text "Must be called in this sample" opt[String]("mustHaveGenotype") unbounded () valueName "<sample:genotype>" action { (x, c) => - c.copy(mustHaveGenotype = x :: c.mustHaveGenotype) - } validate { x => if (x.split(":").length == 2 && Set("HET", "HOM_REF", "HOM_VAR", "MIXED", "NO_CALL", "UNAVAILABLE").contains(x.split(":")(1))) success else failure("--mustHaveGenotype should be in this format: sample:genotype") - } text "Must have genotoype <genotype> for this sample. Genotype can be HET, HOM_REF, HOM_VAR, MIXED, NO_CALL and UNAVAILABLE" + c.copy(mustHaveGenotype = (x.split(":")(0), GenotypeType.valueOf(x.split(":")(1))) :: c.mustHaveGenotype) + } validate { x => + if (x.split(":").length == 2 && GenotypeType.values().contains(x.split(":")(1))) success + else failure("--mustHaveGenotype should be in this format: sample:genotype") + } text "Must have genotoype <genotype> for this sample. Genotype can be " + GenotypeType.values().mkString(", ") opt[String]("diffGenotype") unbounded () valueName "<sample:sample>" action { (x, c) => c.copy(diffGenotype = (x.split(":")(0), x.split(":")(1)) :: c.diffGenotype) } validate { x => if (x.split(":").length == 2) success else failure("--notSameGenotype should be in this format: sample:sample") @@ -188,7 +190,8 @@ object VcfFilter extends ToolCommand { hasMinSampleDepth(record, cmdArgs.minSampleDepth, cmdArgs.minSamplesPass) && minAlternateDepth(record, cmdArgs.minAlternateDepth, cmdArgs.minSamplesPass) && (cmdArgs.mustHaveVariant.isEmpty || mustHaveVariant(record, cmdArgs.mustHaveVariant)) && - calledIn(record, cmdArgs.calledIn) && hasGenotype(record, cmdArgs.mustHaveGenotype) && + calledIn(record, cmdArgs.calledIn) && + hasGenotype(record, cmdArgs.mustHaveGenotype) && (cmdArgs.diffGenotype.isEmpty || cmdArgs.diffGenotype.forall(x => notSameGenotype(record, x._1, x._2))) && ( cmdArgs.filterHetVarToHomVar.isEmpty || @@ -231,9 +234,8 @@ object VcfFilter extends ToolCommand { * @param samplesGenotypes samples and their associated genotypes to be checked (of format sample:genotype) * @return false when filter fails */ - def hasGenotype(record: VariantContext, samplesGenotypes: List[String]): Boolean = { - if (!samplesGenotypes.forall(x => record.getGenotype(x.split(":")(0)).getType == GenotypeType.valueOf(x.split(":")(1)))) false - else true + def hasGenotype(record: VariantContext, samplesGenotypes: List[(String, GenotypeType)]): Boolean = { + samplesGenotypes.forall(x => record.getGenotype(x._1).getType == x._2) } /** diff --git a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala index 2f4709a9c..2b47405a4 100644 --- a/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala +++ b/public/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/VcfFilterTest.scala @@ -18,6 +18,7 @@ package nl.lumc.sasc.biopet.tools import java.io.File import java.nio.file.Paths +import htsjdk.variant.variantcontext.GenotypeType import htsjdk.variant.vcf.VCFFileReader import org.scalatest.Matchers import org.scalatest.mock.MockitoSugar @@ -64,20 +65,20 @@ class VcfFilterTest extends TestNGSuite with MockitoSugar with Matchers { val reader = new VCFFileReader(vepped, false) val record = reader.iterator().next() - hasGenotype(record, List("Child_7006504:HET")) shouldBe true - hasGenotype(record, List("Child_7006504:HOM_VAR")) shouldBe false - hasGenotype(record, List("Child_7006504:HOM_REF")) shouldBe false - hasGenotype(record, List("Child_7006504:NO_CALL")) shouldBe false - hasGenotype(record, List("Child_7006504:MIXED")) shouldBe false + hasGenotype(record, List(("Child_7006504", GenotypeType.HET))) shouldBe true + hasGenotype(record, List(("Child_7006504", GenotypeType.HOM_VAR))) shouldBe false + hasGenotype(record, List(("Child_7006504", GenotypeType.HOM_REF))) shouldBe false + hasGenotype(record, List(("Child_7006504", GenotypeType.NO_CALL))) shouldBe false + hasGenotype(record, List(("Child_7006504", GenotypeType.MIXED))) shouldBe false - hasGenotype(record, List("Mother_7006508:HET")) shouldBe false - hasGenotype(record, List("Mother_7006508:HOM_VAR")) shouldBe false - hasGenotype(record, List("Mother_7006508:HOM_REF")) shouldBe true - hasGenotype(record, List("Mother_7006508:NO_CALL")) shouldBe false - hasGenotype(record, List("Mother_7006508:MIXED")) shouldBe false + hasGenotype(record, List(("Mother_7006508", GenotypeType.HET))) shouldBe false + hasGenotype(record, List(("Mother_7006508", GenotypeType.HOM_VAR))) shouldBe false + hasGenotype(record, List(("Mother_7006508", GenotypeType.HOM_REF))) shouldBe true + hasGenotype(record, List(("Mother_7006508", GenotypeType.NO_CALL))) shouldBe false + hasGenotype(record, List(("Mother_7006508", GenotypeType.MIXED))) shouldBe false - hasGenotype(record, List("Mother_7006508:HOM_REF", "Child_7006504:HET")) shouldBe true - hasGenotype(record, List("Mother_7006508:HET", "Child_7006504:HOM_HET")) shouldBe false + hasGenotype(record, List(("Mother_7006508", GenotypeType.HOM_REF), ("Child_7006504", GenotypeType.HET))) shouldBe true + hasGenotype(record, List(("Mother_7006508", GenotypeType.HET), ("Child_7006504", GenotypeType.HOM_REF))) shouldBe false } } -- GitLab