From 0ab4c7018932a91dd51dc97887cc6c4772840c84 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg <RedmarvandenBerg@lumc.nl> Date: Fri, 25 Oct 2019 10:09:49 +0200 Subject: [PATCH] Fixed bug with partial no calls To fix this, we had to rewrite the logic used when comparing variants. As a side effect, 'allele_no_call' is now only counted for partial calls when they occur in the call set. The test case test_partial_positive_no_call was updated to reflect this change. Also fixed an error where the known and called variants were switched when they were being passed to parse_variants. --- tests/test_evaluate.py | 2 +- vtools/evaluate.py | 20 +++++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py index e62dda0..26e142d 100644 --- a/tests/test_evaluate.py +++ b/tests/test_evaluate.py @@ -261,7 +261,7 @@ def test_partial_positive_concordant(partial_positive_file): def test_partial_positive_no_call(partial_positive_file): - assert partial_positive_file['alleles_no_call'] == 6 + assert partial_positive_file['alleles_no_call'] == 0 @pytest.fixture(scope='module') diff --git a/vtools/evaluate.py b/vtools/evaluate.py index 0369ff0..c5a657b 100644 --- a/vtools/evaluate.py +++ b/vtools/evaluate.py @@ -16,16 +16,16 @@ def parse_variants(ref: str, call: List[str], pos: List[str], results: Dict[str, int]): """ Parse the variants and add to results """ - call_variant = sorted(call) - pos_variant = sorted(pos) + call_variant = set(call) + pos_variant = set(pos) # The types of concordant calls are counted separately if call_variant == pos_variant: # These variants are homozygous reference - if call_variant[0] == call_variant[1] and call_variant[0] == ref: + if len(call_variant) == 1 and next(iter(call_variant)) == ref: results['alleles_hom_ref_concordant'] += 2 # These variants are heterozygous, since they have different calls - elif call_variant[0] != call_variant[1]: + elif len(call_variant) > 1: results['alleles_het_concordant'] += 2 # If they are not homozygous reference, and not heterozygous, they must # be homozygous alternative, for whichever alt allele they have @@ -33,12 +33,14 @@ def parse_variants(ref: str, call: List[str], pos: List[str], results['alleles_hom_alt_concordant'] += 2 # Here we count all alleles independently, also for the concordant calls - for i, j in zip(call_variant, pos_variant): - # If one of the variants is partial no call - if i == '.' or j == '.': + for allele in call: + if allele == '.': results['alleles_no_call'] += 1 - elif i == j: + elif allele in pos: results['alleles_concordant'] += 1 + # We cannot match an A/A call with A/G, so we need to remove the + # A call from the positive set once we have 'used' it + pos.remove(allele) else: results['alleles_discordant'] += 1 @@ -204,7 +206,7 @@ def site_concordancy(call_vcf: VCF, # We also need to know the reference call to determine if variants # are hom ref. For this, we take the positive vcf REF call to be # the truth - parse_variants(pos_record.REF, pos_gt, cal_gt, d) + parse_variants(pos_record.REF, cal_gt, pos_gt, d) # The current variant is discordant if d['alleles_discordant'] > discordant_count: -- GitLab