From 0ab4c7018932a91dd51dc97887cc6c4772840c84 Mon Sep 17 00:00:00 2001
From: Redmar van den Berg <RedmarvandenBerg@lumc.nl>
Date: Fri, 25 Oct 2019 10:09:49 +0200
Subject: [PATCH] Fixed bug with partial no calls

To fix this, we had to rewrite the logic used when comparing variants.
As a side effect, 'allele_no_call' is now only counted for partial calls
when they occur in the call set. The test case
test_partial_positive_no_call was updated to reflect this change.

Also fixed an error where the known and called variants were switched
when they were being passed to parse_variants.
---
 tests/test_evaluate.py |  2 +-
 vtools/evaluate.py     | 20 +++++++++++---------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py
index e62dda0..26e142d 100644
--- a/tests/test_evaluate.py
+++ b/tests/test_evaluate.py
@@ -261,7 +261,7 @@ def test_partial_positive_concordant(partial_positive_file):
 
 
 def test_partial_positive_no_call(partial_positive_file):
-    assert partial_positive_file['alleles_no_call'] == 6
+    assert partial_positive_file['alleles_no_call'] == 0
 
 
 @pytest.fixture(scope='module')
diff --git a/vtools/evaluate.py b/vtools/evaluate.py
index 0369ff0..c5a657b 100644
--- a/vtools/evaluate.py
+++ b/vtools/evaluate.py
@@ -16,16 +16,16 @@ def parse_variants(ref: str, call: List[str], pos: List[str],
                    results: Dict[str, int]):
     """ Parse the variants and add to results """
 
-    call_variant = sorted(call)
-    pos_variant = sorted(pos)
+    call_variant = set(call)
+    pos_variant = set(pos)
 
     # The types of concordant calls are counted separately
     if call_variant == pos_variant:
         # These variants are homozygous reference
-        if call_variant[0] == call_variant[1] and call_variant[0] == ref:
+        if len(call_variant) == 1 and next(iter(call_variant)) == ref:
             results['alleles_hom_ref_concordant'] += 2
         # These variants are heterozygous, since they have different calls
-        elif call_variant[0] != call_variant[1]:
+        elif len(call_variant) > 1:
             results['alleles_het_concordant'] += 2
         # If they are not homozygous reference, and not heterozygous, they must
         # be homozygous alternative, for whichever alt allele they have
@@ -33,12 +33,14 @@ def parse_variants(ref: str, call: List[str], pos: List[str],
             results['alleles_hom_alt_concordant'] += 2
 
     # Here we count all alleles independently, also for the concordant calls
-    for i, j in zip(call_variant, pos_variant):
-        # If one of the variants is partial no call
-        if i == '.' or j == '.':
+    for allele in call:
+        if allele == '.':
             results['alleles_no_call'] += 1
-        elif i == j:
+        elif allele in pos:
             results['alleles_concordant'] += 1
+            # We cannot match an A/A call with A/G, so we need to remove the
+            # A call from the positive set once we have 'used' it
+            pos.remove(allele)
         else:
             results['alleles_discordant'] += 1
 
@@ -204,7 +206,7 @@ def site_concordancy(call_vcf: VCF,
             # We also need to know the reference call to determine if variants
             # are hom ref. For this, we take the positive vcf REF call to be
             # the truth
-            parse_variants(pos_record.REF, pos_gt, cal_gt, d)
+            parse_variants(pos_record.REF, cal_gt, pos_gt, d)
 
             # The current variant is discordant
             if d['alleles_discordant'] > discordant_count:
-- 
GitLab