From 34801b917bededdf2d86367680fd59b2d6e7199c Mon Sep 17 00:00:00 2001
From: Redmar van den Berg <RedmarvandenBerg@lumc.nl>
Date: Tue, 22 Oct 2019 09:54:57 +0200
Subject: [PATCH] Add comment for decomposed variants

---
 tests/test_evaluate.py | 6 +++---
 vtools/evaluate.py     | 7 +++++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py
index e4afd85..dbb3cb5 100644
--- a/tests/test_evaluate.py
+++ b/tests/test_evaluate.py
@@ -117,7 +117,7 @@ def test_truncated_called_no_call(NA12878_call_truncated):
 
 @pytest.fixture(scope='module')
 def NA12878_positive_truncated():
-    """ When the known set is truncated, i.e. the called vcf file contains
+    """ When the positive set is truncated, i.e. the called vcf file contains
     variants which are absent from the positive vcf file """
     filename = 'tests/cases/gatk.vcf.gz'
     truncated = 'tests/cases/gatk_truncated.vcf.gz'
@@ -129,7 +129,7 @@ def NA12878_positive_truncated():
     return d
 
 
-def test_truncated_known_no_call(NA12878_positive_truncated):
-    """ Variants which are missing from the known vcf do not count towards
+def test_truncated_positive_no_call(NA12878_positive_truncated):
+    """ Variants which are missing from the positive vcf do not count towards
     alleles_no_call """
     assert NA12878_positive_truncated['alleles_no_call'] == 0
diff --git a/vtools/evaluate.py b/vtools/evaluate.py
index f42dc15..891c450 100644
--- a/vtools/evaluate.py
+++ b/vtools/evaluate.py
@@ -66,6 +66,7 @@ def site_concordancy(call_vcf: VCF,
     }
     discordant_count = 0
     discordant_records = list()
+
     for pos_record in positive_vcf:
         d['total_sites'] += 1
         query_str = "{0}:{1}-{2}".format(
@@ -76,7 +77,13 @@ def site_concordancy(call_vcf: VCF,
 
         it = call_vcf(query_str)
         same = []
+        # If the vcf file has been decomposed, there can be multiple sites with
+        # the same CHROM and POS, which is why we have to iterate over all the
+        # sites that are returned
         for it_record in it:
+            # We only want to consider sites that have the same CHROM, POS, REF
+            # and ALT as the positive vcf, since otherwise we might consider
+            # A/T and A/G as identical since they are both heterozygous
             if (it_record.CHROM == pos_record.CHROM
                     and it_record.POS == pos_record.POS
                     and it_record.REF == pos_record.REF
-- 
GitLab