From 34801b917bededdf2d86367680fd59b2d6e7199c Mon Sep 17 00:00:00 2001 From: Redmar van den Berg <RedmarvandenBerg@lumc.nl> Date: Tue, 22 Oct 2019 09:54:57 +0200 Subject: [PATCH] Add comment for decomposed variants --- tests/test_evaluate.py | 6 +++--- vtools/evaluate.py | 7 +++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py index e4afd85..dbb3cb5 100644 --- a/tests/test_evaluate.py +++ b/tests/test_evaluate.py @@ -117,7 +117,7 @@ def test_truncated_called_no_call(NA12878_call_truncated): @pytest.fixture(scope='module') def NA12878_positive_truncated(): - """ When the known set is truncated, i.e. the called vcf file contains + """ When the positive set is truncated, i.e. the called vcf file contains variants which are absent from the positive vcf file """ filename = 'tests/cases/gatk.vcf.gz' truncated = 'tests/cases/gatk_truncated.vcf.gz' @@ -129,7 +129,7 @@ def NA12878_positive_truncated(): return d -def test_truncated_known_no_call(NA12878_positive_truncated): - """ Variants which are missing from the known vcf do not count towards +def test_truncated_positive_no_call(NA12878_positive_truncated): + """ Variants which are missing from the positive vcf do not count towards alleles_no_call """ assert NA12878_positive_truncated['alleles_no_call'] == 0 diff --git a/vtools/evaluate.py b/vtools/evaluate.py index f42dc15..891c450 100644 --- a/vtools/evaluate.py +++ b/vtools/evaluate.py @@ -66,6 +66,7 @@ def site_concordancy(call_vcf: VCF, } discordant_count = 0 discordant_records = list() + for pos_record in positive_vcf: d['total_sites'] += 1 query_str = "{0}:{1}-{2}".format( @@ -76,7 +77,13 @@ def site_concordancy(call_vcf: VCF, it = call_vcf(query_str) same = [] + # If the vcf file has been decomposed, there can be multiple sites with + # the same CHROM and POS, which is why we have to iterate over all the + # sites that are returned for it_record in it: + # We only want to consider sites that have the same CHROM, POS, REF + # and ALT as the positive vcf, since otherwise we might consider + # A/T and A/G as identical since they are both heterozygous if (it_record.CHROM == pos_record.CHROM and it_record.POS == pos_record.POS and it_record.REF == pos_record.REF -- GitLab