Commit 34801b91 authored by van den Berg's avatar van den Berg

Add comment for decomposed variants

parent c2d64d27
Pipeline #2773 failed with stage
in 24 seconds
......@@ -117,7 +117,7 @@ def test_truncated_called_no_call(NA12878_call_truncated):
@pytest.fixture(scope='module')
def NA12878_positive_truncated():
""" When the known set is truncated, i.e. the called vcf file contains
""" When the positive set is truncated, i.e. the called vcf file contains
variants which are absent from the positive vcf file """
filename = 'tests/cases/gatk.vcf.gz'
truncated = 'tests/cases/gatk_truncated.vcf.gz'
......@@ -129,7 +129,7 @@ def NA12878_positive_truncated():
return d
def test_truncated_known_no_call(NA12878_positive_truncated):
""" Variants which are missing from the known vcf do not count towards
def test_truncated_positive_no_call(NA12878_positive_truncated):
""" Variants which are missing from the positive vcf do not count towards
alleles_no_call """
assert NA12878_positive_truncated['alleles_no_call'] == 0
......@@ -66,6 +66,7 @@ def site_concordancy(call_vcf: VCF,
}
discordant_count = 0
discordant_records = list()
for pos_record in positive_vcf:
d['total_sites'] += 1
query_str = "{0}:{1}-{2}".format(
......@@ -76,7 +77,13 @@ def site_concordancy(call_vcf: VCF,
it = call_vcf(query_str)
same = []
# If the vcf file has been decomposed, there can be multiple sites with
# the same CHROM and POS, which is why we have to iterate over all the
# sites that are returned
for it_record in it:
# We only want to consider sites that have the same CHROM, POS, REF
# and ALT as the positive vcf, since otherwise we might consider
# A/T and A/G as identical since they are both heterozygous
if (it_record.CHROM == pos_record.CHROM
and it_record.POS == pos_record.POS
and it_record.REF == pos_record.REF
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment