Skip to content
Snippets Groups Projects
Commit f9aadab3 authored by van den Berg's avatar van den Berg
Browse files

Add support for parital calls

parent d6a8d0e5
No related branches found
No related tags found
2 merge requests!6Merge testing into master,!5Merge new testing code into devel
Pipeline #2780 failed
File added
File added
......@@ -184,3 +184,79 @@ def test_haploid_call_file():
match='Non-diploid variants are not supported'):
site_concordancy(call, positive, call_samples=['BLANK'],
positive_samples=['NA12878'])
@pytest.fixture(scope='module')
def partial_call_file():
""" Test statistics when the call vcf contains partial variants """
filename = 'tests/cases/gatk.vcf.gz'
partial = 'tests/cases/gatk_partial_call.vcf.gz'
call = VCF(partial, gts012=True)
positive = VCF(filename, gts012=True)
d, disc = site_concordancy(call, positive, call_samples=['BLANK'],
positive_samples=['BLANK'], min_dp=0, min_gq=0)
return d
def test_partial_call_total_sites(partial_call_file):
assert partial_call_file['total_sites'] == 37
def test_partial_call_alleles_hom_ref_concordant(partial_call_file):
assert partial_call_file['alleles_hom_ref_concordant'] == 0
def test_partial_call_alleles_het_concordant(partial_call_file):
assert partial_call_file['alleles_het_concordant'] == 0
def test_partial_call_alleles_hom_alt_concordant(partial_call_file):
assert partial_call_file['alleles_hom_alt_concordant'] == 0
def test_partial_call_alleles_concordant(partial_call_file):
assert partial_call_file['alleles_concordant'] == 6
def test_partial_call_alleles_discordant(partial_call_file):
assert partial_call_file['alleles_discordant'] == 0
def test_partial_call_alleles_no_call(partial_call_file):
assert partial_call_file['alleles_no_call'] == 68
@pytest.fixture(scope='module')
def partial_positive_file():
""" Test statistics when the call vcf contains partial variants """
filename = 'tests/cases/gatk.vcf.gz'
partial = 'tests/cases/gatk_partial_call.vcf.gz'
call = VCF(filename, gts012=True)
positive = VCF(partial, gts012=True)
d, disc = site_concordancy(call, positive, call_samples=['BLANK'],
positive_samples=['BLANK'], min_dp=0, min_gq=0)
return d
def test_partial_positive_total_sites(partial_positive_file):
assert partial_positive_file['total_sites'] == 6
def test_partial_positive_hom_ref_concordant(partial_positive_file):
assert partial_positive_file['alleles_hom_ref_concordant'] == 0
def test_partial_positive_het_concordant(partial_positive_file):
assert partial_positive_file['alleles_het_concordant'] == 0
def test_partial_positive_hom_alt_concordant(partial_positive_file):
assert partial_positive_file['alleles_hom_alt_concordant'] == 0
def test_partial_positive_concordant(partial_positive_file):
assert partial_positive_file['alleles_concordant'] == 6
def test_partial_positive_no_call(partial_positive_file):
assert partial_positive_file['alleles_no_call'] == 6
......@@ -34,7 +34,10 @@ def parse_variants(ref: str, call: List[str], pos: List[str],
# Here we count all alleles independently, also for the concordant calls
for i, j in zip(call_variant, pos_variant):
if i == j:
# If one of the variants is partial no call
if i == '.' or j == '.':
results['alleles_no_call'] += 1
elif i == j:
results['alleles_concordant'] += 1
else:
results['alleles_discordant'] += 1
......@@ -143,7 +146,8 @@ def site_concordancy(call_vcf: VCF,
p_gt = pos_record.gt_types[p_s]
c_gt = call_record.gt_types[c_s]
# Is there missing data in one of the vcf files?
# Is one of the sites no call. This is only for fully no call
# sites. Partial no call site are handled in parse_variants
if p_gt == 3 or c_gt == 3:
d['alleles_no_call'] += 2
continue
......@@ -188,9 +192,13 @@ def site_concordancy(call_vcf: VCF,
pos_alleles = [pos_record.REF] + pos_record.ALT
cal_alleles = [call_record.REF] + call_record.ALT
# The third item in pos is a boolean indicating phasing
pos_gt = [pos_alleles[x] for x in pos[:2]]
cal_gt = [cal_alleles[x] for x in cal[:2]]
# The third item in pos is a boolean indicating phasing, so we
# don't use that
# No call is indicated by -1 in pyvcf2, so here we convert negative
# values to '.', which is what is used in the vcf to indicate no
# call
pos_gt = [pos_alleles[x] if x >= 0 else '.' for x in pos[:2]]
cal_gt = [cal_alleles[x] if x >= 0 else '.' for x in cal[:2]]
# Parse the genotypes and add the results into d
# We also need to know the reference call to determine if variants
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment