Commit f9aadab3 authored by van den Berg's avatar van den Berg

Add support for parital calls

parent d6a8d0e5
Pipeline #2780 failed with stage
in 22 seconds
......@@ -184,3 +184,79 @@ def test_haploid_call_file():
match='Non-diploid variants are not supported'):
site_concordancy(call, positive, call_samples=['BLANK'],
positive_samples=['NA12878'])
@pytest.fixture(scope='module')
def partial_call_file():
""" Test statistics when the call vcf contains partial variants """
filename = 'tests/cases/gatk.vcf.gz'
partial = 'tests/cases/gatk_partial_call.vcf.gz'
call = VCF(partial, gts012=True)
positive = VCF(filename, gts012=True)
d, disc = site_concordancy(call, positive, call_samples=['BLANK'],
positive_samples=['BLANK'], min_dp=0, min_gq=0)
return d
def test_partial_call_total_sites(partial_call_file):
assert partial_call_file['total_sites'] == 37
def test_partial_call_alleles_hom_ref_concordant(partial_call_file):
assert partial_call_file['alleles_hom_ref_concordant'] == 0
def test_partial_call_alleles_het_concordant(partial_call_file):
assert partial_call_file['alleles_het_concordant'] == 0
def test_partial_call_alleles_hom_alt_concordant(partial_call_file):
assert partial_call_file['alleles_hom_alt_concordant'] == 0
def test_partial_call_alleles_concordant(partial_call_file):
assert partial_call_file['alleles_concordant'] == 6
def test_partial_call_alleles_discordant(partial_call_file):
assert partial_call_file['alleles_discordant'] == 0
def test_partial_call_alleles_no_call(partial_call_file):
assert partial_call_file['alleles_no_call'] == 68
@pytest.fixture(scope='module')
def partial_positive_file():
""" Test statistics when the call vcf contains partial variants """
filename = 'tests/cases/gatk.vcf.gz'
partial = 'tests/cases/gatk_partial_call.vcf.gz'
call = VCF(filename, gts012=True)
positive = VCF(partial, gts012=True)
d, disc = site_concordancy(call, positive, call_samples=['BLANK'],
positive_samples=['BLANK'], min_dp=0, min_gq=0)
return d
def test_partial_positive_total_sites(partial_positive_file):
assert partial_positive_file['total_sites'] == 6
def test_partial_positive_hom_ref_concordant(partial_positive_file):
assert partial_positive_file['alleles_hom_ref_concordant'] == 0
def test_partial_positive_het_concordant(partial_positive_file):
assert partial_positive_file['alleles_het_concordant'] == 0
def test_partial_positive_hom_alt_concordant(partial_positive_file):
assert partial_positive_file['alleles_hom_alt_concordant'] == 0
def test_partial_positive_concordant(partial_positive_file):
assert partial_positive_file['alleles_concordant'] == 6
def test_partial_positive_no_call(partial_positive_file):
assert partial_positive_file['alleles_no_call'] == 6
......@@ -34,7 +34,10 @@ def parse_variants(ref: str, call: List[str], pos: List[str],
# Here we count all alleles independently, also for the concordant calls
for i, j in zip(call_variant, pos_variant):
if i == j:
# If one of the variants is partial no call
if i == '.' or j == '.':
results['alleles_no_call'] += 1
elif i == j:
results['alleles_concordant'] += 1
else:
results['alleles_discordant'] += 1
......@@ -143,7 +146,8 @@ def site_concordancy(call_vcf: VCF,
p_gt = pos_record.gt_types[p_s]
c_gt = call_record.gt_types[c_s]
# Is there missing data in one of the vcf files?
# Is one of the sites no call. This is only for fully no call
# sites. Partial no call site are handled in parse_variants
if p_gt == 3 or c_gt == 3:
d['alleles_no_call'] += 2
continue
......@@ -188,9 +192,13 @@ def site_concordancy(call_vcf: VCF,
pos_alleles = [pos_record.REF] + pos_record.ALT
cal_alleles = [call_record.REF] + call_record.ALT
# The third item in pos is a boolean indicating phasing
pos_gt = [pos_alleles[x] for x in pos[:2]]
cal_gt = [cal_alleles[x] for x in cal[:2]]
# The third item in pos is a boolean indicating phasing, so we
# don't use that
# No call is indicated by -1 in pyvcf2, so here we convert negative
# values to '.', which is what is used in the vcf to indicate no
# call
pos_gt = [pos_alleles[x] if x >= 0 else '.' for x in pos[:2]]
cal_gt = [cal_alleles[x] if x >= 0 else '.' for x in cal[:2]]
# Parse the genotypes and add the results into d
# We also need to know the reference call to determine if variants
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment