Skip to content
Snippets Groups Projects
Commit c2d64d27 authored by van den Berg's avatar van den Berg
Browse files

Add test cases for truncated call vcf

There are two ways in which a variant can be a no-call. The variant can be
present in the VCF file with a ./. genotype, or the variant can be
missing from the VCF file altogether. This commit adds a test case to
verify that no-call sites are counted properly when they are missing
from the called VCF file.

Sites that are missing from the positive vcf file do not count towards
alleles_no_call, since we do not care about variants which are not
present in the positive vcf file.
parent 895f7541
No related branches found
No related tags found
2 merge requests!6Merge testing into master,!5Merge new testing code into devel
Pipeline #2772 failed
File added
File added
......@@ -94,5 +94,42 @@ def NA12878_BLANK():
return d
# def test_no_call2(NA12878_BLANK):
# assert NA12878_BLANK['alleles_no_call'] == 8
@pytest.fixture(scope='module')
def NA12878_call_truncated():
    """Concordancy counts when the call set is the truncated VCF.

    The truncated file is opened as the call set and the complete file as
    the positive set, so the call set is missing variants that the positive
    set contains. Sample NA12878 of the call file is compared against
    sample BLANK of the positive file.

    Returns the first of the two values produced by site_concordancy.
    """
    complete_path = 'tests/cases/gatk.vcf.gz'
    truncated_path = 'tests/cases/gatk_truncated.vcf.gz'

    # Readers are opened in the same order as before: call, then positive.
    call_vcf = VCF(truncated_path, gts012=True)
    positive_vcf = VCF(complete_path, gts012=True)

    # Only the first returned value is used by the tests; the second is
    # unpacked so the two-value return shape is still enforced.
    # NOTE(review): min_gq / min_dp look like genotype-quality and depth
    # thresholds -- confirm against site_concordancy.
    counts, _discordant = site_concordancy(
        call_vcf,
        positive_vcf,
        call_samples=['NA12878'],
        positive_samples=['BLANK'],
        min_gq=30,
        min_dp=20,
    )
    return counts
def test_truncated_called_no_call(NA12878_call_truncated):
    """Variants absent from the call VCF are tallied in alleles_no_call.

    With the truncated file as call set, the expected total is 12.
    """
    observed = NA12878_call_truncated['alleles_no_call']
    assert observed == 12
@pytest.fixture(scope='module')
def NA12878_positive_truncated():
    """Concordancy counts when the positive (known) set is the truncated VCF.

    The complete file is opened as the call set and the truncated file as
    the positive set, so the call set contains variants that are absent
    from the positive set. Sample NA12878 of the call file is compared
    against sample BLANK of the positive file.

    Returns the first of the two values produced by site_concordancy.
    """
    complete_path = 'tests/cases/gatk.vcf.gz'
    truncated_path = 'tests/cases/gatk_truncated.vcf.gz'

    # Readers are opened in the same order as before: call, then positive.
    call_vcf = VCF(complete_path, gts012=True)
    positive_vcf = VCF(truncated_path, gts012=True)

    # Only the first returned value is used by the tests; the second is
    # unpacked so the two-value return shape is still enforced.
    # NOTE(review): min_gq / min_dp look like genotype-quality and depth
    # thresholds -- confirm against site_concordancy.
    counts, _discordant = site_concordancy(
        call_vcf,
        positive_vcf,
        call_samples=['NA12878'],
        positive_samples=['BLANK'],
        min_gq=30,
        min_dp=20,
    )
    return counts
def test_truncated_known_no_call(NA12878_positive_truncated):
    """Variants absent from the known VCF add nothing to alleles_no_call.

    With the truncated file as positive set, the expected total is 0.
    """
    observed = NA12878_positive_truncated['alleles_no_call']
    assert observed == 0
......@@ -83,6 +83,10 @@ def site_concordancy(call_vcf: VCF,
and it_record.ALT == pos_record.ALT):
same.append(it_record)
# If the variant is not present in the call vcf
if len(same) == 0:
d['alleles_no_call'] += 2
if len(same) != 1:
continue
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment