Count positive-set variants that are missing from the call VCF as no-calls.

Adds tests/cases/gatk_truncated.vcf.gz and its .tbi index, replaces the
commented-out test_no_call2 with two fixtures/tests that pass the truncated
file as either the call set or the positive set of site_concordancy, and
makes vtools/evaluate.py add 2 to d['alleles_no_call'] when no call record
matches the positive record.

NOTE(review): this patch was recovered from a copy whose line breaks and
leading whitespace had been collapsed. Line boundaries follow the hunk
header counts; indentation is reconstructed -- confirm against the
repository before applying (or apply with whitespace checks relaxed).

diff --git a/tests/cases/gatk_truncated.vcf.gz b/tests/cases/gatk_truncated.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..f07ffaed0445f1c1e63206f517df0dda21f53bca
Binary files /dev/null and b/tests/cases/gatk_truncated.vcf.gz differ
diff --git a/tests/cases/gatk_truncated.vcf.gz.tbi b/tests/cases/gatk_truncated.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..38bcbf2d56e47126019c1119e3af7644b4e7a82e
Binary files /dev/null and b/tests/cases/gatk_truncated.vcf.gz.tbi differ
diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py
index c4e56c0a2ac189363850d26e848d9aa2cd606e16..e4afd85c2b2d573d82deaea7e016eaa27c83de7a 100644
--- a/tests/test_evaluate.py
+++ b/tests/test_evaluate.py
@@ -94,5 +94,42 @@ def NA12878_BLANK():
     return d
 
 
-# def test_no_call2(NA12878_BLANK):
-#     assert NA12878_BLANK['alleles_no_call'] == 8
+@pytest.fixture(scope='module')
+def NA12878_call_truncated():
+    """ When the call set is truncated, i.e. is missing variants which are
+    present in the positive vcf file """
+
+    filename = 'tests/cases/gatk.vcf.gz'
+    truncated = 'tests/cases/gatk_truncated.vcf.gz'
+    call = VCF(truncated, gts012=True)
+    positive = VCF(filename, gts012=True)
+    d, disc = site_concordancy(call, positive, call_samples=['NA12878'],
+                               positive_samples=['BLANK'],
+                               min_gq=30, min_dp=20)
+    return d
+
+
+def test_truncated_called_no_call(NA12878_call_truncated):
+    """ Variants which are missing from the call vcf count towards
+    alleles_no_call """
+    assert NA12878_call_truncated['alleles_no_call'] == 12
+
+
+@pytest.fixture(scope='module')
+def NA12878_positive_truncated():
+    """ When the known set is truncated, i.e. the called vcf file contains
+    variants which are absent from the positive vcf file """
+    filename = 'tests/cases/gatk.vcf.gz'
+    truncated = 'tests/cases/gatk_truncated.vcf.gz'
+    call = VCF(filename, gts012=True)
+    positive = VCF(truncated, gts012=True)
+    d, disc = site_concordancy(call, positive, call_samples=['NA12878'],
+                               positive_samples=['BLANK'],
+                               min_gq=30, min_dp=20)
+    return d
+
+
+def test_truncated_known_no_call(NA12878_positive_truncated):
+    """ Variants which are missing from the known vcf do not count towards
+    alleles_no_call """
+    assert NA12878_positive_truncated['alleles_no_call'] == 0
diff --git a/vtools/evaluate.py b/vtools/evaluate.py
index edb3deff1838243114cc4ad1dcb0fd52cfac4751..f42dc15abae38169ac1915bf9bc0889191445adc 100644
--- a/vtools/evaluate.py
+++ b/vtools/evaluate.py
@@ -83,6 +83,10 @@ def site_concordancy(call_vcf: VCF,
                     and it_record.ALT == pos_record.ALT):
                 same.append(it_record)
 
+        # If the variant is not present in the call vcf
+        if len(same) == 0:
+            d['alleles_no_call'] += 2
+
         if len(same) != 1:
             continue
 