diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py index e62dda052ee91e9b345d1be7dd8e1b4a49879d7e..26e142d40e58eb5ddc81be748fe133c7630df023 100644 --- a/tests/test_evaluate.py +++ b/tests/test_evaluate.py @@ -261,7 +261,7 @@ def test_partial_positive_concordant(partial_positive_file): def test_partial_positive_no_call(partial_positive_file): - assert partial_positive_file['alleles_no_call'] == 6 + assert partial_positive_file['alleles_no_call'] == 0 @pytest.fixture(scope='module') diff --git a/vtools/evaluate.py b/vtools/evaluate.py index 0369ff02c459182652e2b5a372468e687cc81289..c5a657b7c09d7814a214c31e4443a103b92a6979 100644 --- a/vtools/evaluate.py +++ b/vtools/evaluate.py @@ -16,16 +16,16 @@ def parse_variants(ref: str, call: List[str], pos: List[str], results: Dict[str, int]): """ Parse the variants and add to results """ - call_variant = sorted(call) - pos_variant = sorted(pos) + call_variant = set(call) + pos_variant = set(pos) # The types of concordant calls are counted separately if call_variant == pos_variant: # These variants are homozygous reference - if call_variant[0] == call_variant[1] and call_variant[0] == ref: + if len(call_variant) == 1 and next(iter(call_variant)) == ref: results['alleles_hom_ref_concordant'] += 2 # These variants are heterozygous, since they have different calls - elif call_variant[0] != call_variant[1]: + elif len(call_variant) > 1: results['alleles_het_concordant'] += 2 # If they are not homozygous reference, and not heterozygous, they must # be homozygous alternative, for whichever alt allele they have @@ -33,12 +33,14 @@ def parse_variants(ref: str, call: List[str], pos: List[str], results['alleles_hom_alt_concordant'] += 2 # Here we count all alleles independently, also for the concordant calls - for i, j in zip(call_variant, pos_variant): - # If one of the variants is partial no call - if i == '.' or j == '.': + for allele in call: + if allele == '.': results['alleles_no_call'] += 1 - elif i == j: + elif allele in pos: results['alleles_concordant'] += 1 + # We cannot match an A/A call with A/G, so we need to remove the + # A call from the positive set once we have 'used' it + pos.remove(allele) else: results['alleles_discordant'] += 1 @@ -204,7 +206,7 @@ def site_concordancy(call_vcf: VCF, # We also need to know the reference call to determine if variants # are hom ref. For this, we take the positive vcf REF call to be # the truth - parse_variants(pos_record.REF, pos_gt, cal_gt, d) + parse_variants(pos_record.REF, cal_gt, pos_gt, d) # The current variant is discordant if d['alleles_discordant'] > discordant_count: