From d6a8d0e585f833b1c1673a8405718a000173e6f7 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg <RedmarvandenBerg@lumc.nl> Date: Thu, 24 Oct 2019 15:58:58 +0200 Subject: [PATCH] Extract genotypes to support conflicting ALT calls --- vtools/evaluate.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/vtools/evaluate.py b/vtools/evaluate.py index 498e1ea..880853b 100644 --- a/vtools/evaluate.py +++ b/vtools/evaluate.py @@ -12,16 +12,17 @@ from types import SimpleNamespace from cyvcf2 import VCF -def parse_variants(call: List[any], pos: List[any], results: Dict[str, int]): +def parse_variants(ref: str, call: List[str], pos: List[str], + results: Dict[str, int]): """ Parse the variants and add to results """ - call_variant = sorted(call[0:2]) - pos_variant = sorted(pos[0:2]) + call_variant = sorted(call) + pos_variant = sorted(pos) # The types of concordant calls are counted separately if call_variant == pos_variant: # These variants are homozygous reference - if call_variant == [0, 0]: + if call_variant[0] == call_variant[1] and call_variant[0] == ref: results['alleles_hom_ref_concordant'] += 2 # These variants are heterozygous, since they have different calls elif call_variant[0] != call_variant[1]: @@ -182,8 +183,20 @@ def site_concordancy(call_vcf: VCF, if pos[2] or cal[2]: raise NotImplementedError('Phased variants are not supported') + # Get the genotyped bases. There is a deprecationWarning when using + # gt_bases directly so we go via the alleles and the gt call + pos_alleles = [pos_record.REF] + pos_record.ALT + cal_alleles = [call_record.REF] + call_record.ALT + + # The third item in pos is a boolean indicating phasing + pos_gt = [pos_alleles[x] for x in pos[:2]] + cal_gt = [cal_alleles[x] for x in cal[:2]] + # Parse the genotypes and add the results into d - parse_variants(pos, cal, d) + # We also need to know the reference call to determine if variants + # are hom ref. For this, we take the positive vcf REF call to be + # the truth + parse_variants(pos_record.REF, pos_gt, cal_gt, d) # The current variant is discordant if d['alleles_discordant'] > discordant_count: -- GitLab