From b63f7cb09a0e6606e49a3971b8fc4f649ca739f9 Mon Sep 17 00:00:00 2001
From: Redmar van den Berg <RedmarvandenBerg@lumc.nl>
Date: Fri, 25 Oct 2019 08:37:45 +0200
Subject: [PATCH] Add test cases where the ALT and REF differ

The actual genotypes are the same between gatk.vcf and
gatk_ref_alt_changed.vcf, but the REF, ALT and corresponding genotype
calls differ. This is to test if the conversion of genotype call
(e.g. 1/3) to the actual genotype (e.g. A/T) is working as intended.
---
 tests/cases/gatk_ref_alt_changed.vcf.gz     | Bin 0 -> 1780 bytes
 tests/cases/gatk_ref_alt_changed.vcf.gz.tbi | Bin 0 -> 172 bytes
 tests/test_evaluate.py                      |  80 ++++++++++++++++++++
 3 files changed, 80 insertions(+)
 create mode 100644 tests/cases/gatk_ref_alt_changed.vcf.gz
 create mode 100644 tests/cases/gatk_ref_alt_changed.vcf.gz.tbi

diff --git a/tests/cases/gatk_ref_alt_changed.vcf.gz b/tests/cases/gatk_ref_alt_changed.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..d6e7abc4e9c71e290cd03a48acabdc66a3c8e3f0
GIT binary patch
literal 1780
zcmV<Q1`GKgiwFb&00000{{{d;LjnNT2DO%7bJ|E2#h;Z=(KUJ4ohrH0|D_hoZV?E!
zYi$Pun^eu4j2ci4NE%5b@qYTfErBg0Cg7Ud2b;Fl>f66Q=bq;M`(?Z_%QWAHW&g`?
z{7qo`?*04m=jr`m-v7t#s6QCF54-J?$@|!yKWC<Yo0Mi{a(85kMIL8moF@HW2Ahr9
z#0wOetXvle7DVORpxi7`m_#Vtl<)&nOo8I0dX(mo$<Zb*N)!F^&*Pe-$)P6wS<NiV
z(;qSH+!hL>x;Xl~9T(_(4CDL9K9dq^)qg(0Bs;x8VQvtRmnJvSukG^JkFE0KO&XTR
z#jj10mhcQc?!rx6K6lobx7L`Id7P|HFVcQ_a(8(7`X~GkR<%w1UlX}#vIdZ=Vi9gk
z1c&-Nh_O!7s8(XNF9X;G9MUpQVA5U&&>R(+S;WhD5!!XDz1%*GKiBFH4?7v0Q{SLP
zx=TvC@Jm6rs&B%@8V&B|dkeUS+C{iM_y9K6u)=HkajX0ZCHBR~x%p#flEw31uK93@
z`Q~XEZctK9MCr2W?u8vkuZM8F{Kv2`kJB*ueYV@4x7lx?X==Tir(5(j-Yn7|X@YWi
z9TmGR0xOrc`T+XtOM%f8`t1n{pg$^C#_i&xkB5@vP%><JGJ2XlOwhftvx;?`wHEjb
zo!LW0x~>0FKpQ@zb@<JoCu5SAR%{iUo!#wMcncEd*&f1P-ptAnrQef!9Q~`csM!?f
z&wZSS3p=vwPOA+M)1(aJ1bP_FvM{Lzu$Hm2TBkgooiA|Iu-VnOaI-ULSJ+j@aj`b}
z-wI@Yz$R*q%M{MAH1KCp<~C8E;;>cqX1Yr*5$7(<S9VDY6t8yau0S{GHqFw_^A@}e
zt61nSVIGIj&HL`UP0%MuP<TFybF=6M#Z8!PD!b3DU(fSH^};qEH0+CZG1#Tb>99oG
z)KY&>QDIiOSwS?x$*d1WTIjdQQ*KNIQ--NoF3(qgeE~uX-iQ~m0a!=`IGM_~QQdNO
zUx!9M%GJ!|SGB>=_cb_pk!~}%RN$&xflKV(;mw;ckDjl-nmB>&tUyl)+VVuo-d}Lx
zOz&stKFl(^=jNT$ScF_2T&<PRZzfM`qqi4*b5xDbN7c7|R8`rq-EZYjBI|K<zVyMY
zV5(&tfv3!e1K86ubSZ?-@uHS=k{17u7(QalQZTiRkwd!<Z>FE`oyq5!b31aTpT-Vc
zQO@Ik26xW*_HO=Z>ewwh^;ZGs<K5ul-|rUdoH0i*$r#tn$qVLlk9yp92F~2Qc9@nF
zYw2KTFzmD5VB9B|3f>z$^r`4IuN!#4J=q(5?2RUUDv9ciW<8sS@PLrscvgLqo=uWI
zd>plOpE8c6q^ehasp42O7!jCv-&kMyviCUZORP!n_>keD)Wo@-2ZK>CngrL6!Q{?i
zA6TGV5~zUsn23P5)OQ)BO=?9sP*cjKFNiaA1^`W|q7)NeQ>&U<Fnx-eD$IQSf=&G<
zHb09k7?$218^%I)uo)(lSZs}9UJLt*Vvo}{Hvw+ST@g?(Pz);(DDD!MY1Jh6q?PsB
z8M*_9E3Ew*m)5w%qo;7MsKCBbH7xU*u=E_P)fFlW3rn?!c@4`Nuve5|zs3ch$u6#k
znGkJW`hf;ax$w&iz^=pxr6qNxM;g4GkXmV_sYjFmt>ODGuzMgrmV1l)HN8AduP|Zi
zpta!qXBMX&tqF+b>l)t&Sw%<dD{upxBhU-1J7W8o=@aZcprCC)SpXdZpH^w<9g%xp
zrS!m&0)t&H(HYJ_@x9FSO|1+ye8Fo>B@USM7*kMfF@aZ~@HAt(>uCyh22V2@Xofuz
zXzOXod6Se8{hDE<%CMnBrIZT$`Vxh~q0)a1G5sckQwHv3r0!-0M)bN0gRr7XF>pGk
z-5686gHEBHxvK&lXdO@~02{n?giWc%CPYXW4V}3IT_Uk$m*h5VMEc&JFrN=@0%Axd
z4$P8tnIT-DZ7Ns$9l4<qq<5;LH6*tV7&u5lKQrM%&NiV%r8U()BsT-8hpxS_ln-sD
zFG+3?tAgEkGvv~nxFu+<l~-{SINa`qC9v>VePO9ilxfVh>RwpT%q|bW<r11GkX39q
zQ=9ulzBru8N^7b(gekwUt-05@t$un*vo^B_=?h+~rG_k&CoU^rRmchwwQFu0yPdu!
z0kxMjRRPzSXwaH#S9r2XtwojHN(;~4(I9lCHl#P>N>NNN>1}}(VXy3O)0_Jk&}+G2
zth(V1p;muCp`bT!!*htNPG^e`5^OF5Dyr75ZfQ>VyZ-^<cSr#_6951oiwFb&00000
W{{{d;LjnLB00RI30000000018KX76I

literal 0
HcmV?d00001

diff --git a/tests/cases/gatk_ref_alt_changed.vcf.gz.tbi b/tests/cases/gatk_ref_alt_changed.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..16b661a64744cf268cc76e6ef270345a55668af2
GIT binary patch
literal 172
zcmb2|=3rp}f&Xj_PR>jW{S0qUZ{$7Xz{46K&nh&tkxy-h;EM(4*i1B67<q-A2$F2K
zo}}R=wxBY8)4Y`W?fvOqf`wX}ql%+#rdO8Np4)%=xz+Y#r;GYliajztGyiz4@b5db
j*E&y9@x+aO>R+#6EN5`tF)NUPK_1OL(hSUC=Ya?S8LK)*

literal 0
HcmV?d00001

diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py
index 15b8e7b..0ae13a2 100644
--- a/tests/test_evaluate.py
+++ b/tests/test_evaluate.py
@@ -260,3 +260,83 @@ def test_partial_positive_concordant(partial_positive_file):
 
 def test_partial_positive_no_call(partial_positive_file):
     assert partial_positive_file['alleles_no_call'] == 6
+
+
+@pytest.fixture(scope='module')
+def ref_alt_changed_positive():
+    """ Site concordancy with the REF/ALT-changed file as the positive set.
+
+    gatk_ref_alt_changed.vcf.gz (positive) has a different REF and ALT than
+    gatk.vcf.gz (call), with the gt calls for BLANK updated to keep the
+    actual genotype the same.
+    """
+    positive_vcf = VCF('tests/cases/gatk_ref_alt_changed.vcf.gz', gts012=True)
+    call_vcf = VCF('tests/cases/gatk.vcf.gz', gts012=True)
+    stats, _ = site_concordancy(call_vcf, positive_vcf, min_dp=0, min_gq=0,
+                                call_samples=['BLANK'],
+                                positive_samples=['BLANK'])
+    return stats
+
+
+def test_ref_alt_changed_positive_total(ref_alt_changed_positive):
+    assert ref_alt_changed_positive['total_sites'] == 10
+
+
+def test_ref_alt_changed_positive_hom_ref_concordant(ref_alt_changed_positive):
+    assert ref_alt_changed_positive['alleles_hom_ref_concordant'] == 2
+
+
+def test_ref_alt_changed_positive_het_concordant(ref_alt_changed_positive):
+    assert ref_alt_changed_positive['alleles_het_concordant'] == 12
+
+
+def test_ref_alt_changed_positive_hom_alt_concordant(ref_alt_changed_positive):
+    assert ref_alt_changed_positive['alleles_hom_alt_concordant'] == 6
+
+
+def test_ref_alt_changed_positive_concordant(ref_alt_changed_positive):
+    assert ref_alt_changed_positive['alleles_concordant'] == 20
+
+
+def test_ref_alt_changed_positive_no_call(ref_alt_changed_positive):
+    assert ref_alt_changed_positive['alleles_no_call'] == 0
+
+
+@pytest.fixture(scope='module')
+def ref_alt_changed_call():
+    """ Site concordancy with the REF/ALT-changed file as the call set.
+
+    gatk_ref_alt_changed.vcf.gz (call) has a different REF and ALT than
+    gatk.vcf.gz (positive), with the gt calls for BLANK updated to keep the
+    actual genotype the same.
+    """
+    positive_vcf = VCF('tests/cases/gatk.vcf.gz', gts012=True)
+    call_vcf = VCF('tests/cases/gatk_ref_alt_changed.vcf.gz', gts012=True)
+    stats, _ = site_concordancy(call_vcf, positive_vcf, min_dp=0, min_gq=0,
+                                call_samples=['BLANK'],
+                                positive_samples=['BLANK'])
+    return stats
+
+
+def test_ref_alt_changed_call_total(ref_alt_changed_call):
+    assert ref_alt_changed_call['total_sites'] == 37
+
+
+def test_ref_alt_changed_call_hom_ref_concordant(ref_alt_changed_call):
+    assert ref_alt_changed_call['alleles_hom_ref_concordant'] == 8
+
+
+def test_ref_alt_changed_call_het_concordant(ref_alt_changed_call):
+    assert ref_alt_changed_call['alleles_het_concordant'] == 12
+
+
+def test_ref_alt_changed_call_hom_alt_concordant(ref_alt_changed_call):
+    assert ref_alt_changed_call['alleles_hom_alt_concordant'] == 0
+
+
+def test_ref_alt_changed_call_concordant(ref_alt_changed_call):
+    assert ref_alt_changed_call['alleles_concordant'] == 20
+
+
+def test_ref_alt_changed_call_no_call(ref_alt_changed_call):
+    assert ref_alt_changed_call['alleles_no_call'] == 54
-- 
GitLab