From 955802ec1aa07a576df4e07be03382a00b649f83 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg <RedmarvandenBerg@lumc.nl> Date: Mon, 4 Nov 2019 11:27:32 +0100 Subject: [PATCH] Add test cases for missing ALT in call vcf When a vcf file contains calls for all specified positions, the ALT field will be empty if there is only a reference call in that position. This commits adds tests for this case. --- tests/cases/gatk_no_alt.vcf.gz | Bin 0 -> 3364 bytes tests/cases/gatk_no_alt.vcf.gz.tbi | Bin 0 -> 171 bytes tests/test_evaluate.py | 48 +++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 tests/cases/gatk_no_alt.vcf.gz create mode 100644 tests/cases/gatk_no_alt.vcf.gz.tbi diff --git a/tests/cases/gatk_no_alt.vcf.gz b/tests/cases/gatk_no_alt.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..2eadfc08e074500ef0aaa88fa1f923f157847836 GIT binary patch literal 3364 zcmV+<4cqb`iwFb&00000{{{d;LjnK?4YgU@a@$4{eP+G_6+i5zibCdofmXXETaoQr z%hqj^s=QGoB(Xvf3`yD1e*K=F0V#k3iDXjyAPX|VnK|gwr%yNDyty4u%G+xGFjyY_ z{O<It%K3M1-kg59JUhNV`p3tUqvMnQ=f{Vea(<Neub*b+(Z}hsyesGZlX5Yfk7vtq zH9h+Ncrqy`<6$-`XUqFVh6S_beVNV6+iWl$WrN8QKe(k_WaDZ0sG5(;c{Uj@mgVUC zf428LIbZF`U)yswn^%8~adO&dFp8UJf2Zq3_HB&ovx_64HTH^s-ry#>eUT03Wd`KS za$b&p*xdg8#m4rhlWMSRZ~nfVR!e-AT|5pZ<K<Jb#`VS;SIhZ$de^<k=9kYut-k#J z75twmYLoGQ%TYf&zXy<m#c(hwM|h~eAsF{nHHwkAtG5B{1s>Avc#50qFd*jHVpa~v zx8vb})(vO*`1$lpjQ+dTNshNgzs!c!<8(<2uQ%+6{ma4dK0E$&T~naH+Ium0Sn&V{ zYf#~N`_ql>J0ekUemXCIe=MiNr@y@C=UvEmT`dQbY#MINs@o>Jhjcx9ehBU5-wqb# z#pPi7+tuU4cAEW)m{!EqdG(O}GM)^pKdNaq$JevP<3k22FKPE<<k!!OJiA1`-C%?4 z_rS{W=H@4FS6i~xmV*sWj&80#pJ!(!-L<$M&o&nL3z>PCEvkq39}A@6Q+7Z2T4pz8 zIbEmX#%5PrPx}kL1%-K{hfs%iMqwCL-=^_;^skLY%gJK<b3dIAhIB>iUWNm|tES7r zc#0g(u4aR2xWHN^-L>(QPp`Ib&`Q|Z!Pmj$vCJM9wCd@2abM2=w#dpqaEKV=Wrb(B zEb-@JIVVNE84osg{c!m>-6c4m2J<^wk^qhG9;?Si_Mv*HX4T~B0bIr^1p4Q}d^|ul z*V*-Pn*9U?#pfsEc{xml;=^Dz33R_AzHY~l-4~R6NZ57k#-K~k>A^C4s0j79DqECy z^YRYTgeP;q+GGRz$LY<yEJwKIT~*%RZr}a+3lJ@QV>}#}01HLHlL@>X#gwCStrEGV ztFv-`5DAujy9a}Z)x!)$1*Pr|ETOaGn;!=A(bK^%<#>wY+##M2+GU5zp6yU@F3+yA zv%zdeXKvP<#v*KY;o4X!`?Z``k?5O??k!cP+okGjEmc7q9_pjKAXyhD+qYhk6{NZy zkHD$&#}%-fCuAwar@bg4J+Bu34;gM@n^hn+$;c{Q-+j3Ja@ITla@G5I(!2cWw1*Pa zyZF!Xr{3wuPuD+P_UMScXa#z2KOKMm_jkklxez@iv=GvZ-h3gPwA@P9d*91@!fT#; zjjQ8#N8<4K^oZqL$;0E%M_e5?iw!=IRv(_cJv=!-;+mPmldD5gLwvy4;ptWQ%??RN zj_|9M&_`UzTx)I)gQb$W7r0`$@2s)Dar*G$<Vfe99kw4aKGdG|-d`8TC&kHm@&2MX z|I}kYvcmN7Q*f6vRj@vHeZhGXv*8k$IhWcg79ewOIM11km`%j2gzv)4O)i{YgXXRY zT5bicM6PWO8rRAspheC&BhWs_eH)+;cy1-{Ld|7g72Fm^<VF=n_E}$e)5L8x1@#?% zq{+RDV0i?~Ebjt)z*X*yiBN@YLgm|_QcSV0BB<QkTt=v(0rh~h+(oe9m`=gkTqxCr zR+v67ge^c-<`8Ag``WSwTFF?9vhmz9qk6}{YZ3HZ*VJ|qx$Z`8awg0w%1X-DsLhio zGla?c2;Cu|Dv9y|cz|aJ<SO?`m|ZS>CvJ|Y7F-m_4E&}TYuiF?Euq7YRyl~WON{Xh zjI2Y<_ex>x;gpOpjatFvZJ3ICf(g97gUv$tPMj8O7QFBxx2o{OW-VnCsbKzDxmfR- z(pYP4a_4u6FSx1Qvvm2rsLcg<D_G280SekQz6h%d+69hFo{E=|oxo+^6u$7j;93DR znA8HzIe}(OX<QLz<cH3+*hQ^z5bf*>n77L{LnyRSE6kdum?0}jn!st71RH6fZ5Lz@ zz4nCwe+)8Ns6O<y30j3nbMK(nBIvboNHB>14swl11*CN%Lyq<Wtr2K%Yyi#hWYZd@ za%DxVQ6^C^pG$8#`Q4WV2(FPog$`QXlxpdiUXxqspxN9=XsFyVqor+>s8{Q<*u-oh zx*%GNGh|SAR48WxWCJlxnN4cj1)1tHH-+>$^9Z!`ePwkMwO2v1jaN2En2^TsoU>i> zS>=X_XUO}7S-KpEW0Y}iQn`kVMJK`$X1R%3LZmvg+4pt9RY>PhktJ(tm;$|471?U+ zyHjNnB<yMz%azt8ODfFx%6)WLF8ClVZ7tY|=Q@BVak^W#Bup24nk$s)Bh*ctN)ny9 zWAH9A$d<-`=c2|dBVGYi-E8LmIkdvOCe<lq>?6s-)R0+fxF97$M#JS3)sk4-bpf`0 z4$(3cTcr(2wQrJH86^IE0m)KK$*9#v!SWiui&o2=bN^?hTGOFJ-h$2yPNXY?MWi#X zk`yMT?(~?oQrqacgzZb+EKiCOcfOF$qd42ZYXlsO*{zpoB(U;&&nT;0t6H?+8cix% z$Qt+)f(*glM2r&7Uns};DIACaz`oxqOqxq-o;cHvljM;|E0p!z*CEysu++9&vAj~x zA$I#J7Ke3;bqVo^iELUS*3nrbqiu)(kgT&XIRJK{80%P!C2L`}_PJrC6)46UIkvJO zs?KF+IxPsOCEN=g<5t_{JjfbKFe(#P@=1_&jw=09DnoE=r*^?{K)VErHA=FlR93cy z8O_ofvyOrG#JHV^ZOG&9n92#U(Wx<|YRHbtZQ3I{9f~nJ=p@W2(4w<PPBU4FCS(_l zW`wMQdP(EZmY7{2D>%hQF=qQw8!pnb2$>U;I@AlKB*-vCE`=<Nc%gNoe*$PzgKo7H z+KC2e8R862!q!f68))+vKvVbR<g1{)P!Y6CKx^2OPH(*it?Ec4p?QnkUQ<F_Xqhvx zBP81B5@@#w+SX<SNAEHtRPoI-yY+tdS}8QTHpm#dxGs95U>X)%3X{OLIm&zIcmQrh zlOK^AR>UynYs=*{c2}zwnMIiP1rI`U%eydcAA=|w*+?CmWPxhfF$O`K;G>~-Ne<&H zvJ)g)Vt|x13z2P*Du9eFF_pVrng)RfEdoLz_8a5@pkOt3ku551hRMsim-<4oM4fI! z$i6Ei5ANB6$9*&gEtF2J($#n$ItC@s?|HvVV@Sg#r=O>nQZzw}t<;4~Ip-s4**U$G zxl%4Fvq(oD^t|N@eG=C-ZgeBSt%Pgh){6RJZ5ZcVN1Mf!ux35)dA)xCZVP9>#%=cs zwxcFAPPPQRolILLh`-~p2B)xXuDVeR5m4KJTGM1p$`+@!iQ;S^STc&Ug5_NM>DP72 zhs<7Me${LxMc6yE={)*r@JppC<#Q!j3^k~eU^cE>r6OimpsBQBR*lZ7Ww(GjsIx|R zh4Zl_SqGSWr&)FMLo3*7zna9k0W8a6Q9|FFCN^66IQ$?3n3!r<lNW-m3Amy$AG9$J z=}oH^130x3)<}*Vh@MNKNbWWOq1k_9M!zR+*i30v>=-#LXFBLI=txcMC9F!rt-A*x z6mCbTsg#j=Be87@vn7gSYo)X^aV~&%!m!;&^-y~EQm{AmOeGwb$mNymaum%}&TMpD zoJRSH>yqL{jt9`tOGUOzg5$2uaT~@y%rTFiIaDq3+&Ir}gL+QW>pvH2h^EmxG8|f> z(vAXPb?B7g0Ft&5Y1=)fU!x{9W1)7LwuU>l#$0z4#(-?ATKzg^iyFtDG4DCF#}iY| zujaTS#f(NN4n%ku9&CWdV^S|lxGz*FTrfNVbgN*S5Yf;}h#gtYaaLQqlKs4;CUd6w z%9V6X1hBT|HKCJ97aol5Y>)+~Hxs(BoZc^5s#KPV7EDV9B+RMV&dDq2U0FThY610I zTSO|u+=-P;tF(8Z*LcR&@7YA6SO*-*FS=t&0}vq#1Jc-nBHDMNmAx=!fytn>j?;Pc ztMqzfm!1hiW1hPQvF95J8uU0>H%;L+S~?G?n`5frX^55X&R*({g)baPKxNtSK4+@w zMy#^68<A4+`1VQQETEKWHxzU}0xmWKj>tZDi)_CXCUJCH#<mGQGzCufQpItX?R*#0 zF~-e*%$pFklo6s(Gz}no4bw{+W!jF*btA+h_wogVTL*fMN^D><0QgnY)b1(j_b%`t ziAF)UY%;aoz-Rrs?7&%tVhcXNmA8p&Y$rMzv98~xF{}1MXHhtuiH4GS3V2#<8CMZq zT5OSBd>c9!hLE9{Vh1N%6G_E8LBT-?S&I<cuaFw)z>D~10hKIw9d8y;H&m1$mJpH> uB^kgHr2he}6`4CvEC2u>iwFb&00000{{{d;LjnLB00RI3000000001M4~SF% literal 0 HcmV?d00001 diff --git a/tests/cases/gatk_no_alt.vcf.gz.tbi b/tests/cases/gatk_no_alt.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..4a964e0351e4325914edef48315c6a4fdfc84165 GIT binary patch literal 171 zcmb2|=3rp}f&Xj_PR>jWeGG3;@8xX@5MjIUTcc;vjz-yC49Yhg&$5|`h4nn*(tB0S zq_~OU?9WNn{zdg|)zgDqLUxLI&${Wn?s##|?fcJ4XRfnbUMYLL_~qZ~Uw6*sf1IDa j_ud2*&q=t^yQh=%A26iF%nD>+kVkWlGy^l(bsz!&2@5|u literal 0 HcmV?d00001 diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py index 26e142d..999c72f 100644 --- a/tests/test_evaluate.py +++ b/tests/test_evaluate.py @@ -362,3 +362,51 @@ def test_parse_variants_concordant(): parse_variants('A', call, pos, results) assert results['alleles_concordant'] == 1 + + +@pytest.fixture(scope='module') +def gatk_no_alt_in_call(): + """ Test statistics when the ALT allele is missing from the called vcf + + The ALT allele has been set to '.' for each variant, and the corresponding + GT has been set to 0/0 to generate valid variants. + """ + filename = 'tests/cases/gatk.vcf.gz' + no_alt = 'tests/cases/gatk_no_alt.vcf.gz' + positive = VCF(filename, gts012=True) + call = VCF(no_alt, gts012=True) + d, disc = site_concordancy(call, positive, call_samples=['BLANK'], + positive_samples=['BLANK'], min_dp=0, min_gq=0) + return d + + +def test_no_alt_call_total_sites(gatk_no_alt_in_call): + assert gatk_no_alt_in_call['total_sites'] == 37 + + +def test_no_alt_call_sites_considered(gatk_no_alt_in_call): + assert gatk_no_alt_in_call['sites_considered'] == 37 + + +def test_no_alt_call_het_concordant(gatk_no_alt_in_call): + assert gatk_no_alt_in_call['alleles_het_concordant'] == 0 + + +def test_no_alt_call_hom_alt_concordant(gatk_no_alt_in_call): + assert gatk_no_alt_in_call['alleles_hom_alt_concordant'] == 0 + + +def test_no_alt_call_hom_ref_concordant(gatk_no_alt_in_call): + assert gatk_no_alt_in_call['alleles_hom_ref_concordant'] == 32 + + +def test_no_alt_call_alleles_concordant(gatk_no_alt_in_call): + assert gatk_no_alt_in_call['alleles_concordant'] == 46 + + +def test_no_alt_call_alleles_discordant(gatk_no_alt_in_call): + assert gatk_no_alt_in_call['alleles_discordant'] == 20 + + +def test_no_alt_call_alleles_no_call(gatk_no_alt_in_call): + assert gatk_no_alt_in_call['alleles_no_call'] == 8 -- GitLab