From 8fac2dc7918918e2afab066e2c088f726daa57f6 Mon Sep 17 00:00:00 2001 From: Martijn Vermaat <martijn@vermaat.name> Date: Fri, 18 Dec 2015 15:48:40 +0100 Subject: [PATCH] Keep incomplete genes with complete features With this change the genbank parser no longer discards incomplete genes directly but keeps them as long as they have complete features annotated. For example, the PIK3R2 gene is annotated on NC_000019.9 (or a slice) as 4973..>22328 with two RNA entries. One of these, however, is complete so it would be a shame to discard the entire gene. --- mutalyzer/parsers/genbank.py | 162 ++++++++++++++++-------------- tests/data/UD_144959560058.gb.bz2 | Bin 0 -> 17122 bytes tests/data/references.yml | 25 +++++ tests/test_parsers_genbank.py | 38 ++++++- 4 files changed, 146 insertions(+), 79 deletions(-) create mode 100644 tests/data/UD_144959560058.gb.bz2 diff --git a/mutalyzer/parsers/genbank.py b/mutalyzer/parsers/genbank.py index 266def8f..65df3512 100644 --- a/mutalyzer/parsers/genbank.py +++ b/mutalyzer/parsers/genbank.py @@ -57,12 +57,14 @@ class GBparser(): """ @todo: documentation """ - def __location2pos(self, location): + def __location2pos(self, location, require_exact=True): """ Convert a location object to a tuple of integers. @arg location: A location object (see the BioPython documentation) @type location: location object + @arg require_exact: Require exact positions. + @type require_exact: bool @return: A tuple of integers @rtype: list @@ -70,10 +72,10 @@ class GBparser(): ret = [] - if not unicode(location.start).isdigit() or \ - not unicode(location.end).isdigit() : - return None - #if + if require_exact: + if not unicode(location.start).isdigit() or \ + not unicode(location.end).isdigit() : + return None ret.append(location.start.position + 1) ret.append(location.end.position) @@ -81,12 +83,14 @@ class GBparser(): return ret #__location2pos - def __locationList2posList(self, locationList): + def __location2posList(self, location, require_exact=True): """ - Convert a list of locations to a list of integers. + Convert a location object to a list of integers. - @arg locationList: A list of locations (see the BioPython documentation) - @type locationList: list (location objects) + @arg location: A location object (see the BioPython documentation) + @type location: location object + @arg require_exact: Require exact positions. + @type require_exact: bool @return: A list (of even length) of integers @rtype: list (integers) @@ -94,25 +98,29 @@ class GBparser(): ret = [] - if not unicode(locationList.location.start).isdigit() or \ - not unicode(locationList.location.end).isdigit() : - return None + if require_exact: + if not unicode(location.start).isdigit() or \ + not unicode(location.end).isdigit() : + return None #if - for i in locationList.sub_features : - if i.ref : # This is a workaround for a bug in BioPython. - ret = None - break - #if - temp = self.__location2pos(i.location) - if temp : - ret.append(temp[0]) - ret.append(temp[1]) + for part in location.parts[::location.strand]: + pos = self.__location2pos(part, require_exact=require_exact) + if not pos: + return None + + ret.append(pos[0]) + ret.append(pos[1]) #if #for + if not ret: + # No subfeatures found, in that case just use the feature itself + # as if it were its only subfeature. + ret = self.__location2pos(location, require_exact=require_exact) + return ret - #__locationList2posList + #__location2posList def _find_mismatch(self, sentences): """ @@ -227,11 +235,20 @@ class GBparser(): accession, int(version), match_version=False)[0] except ncbi.NoLinkError: pass - i.positionList = self.__locationList2posList(i) + i.original_location = i.location + if i.ref: + # This is a workaround for a bug in BioPython. + # But seriously I have no idea for which bug and couldn't find + # any hints in the commit history. So I just copied it over + # with the last changes to this code, but it can probably be + # removed. + i.positionList = None + else: + i.positionList = self.__location2posList(i.location) i.location = self.__location2pos(i.location) #FIXME #if not i.positionList : # FIXME ??? # i.positionList = i.location - if i.positionList or i.location : + if i.positionList : i.usable = True else : i.usable = False @@ -305,6 +322,13 @@ class GBparser(): mrnaList = mrna.positionList if not mrnaList : mrnaList = mrna.location + if not mrnaList : + # If the mRNA doesn't have exact positions (e.g., it's annotated + # at `join(<1..11,214..548,851..4143)`), we still want to use the + # part that is in this reference for matching. + mrnaList = self.__location2posList(mrna.original_location, + require_exact=False) + cdsList = cds.positionList if not cdsList : cdsList = cds.location @@ -493,12 +517,6 @@ class GBparser(): #if if i.qualifiers.has_key("gene") : - if not unicode(i.location.start).isdigit() or \ - not unicode(i.location.end).isdigit(): - # Feature is not completely in reference. Either start - # or end is not a Bio.SeqFeature.ExactPosition. - continue - geneName = i.qualifiers["gene"][0] if i.type == "gene" : if not geneDict.has_key(geneName) : @@ -509,14 +527,6 @@ class GBparser(): myGene.location = self.__location2pos(i.location) geneDict[geneName] = tempGene(geneName) #if - else: - if geneName not in geneDict: - # We should have seen a gene entry for this gene - # by now. Could be that it was skipped because it - # was not completely in reference (see check - # above). In that case we just ignore any of its - # features. - continue #if if i.type in ["mRNA", "misc_RNA", "ncRNA", "rRNA", "tRNA", @@ -537,8 +547,15 @@ class GBparser(): myGene = geneDict[j] self.link(myGene.rnaList, myGene.cdsList) for i in myGene.rnaList : + myRealGene = record.findGene(i.gene) + version = myRealGene.newLocusTag() + # TODO: Here we discard transcripts that are not complete + # in this reference, but it might be nicer to still keep + # them so that we can (for example) show them in the + # legend. Of course they should still not be allowed to be + # selected in the variant description. + # (Same for leftover CDS features below.) if i.usable : - myRealGene = record.findGene(i.gene) if i.locus_tag : # Note: We use the last three characters of the # locus_tag as a unique transcript version id. @@ -550,14 +567,13 @@ class GBparser(): # underscore. Or prepended with a letter. We # really want a number, so 'fix' this by only # looking for a numeric part. + # (Same for leftover CDS features below.) try: version = LOCUS_TAG_VERSION.findall( i.locus_tag)[0].zfill(3) except IndexError: - version = '000' - myTranscript = Locus(version) - else : - myTranscript = Locus(myRealGene.newLocusTag()) + pass + myTranscript = Locus(version) myTranscript.mRNA = PList() myTranscript.mRNA.positionList = i.positionList myTranscript.mRNA.location = i.location @@ -580,38 +596,33 @@ class GBparser(): myRealGene.transcriptList.append(myTranscript) #if #for + + # We now look for leftover CDS entries that were not linked to + # any transcript. We add them and the RNA will be constructed + # for them later. + # This does mean that these transcripts always come last (and + # are shown last in for example the legend). for i in myGene.cdsList : - if not i.linked and \ - (i.usable or not geneDict[myGene.name].rnaList) : + if not i.linked: myRealGene = record.findGene(i.gene) - if i.locus_tag : - # Note: We use the last three characters of the - # locus_tag as a unique transcript version id. - # This is also used to for the protein-transcript - # link table. - # Normally, locus_tag ends with three digits, but - # for some (e.g. mobA on NC_011228, a plasmid) it - # ends with two digits prepended with an - # underscore. Or prepended with a letter. We - # really want a number, so 'fix' this by only - # looking for a numeric part. - try: - version = LOCUS_TAG_VERSION.findall( - i.locus_tag)[0].zfill(3) - except IndexError: - version = '000' + version = myRealGene.newLocusTag() + if i.usable: + if i.locus_tag : + try: + version = LOCUS_TAG_VERSION.findall( + i.locus_tag)[0].zfill(3) + except IndexError: + pass myTranscript = Locus(version) - else : - myTranscript = Locus(myRealGene.newLocusTag()) - myTranscript.CDS = PList() - myTranscript.CDS.positionList = i.positionList - myTranscript.CDS.location = i.location - myTranscript.proteinID = i.protein_id - myTranscript.proteinProduct = i.product - if i.qualifiers.has_key("transl_table") : - myTranscript.txTable = \ - int(i.qualifiers["transl_table"][0]) - myRealGene.transcriptList.append(myTranscript) + myTranscript.CDS = PList() + myTranscript.CDS.positionList = i.positionList + myTranscript.CDS.location = i.location + myTranscript.proteinID = i.protein_id + myTranscript.proteinProduct = i.product + if i.qualifiers.has_key("transl_table") : + myTranscript.txTable = \ + int(i.qualifiers["transl_table"][0]) + myRealGene.transcriptList.append(myTranscript) #if #if #for @@ -655,9 +666,10 @@ class GBparser(): #if #if #else - for i in record.geneList : - if not i.transcriptList : - record.geneList.remove(i) + + # Discard genes for which we haven't constructed any transcripts. + record.geneList = [gene for gene in record.geneList + if gene.transcriptList] return record #create_record diff --git a/tests/data/UD_144959560058.gb.bz2 b/tests/data/UD_144959560058.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..72e9dd0f00e3dda68e2e7d6d21d32719938d67bf GIT binary patch literal 17122 zcmV(-K-|AVT4*^jL0KkKS$<63j{q!Se}I4RAOKqD|NlS#|N6iG|L|aE`_Sp;Wygf7 zs=zfs6j!0`m2rVi$KPN8D5FmS1E2r^004(=jWPhB0001>1b_u3wg3Y_1b_m8pay;K zsXoBK4uL=#JB*iFP&KBF!Ojg1ncWEGkz){0l7nEvmMyV{hK`v~8O8v0wx9v;2}nKa zl=Pw+TQyW0Y=K%5s#KlFS5$4FFtpLxfS?`6q<}8=@3SOYVWHltrLY0@cF+J-YFeW} z0MNVZy#j^ndEf~l06_!<$jF&76VhO6Wj!*OGMP^lAF6mM>VBZp$fYEKBPq2s(U8+X z02u~=00000l=4)R%1l$jMvRRIs2XSh0000000~p_N~x+(YJdO$00EFPWB>pF00@*# z2%A9-04A9UgvO=-r=XrBWMT{u0a8Q&KtN3jX+24h)6{4^Kr{dV00I2J)~EC-`@6b1 z*D!=N{}wT0O}>M;Lz#*xdb8bd-vatd1ce|{P>hmA6<LI0UuL1AUJ$E^p}VA+<g}_2 zDlKx25b~lzvR$%ff$n5PqF!F<C{`3indo5HEoWKG*RJnpj_xBcdEj8NXQ9%Crj(Pd z;y@+>13qoQdjW40eNJzOz}t)0Fi?;$33qYb_V)APXLF><mMshz(J0!6izQ@C>T$7; zq8;2XCRC42M!spf9x1ZzmDbK{5;BEZWm)ZiQSI&^dLE#rDkLR}5oy;1xB^_W%d~7( zz`e6BC9OKnQYhKXj<y*RR_)JAVz$n-I?K03^H+%~{A~sYuhQto2xuHqA6rq+>FLw| z7O<AK6roZ>S4WR;LDSsk{+_S%o5PC!eoB534g4q$Xf}vr3c)|@0=Lxc$Hegp4zN7F zNWVU~Sr@VadJtD6G=ZIdC)GRO5_{=2oV11KxcJ?C{x9439!nD^cRw1IrOJ^NE^A-; zu~K6ie$U&TO(<wL2|JUue|c8^7lN)k-rL67$dYf7pw|1kwk%4TYgQ)z$5NQb44F1H zk5iM4p{1dlgllV(t~f}(oLg%dT5W<v8Nm(y68^w@yHAw?+?TKh4{V6^4}#rty??1} z-kN@1zlWEz>G5H2B`KHyKwBz{`*HVbs)rfZ-K9==rO{jNrSjju0msKY_qP{;L=<MI zk0Xc#me};BGf#|dz6E7(pP!ZmA(+wcXD(t$z?R}K`YUs@Uc$YL(AnzXzWkx|4mTIK zV`UPe_!Q|RoNvq3M50Wu3pTL_nX)qaM%|CCmBs<+#iEpppn**XvY=EToLq7cqE(x+ z*_IhLJ`JKnQ93IEPL&uTgqh)<13XH3Y*36GBD9Kj;y7JIaV?rPSswf^4JkDwRe;o^ z?b<29<gZbpQbz@|)r5!`6T(2raA+Apy4ov*5aRGjB$7!b7jE7gQ}y%vt==a7-p1Yg z|2Mz8f52#VC)^ZaO21<Lfd299A1`}+C$r*N<8AFFIH9F#O48qkr@eDw8h~pTC7W+o zz8l--NNHx;S4&Ou`jbAg#e&i+zFY4w8psm*mp0jL`;M9yUl{#g`+Zz_LHdtd^{tu5 z6@jFB18%2OyQo0WCVM(1;LG-08T9+TFSzdw_INPrvs*Sh<=PT^+T~t8To=WCqY$Mb zN#xKt?B}sA9$-gT6r0AKAWf*qF^RX~`Dyt6Ps{I?VVTIZNC{}mK4xY+&jdrLkV!~q z>_RPnBMy@Qsud|`do(#JMW&3~H?G!;6M%tD8Dy?{8&uU7nRO}&0zIksdGYT<$0hUa zHEeR;xqa*4e7?lQ%^BBiH;+nh4(pV;iD5Rj)kf>LvxyB75q%D@C`#G)?@6$DB;9p% z6R=q*jt4}!39ZFGS2Nz4BNPH%Q?0%~Nxn1P^0-J2bOs%a60=!I=W7@a*kE1EfM6mk z*4?@=Hk!|ud(wxZ6t`rqCU#hm;}H)hhm&*L3=EKQ{pnJbPj-kMh#Btf>7FWF*4Irj zb;Q&wTHNVy(j`MWZqd5f%#{<c-@KQ%sq1%2kw}#B;y?;QOwUEhQZF@_XOu)d1Ih@K znn;%NT!`P_WTuyV(zWJ6(&_Q;--X?E#n2^=j!U7fq|Cx?#`a;8jZaXBlJs(Q9G?5f zGR;Dubnw?Q*sZIq+PQ9{#$8>ktQYcZ_af?e3bV$>DGv)OOOeKTwo)9M1t;3K<MXbm zK32le9jtjVDXCUk(pYl2(^(z|(s}fEf4A4~e}8-58*Jei_Ds7o<(zkFvoa{z)Jx?J zi#hc-9PI^$ZPx(5bL@GKv0bpshnWgkHj@Uf#AdCK&>GxmOdQ9tuMx{`>Z5jU=Hn@C zp-9VXeWRf`o?o^3?)mLw^Urx3J>YHgW4XskoW9&^Iuyl`Un^E22Zx7ta{J+oa7pr$ z9w{)0!d>xXBc>v~8HBLR>R>zqFZO%xq)AU1S{{#AS|{vG*XX$p5!cB9?xJ7UE1)Uj zmu5IKc0!J}i2cs$a$~uOiaa&&%EmfSx$)D%yEZeyH}M&L=z;8}wc1^>x>DHLSc%@Y zhK=@00SqSUh!3;d-jpWT_(v(3eQ*WQWg`Hv#jqOol^v{`Ge9P?(dQi;+`{dsW_;FO z^rJ#StO74J>N#`Tmfry4zq!5>=fpERu<Y>f6s)TY$A2gjW2p1?aOuMCT)^)ccd0)| z==~p`^jr5`ar(2`XznBO$!p#N*PQC;;icC!w#G!HpT;HcdR`^-9PCD|d^bw%w)5cp zFGck6#liN4fj>{~av!m}A;v@>%i%#2$>?AYvXktugiWbmmhjDrPSOxyBp3%kd=jH% zKdV0SBS~6WyQR%F*ajWxXRWJk77~4c_<y2FBz>pZ&-f5$Mqv4a;~RqyQ~gqpeqM84 zWt$I2pAz0sG<)*T(OIS*w)d0jb0~(GDh<mBylHWI<m+<Im2%K(a9cncpWnOl-=DMA z|4&pkf7h;!Z-|fgFOCadvHNlU`>IXGXL`8C@}|;@`&&AE`6X)WjMBJuc>MFmdsFV7 z&gx`4>luq>=w7v_%Q@ejwYi9X80h($X4}6ui|rE2^iQ1Wr5xpU>b2r|ZJNb1!-rJW zFVy#zk<qUWjMoyYal2ZF3h{%Mu48J(EYpo7(Hlx5T`KZ=ZNgPyny;-m#Qn`(A5Hnq zqPl8U4!h@uyy#C(t#zJWusN&i3_jdj5j`qbsmyD9+eV*DQYBo)x*JPL+Z1wFu`Z&o zTo&aPHaIr+KN+sy9`7uV0I9oi9zrG?I2=54kL~-h=JDy}rs;i_bf_~2vmays1Lgm& z$<F`2vai!WzaLNG6W)aXob9HYW*YeZnqrlw@!l;u(`mMvS^vKYSeBLJoi>zIX=+OP zNA%m2tv#pu<(ubiEuK)?;}etUH8YJOoSG#O6ythdM&XO_r}b&Yty=JWe}|KATXW?% zmiueZZFy=oyT#jyc%%^zA;0r!MJHNL-CEXit+&4mZ<|%*&CjCzA6+e2zV*XxSBr40 z;@vuLZK)|sLfl~G6|YAcH%CVG)-PJ|y?Nc`c*%Rp9kS09;`XiW{&eMxM=lt*O?qXT zayU|Ec9$J#6ya=by<(fVQ&wzo!#8dAJxqF^Ti$(-wZIPyl0GK}#FH{qAe+0yt6AQ* zy>Cm>^t~RjC;L(bI2=n1b?epP&&$iJojTf9CUm(rZr=jZ?e21gC*?c5b6+-MyJwDG zZc}P$w)e*zzO<uedr~x%uI)U{^|MANJN$h|{{Qv;d;emp-31%aB8OUZcC%KASadyB zYh>w({?X6UR@e02{+i8|#2AQqQkKhJQ?(wf*m~N`*J<6N^R?QsIm5K#PifY1Xc}{* z-X%G`sMUw8Qgw@zYmU&v6}ZKt7rfXd7p18?ULwPTGMX6}T+$0@X2DruhI?y@PR3eM zooHNPEXBfEwqdP0X0?gc?%|UK6y=AVYidrFh+u3LtV;>~FbD8KAG$A7FwJ&%8QSVn zc!-omt)K+4WgOfaL0l}`8-<XdtA_CCLNs~Uulx(|CGt%ZT{Rnxc+Hn^M3Yr$R^+x( zEV)uvBeuI%xtCh53c3lX%JD7H+3eKR(zpu2mR5uzv6W42y7JAN+9@>#!Z?Qdoeez` zL%eOYG&bB!ODS%$=9bx;#`1{~Ig)h}9v$9LVIX~8=mstE70U|CWm@57em~}V>c+JG zB&Wty(`~j_Ka(joXq>YI+1RN^P}F5MQe`GoX*My6<7<CBT%%0^{L}GGFD)Q7Y9^v; zG<J?jaMSplaZ46;hPq=6sLF|_H0l;PCakHLocF&i@08ilq!LVM%PnyxDps(nnz-#Y zvYMhs9a)LDCcA13QI-n1wDqStUe+$n@w9s7(ZIDYA$$u)=++rXXdBAkGIiym{Tg!D zd=A>?iVHAk%8YHudolSt)w!S>35&A3DX>E-2I@{6{NVkixl@AXpyD)|wEqi8j&{FJ zyJr9A4-L`MJ=1ed&50)2q?oG`O~eiL)|_WJd(urj@Znj~my25RtIAOJoZ{A;MK0}U zp+<t&tUO-POU_ScYPF%-Rw;TcYTB_@RbZ1SWxj;qzLKVx*48k``q7@v(!-!A?wL`R zB4`LxW=y8UlQRr7GMLhA?bkjP)|=wvF|VhJv8^Yn(tHY11P?6)np)PC(tLKF8A+~9 z5|ByUDN4CoRwX)OwX?_~Dpi#kPIIX+kV{7-^avdWmc#oxI@|(|(3Yyv)U2+wrrhI& z_uqxM$xfmPiAwSb<pFzIl$b<nr5Zh{oMN}Dg;46@Czc{2FA<vKG**h%IH0tuPH^E= z1U|UFwZ5-gKkgw!J11hf*0j?PYXiHxxNY->v%IH^ha)I+Dr#!ihtm_5)}?i*P>~u_ zG_ibQSgvVWX6<`5YSWw(yF)xkrOMKjm8(cEddhaa?^cRLS}zxz=+?ZcInmVv@}`#( zlwC>|B=yBOW_@+O(vkU26sHJ>!{{4Ecbx0BrA?us+Hf8*S#6AF*RYc?%ndx|Q?~e) z_NjLQR#4$?^Z%0D7T0cOg%*L5%A8(PwzjNOg!5NS6VegXJG93Nj0EEq(wMB3oy4C` z0VWcTxOTp|<()UJm~q@~9(IY_U8`+fm>rp`j36wbS4X7hdfBG-dPBICfX(fY!UKVb zOs7Rq9_|BO@mWe)*4f2%3*s)&^QK!DIv(0>Y+W^^@|e<Sd10FrvsApLXQZrNmU-S* zr0rW!cZ*iVXF19)>pCDX@ShDl%Nfn8(syaBy{SvnHmfr;$weO%o;A#^mtb}su*w=N z@0{kd#cLJkc9xWt=&eaLas`$*9-^h$jSRZd($f>Yr=1}-B@|M5J5n<{S6TT><{KAM zl!ue3t3BziwY6M#W_Yc$T`P&HE5eqsh<0~sMD@0qoLTJ>i85|#b|fkw(lwlbfrU3{ zx>t;}Pg`Eus92=yD$dfy?P}VNVpN(W(Q4Y#yk3-?QH#aMu{qYz^@p>~-qN*bPL`YA zCsvz2CntD-!&cL_F^l1()MjfjC=0diD5T<3(-W>P_abWYy7PslCV={a;2kr&Yb)n% zGg$!c%|l1p_Py_HO+FaAJSYclVT-}Sj}Gr_=?YFfWi+8;`b4d#3|3+b-muA&ySsF@ zF`DN~-zx9ByLTfvrnl2eQC;<Um?ab*L8_MxGqrcEww2pml@y^#<`huPIq7?5(Uh_9 z0Adn{vU;fY|0DLL_G~JX;c8S^{g&hO2fX#5N+5l-Xb@lTUpgg&)6x4LWAi)(=$|J$ z`0gA&pS<92qlevvuDmX#bMuG5JHB7s2nGLn03rZt+f)eE5)}yZrOO4QPGBTb1*nJ4 zbNzm!%luEad&_TpiFO8r_o8fm@~KKiQl(0%C72(#ftaWbfG3t0&}yro3<rWf+X|nB zsDXkmO@;x84U+8jgZuly$PWF(_aOLXBJKfTf23ePlYxd~0no7NrSQ~T>5Pv9(^1m_ zd3#BP4Mz4C(yvL^-e1Ro&}oL+(8f!->x?^$_P#>ZCR8m$z}y&AmNg;LgD}ZB7T65E zoC$&F0zs!c3@{cJlH;a$W9B<_%wA$R$ARBbzC$6_PMt>ZTzMTb;AJOl{Y4Idx4VR; zadv@Kb%CnEp?5GE0n?a~jE-`L3c|8OtT|3uFAZk@-r!>-14N&~g<&L$SEk!lDN#yC z2#dy|&*guW-}l3zwwcc(4<CTsdk((C^xktPo_>caoJv|x$gum|zJKie=MNJt<E3u3 zl~qXpwH8#jZM1$fwwH6?rg}{W9}+%lsTV&nAEu&)S`CI?y(IOx;!L9JLj<i-?FM)c zQP_1Kr1z6dM~O1uhsPtO<i}Yx;A4g&!h)UQ-yL=rlT3!1?whn2f1BEN7n84+!vSc( zTwyr<b?c9bj9hUgrVpf@<nOS{87NTC%KxpCJIRfOttNeB@-iCgm{1IsyMSf`ds5vN zZuJm3WQp35TdbE-sa^T}Iy<_$^qx<zy;T)RyB(}Ey^@pd?lvAhQ@gHv4{FMjL9q7i zDowRbm|-Qf8+%|rdi3B_oD9Ye1x1G#NxgLFJm@sweC97B3<q9DLmkGb;AAR?7Z?nN zNbEKk#<*pG>^|ck0@fSbft|*qL1EJz{29kiHOymSdGq9EI_5jV_zm#nV%G|W3|x;h zagf4G4DuFOQRP0;b>MB|$2yID#kQfyTK#4@51Bfr`%Sy5lQEiDbR2rroQ_8f1xc=2 zWS+i9Ir>MIJ&(OUs(y6!@6+q=N~-!9Q2UMti8^>4hFNYbJB!Kr>3Q%n8VA7V2{+r+ zeQGj{R~Obx;w+Y}1_OawVbkPb7<u^Sg=9Joon+?w4H^yP&wWLQ&QvJGb{P#t53iq< zewLP~s#jkedwQ0yEH-|>KV?*!zSGlhe;CJsmpn=AJMA<T(fQ&_CH;qqJbi~v*BOs- z(0#Y5GEbZ}!pEIE!nNMQ!^KIExUg<F)MJQIu97+qe@B{wEA<|l^@iOAq_3f%=Hku< zgPcd1fbcNLXo%lY-i8>DSX;isY$WNG#`?+VFw|UWdKZRWI29%XVcTJ#;&qN`oY-tV z1+W+n;_`S-sOw4VVRgkxLm#1Wen*kBTB%v6WY9AhM|r5I*PN<M$C*0bD=s_=liTd_ z_lT+gbrJJeQdJ+f$G4hg^s1#%RXmuE2R|O~s!e{rN5IHX{bD@Cage}zj$(T?7&=YG zs91aTykzh)lTnR^idbqmI`$aDZ@#Iz<#IKfjR!%LnQM3vYzGh(tuf$7h9`0wp;$@! zMw7X-hgSCikHmcTBhR?bPfzGXd=H+U;=X%Sp2K^N<2@wXF^2=-T1p=;C2%P@lTOIX zpxku0{Ey5>QJ-&Z$JU!xFx}3Tn(}k7yvxv5oQ|=Zu*7UCPIDOdl2~6EVCA%wnkBd$ z2cO{Xdwa!HN~KjQ6;(v_ygq&{*@rzX)5W{&Z&5ToMsV%vLZt2Dafi3-n9gO*hpt1X z79C;Q+1z6$8e}~}@@uaG!`&KYn@77P$7<{`8#S=JI>QL-)Mz&slYxz7E<QE5yx(!W za>^=)BV09LGaAWAU6>4(Z8&2g+>Ib%tdiI;%_viDTAm&jMnXank#k?V1rv7gCjG!! z>H;qS8b>wLk+p^+pyC-Ol5xWcIH92Nu*Q426qGj^#;bH5-3GAR>V=14%$oLDgV<g> zj3f2!CU;^pYC9~Pbs6F|81OY71}sNGb%%Z-z|3dzJN`Q0ofrmi&V^W5zTK|rcAbTW zjbzh#D^Ed2vUKyJUs109CG_qrXE4_G-w}YTjHJQ!cx{&*yA3dtO~yHCWWj<q84Pcj z+*28~rWH$yi*3$(;42P;A*jUO!qTLdg$f;{`qXnKeTO3nFVbi&kXlZzg27T@fY5av z1{$`k{cYPm+KdKb{PF9fJ4tBg$i!rv`sba7L&&Z-`+bz8{nVd-eOParxsGWD5hRe2 zff457MCsq;19S?ZS_?}~B;)ZsdJnz_zNt@f?mWrFMwIFNdX0xq$E=)UG&CJ5;_~6- zU_61O`7Gi&AtZ?*BRiy^NW7Ahi%#xn@u>~lKw%_b#&E$qLMCHV{HffpK0s#!4h_9u z!QFAl{ELsxeg$EBH6Ioqk;I)xLFb_R{~c|R=r4tGHOOy<J#!s>+h93Pf}mss1AGn- z`4^Tvxw~4~2z1-K$(-5H*fPhf%QLWN;V9;tvzd~dk4j{jkj;?AfWimnDmI7bwmym( zr?+8otF9wa^f1_eB%0%x=fKp%3ygENm=5E7Ml8i|z_9ihVll0MBEx}0H0(Q%L74ph z!I-~F7N3sY1?#6HA;yKN`wB^oU^Nz@nB_dOlV24jq`jn<Ew)-zm&++GGm`jKRICGh zs-ay+afTb2zT%V9jdJ6Wt^(r;FAs6wtqTiDH`ID@)Yx0wX(qCU<2f>EGsI&jd&$o@ z3`t6<Don<aX*voT$*!ZQ&5O`b&}lI6?qHhCBIZMS86@0TbJlUDl53F6VqcM#<F~I@ z@X1_Hs(!RBQD5M&?c;u)oc7A0&7|{yu*hgKh92Y2zJjtI+za{+cF%F?te!=r_R4hE zA-tV)9lLNg#BIR(9Y#-)N!tC#$6@BV-gXz099JJB#kxs5iw^7scf+>d%4NTrU$b=H z&kdJlt<ScFwqw!Il0r!!$S1foXPeBac$`}S&98$2*kvEn>vvidXUOmCklk^R*nDC9 zH6_P6N5cjB41Za`{x6K~KNsmdkK8i(k6!mqKGJp6cOE^aEauXA^w41=A=BAgj3mzG zVICsRc;kGu&sEJXFiGH0=Iy0P%$N^ZGqAieVk|732J&S?89NNch9jmk+xGv%_oLlI z+FDG8zDIG}*9=1l#9sCt#{>3B<18VJU9W6p)p6aTp_tlhCOHc7rD`9bp@7b8FB5hj zvu{Spi*aDcWHguHd`wLB8_8`qq^UI+qvm!Nj>AE|Soh1GC&p=hx{GlSi8|yk&<MDj z;CZ^B_*$Xek_3dn2Sn%NrtXW0vTvKXazHi98{J)UL6IQ|7({MN`SXF0KEj8@F_YY7 zCsEi`{(8d^)Mxmeha<4gK3!z21;Y4O&sj3eXPC4%A8}G)Crl)iHWMJyh0{vJTrd`t zG#u<bMmb5QS%v$IWO6yvySE+2gQ9bsnYSDcy>J*!hBJ9E<YqUMZZ#i%v4YZAP}iW( z!0eV949DwStEQsK$jRZ@WHXb{Vl5{F0pAUVyGcC;Lq_eEOhyuDDWskC8A~ZuB~bLr zWixi@JB-Fdz1d4iu91U*j5d=S8u~CAiQ3$Y6FdfDgUG;SIU3nI++I-RQ(^b;F~J)N z_!*4`1E{#RLZr(WKc#6WBe=><y~fv0?xDckdb|SQG`!C=2@g83X%LfSDHNJc8~1Cp zskfhdeG~6dq#C=to5^_C)3ER~u)nO<Pi09-eX<x01$7$IDuz}VN3CfktXyI@ABF>H zUmnBE>%Z45=3?5?he<wjX(v1l1&)<FjYd~-VPx1;bdx-jLAJ&eubywjVwm(CdWslj zC8Wo%PDX=0B<eBpDLIo~qj8|9q5!Bq`T?jD-rP8ctFPivyI6yQPuYd9qpg>H+Gm$u z-96v4yI&Reb*g81_8Y^0eQrJn!ov!WhEj3s4%><s?Iv1}*gsFs<B_(a_zu!sXgK&6 z94A3Z%ih>vJT=Z1OG%((-(l+}qmgm8IMkbuDS0&>1rE{Hp~nKk(zvuu_+}_tNrj~A zj505mBLUD<n{MtYFPGq3LE~ZAVlFox^Z9F^kBX|K#C}Ka?UZ+Ywe{_e9^)rrzC#)0 zTzc!b(xp{YPPCKqlD52`ZobLUue>KCL63Tq-`-6=h9kJv(s}tDCT>NcfVB#~OKrCc z<Sm2=%TO^O99wF+>rHO+wmo$7?Aq?mbCC$LY&-qq&_EyuNPeHH3VTFG|L{Phq?l4y zzj58vY-ZXt?VD`v8Id5We814w5*ji3+on|PijlSIds$()(%d^?)2@-KWsK;?-9`{9 zSxP9vrLBNPelqVdm&KJFoQT&9GYH8-!mVqlLnX1c+(wZ#C2ifYViap^M6o3$GtD`K z<0w)CUKwwzwJ%zMHcq1wDKiniI-On3uv~~8$B1W#W=r&apWf%)tfl?`56|w3+V;Hg zeQj1|G&tkFUZd{@SwV%8D9bNaf7j1DF<&}cpG?}%V_G)MI~vg(aocE;NL7=@7q?U; zbgdArKW*0L$($q7tF{DjwsA>wF|n|Yt^|ZknaqpIBaN#SGrMh`)vP&PbZ)YFV>Xc? z3ibUN-w-3g>O2p|Q>7-JiX?PIVO$q7^iIbauQ3=YW(%}z3#J^2C^qYkq`_}kk($PB zN{P?Cyn5eIAez8NAkBN(lpw@cP?5I9x6l?ar$t7WSjal;z~n&U6J}j#(2|<Lsg=o? zmua%hGy7Xufj)=6e7<!c8nVna$7oDtfJBn#Cu0$fmz?Vo6qwoIl=@nDJwY)%(pl>m zMl@9;?h9#aH&}K!2~FpF52W&T9*p_fBR10Z+dDg(c1x6SOA|{SaV~<>zZcnYUy5um zB9O(DhO{Ja4JX&VGrGc6MV`FMwTA>-oJm7MrRXP(J+1czc}Z-c7{n1`Sg<G5+UYii zm*(;4alFPWtc<&8%CNLZZFgC-t6JK|R@7RnVno~!M>w~mFL8HbH@4$R1uXb#LkUSf zeBSpmv7RXsRe|#9nS#`7P9a1(1h5I=I~5+p-DYVCoiHdv%=e+=layaV5#?dRVb<0* z=-cC8Y?6CN%+kcR*rMDu=Ss{wLA(;0l(GcCPojGqke@v%WDksQw(W>pwau~XnMpD4 zTb5hm$wm@>rynysu?0R4Pn=?Gr($bv?J*&A%mmYIiI%mGtV!wlezf~(?|pL^+bb;- zh7PI)Q)~~r&cnI$JCb9>Ye}U@9h(~tW#L%qt8M}(IY@o}p&z>G=jX~u%E6UvnK^)j z6RvtS>Pu*?ph=D;p-68sZ#;RwPivii*5}<_<oH-x4(%E^@XqO>!c0PT8Qq@1?>NkZ zZ#~|etH@ZYM%HZ(WqmNUDwTI*FGWzPCC%MwIm49*>pGV+nUJLz!e7qZeq#PQsq;HF zhBPsCG$t&gI+I#n=L{@lW%_l*BpK~?n;2HD==3g;Pe!)eJ7<pLuEx{idMx`el5RNB zc7{v2-*0!1Z$R2yi>j7bpv!u+jjJgx9(hIAorck6<4-X**4o2CRKulZ;lDNW1zE3q zGaW4%i=nPs+B=9vQ(jn46+}0&8`0`g8HCV-Sq=53_o9uEv7;^H-1W@HY_Q89IAU`N zHhZd=bUy>iB5}~R-Mfk8%4MiS&MyR$MU?hwte$4qtfeU(mD*!565DNVakjSo{ol3i zJ;eJA3G`$w;-$3vJ*D7Gbf6X{W}xO<YCRtNT9nsm&Ow$<W76d;tpqjNyG;i$5=(VU zT}f=OClq+ay=oAZwqj#}(gbSbt%Mpp4KHl&*eTI%v|Y5<c=a%nnAr&`vz{S%3x->0 zLnlVIRuwXp3g^&m9GN>KU6W=7Ot)8x#vXAiud{&Jm}1Hjthh>gwnH&IF%Tiz6#hTR zKG(qXd>?qhp7)CzONz4j<{x;E5a-LG&9d7Jv<bPeHY_(;Ll&`NHR_2DJ4OqIVhBp? zx2)GqE2|Jvv{sY}Y+k6$wbe&xhn?-_5q4}^%^lmLjPDGGXCmKu`FEbLH2fv`>E4n& z+F;?&LtJ-w|Bq^4FG^iLeLE|}NZoaLI@iqqG`=3rUoVGyVJ_<R0q_v~J)I`}u^koX zP8~=`WV#>~)d;4a3D(d1B}}%yxcZifY?Bo9f0sxOgdTf)d3K|_Z)}&Mety|>`d(f= z%AY6mQOySrEz;qBK1jQuiE^>S&}h3ik68X7but$<@On=PHPC*rJ)F3EW98{wsVYJ{ zY%wd%RS}&TY%S8z^6#4P^X}GL?swfEtLpsUZ-L(5F2eA#)_x6^E}JSaX$vj9+UEd5 ze>?bd=g*$`HYSx#K{6dnDMG!5tVGUhk%ih8EJKNP(-tw=S<?`dsvA(2*l}qTgVZ)6 zLRi7<3dO{K7q7j1nV9{E{s%qKVJx6G^65MnFwWLzVTg!`ahQE&4GhfALSlXz%aKNw z)VTagcb1W&&qX@d&ZjJ^NO1xT6?+V>w6hmrurb)JW!msTVHK8i16#&w!zr%T?zJ|# zZT+9O=iMStejOwF(1(aWO79;(eH1FQLzWxoySYP-kwb6B?@8<Uw>;*}dY=w_)xCXX z%PA1E_P@dWZ%?Do!|18F+hhy{5@0kSmp?b?4=@zpr|IwT>OGt_LL;jCd3j*5tjxiE z&3sJO+&fQ|ybvUCq)T7yy)o@P?c3D<^F967S*DhNkNAh-u>sqFKG+-w(*2*CNbc(9 zM_R?N#ZBWH?i%H@DGlUxwuVgAB#EfDWn(QT(D;~ByyX;JTMJr+(t<j&w$?C-bWW+E z*0sf<O6rDD9hN1FC2U%W?Tk@}qbt`*HD|9^THN}wMkKXn*42j|wq9*rk#^^C&yxkL zh2dFXsM&@}QS~&7w%J9-&x+?8v{e)vXQx70E3X9@2LQ*a+f8={*N8QrwzE01Exzfu zZ9W#v%ErQ)g<gBO!jn(6<j!b)V>PN(#Vxtm$b#D(yl0(tvTfNipAS57SN3~-UEgEu zdwYCC@3n539;aJblGge--!z{2PKkCgnoPS5GpCcEWKk|B)+WJXh{du`m?L?_`D#`r zdX5%(#)-l++fyq$7|z9-G#?nQp|TWibg?ocX4_VlTWyqrVzx@LW^6|VdX6!sg%aCC zo4}PZNrjn<&hKv-eFDknl4ix*it%>tHP{}(VJ^pX$g{a_ea_bL2`X46^K6r<z>F<B zjoTZHle*4zw#gH)zULPRgw`}(WwP??ODxdYC}c9&>^E&4&#OG~t&FU-m)*7iF#&6} z*|56o<4pE*6dI#4;4y4eSVrHPdRd0h<`zTHdrF612tt)EC?vNJCG1PiE||nvExnDX zseOr1MTDP(=drXSqcuS~lvB+hirNH<t9xgaak9;Ao2#v*grv&xILH++J(r<VN2LPh zCakA*jAnF6Wt&j$(&uxaQc@H%a?Qbq+g^vdojIPz8ykAK%ELyrFQ}7>26tLY)=5X# zxEsU}I@m^IsOw^FYGQ4!#f|G)EX~z0#M!ea%VF&+4hDKQZj3C7`Jx_XbIPr}=aEwx zAjbXwjQKIXbtqPYdawwu2g^(tHos@JkDqOs1;*lIW@&AftTyN_CDg`^f~d+&;$$hF z3Yc9rUa7TfFxfv&_k>`1;?E}|s?Dg%vnky=NLtWsIkBo*)c0$#t(R8pquMH*rOe#B z38zhlFH*i#AjZBrHMrh=-DIh7ml)YcmCm-*GB$Iejy1Z*&e+80)cItq-$u{5dPI_I z<n}nyOJT6<Y$7<%t9xTM9XQEI{B_~Ugvu9Kv5pmz(^z&ixSL|ib52&!jfqrCfH}qO zZ7|T<OC>i)ZP3(&R78O`<t<_&a#*t*>Kkc}TN)A|Oii`c^)x!&6)roXcr($?y3Lch zD8{)=;}%>7v`oj>z2YH-L&%J7X0VxjC5Df!tVZjb0M%<I+)>eC&r!<RFdhSYjKr-M zq8empq={wbx;<lQ=arb{o}z^H$hmUMwOUokP&>3`>EvwPAhDfG(O67b>q*Siu&EIw zlI)YSyLWV>vM?<yfov5<h;>vNTwJ=^wsNDhr%ah`ZHj71Q7D!+owu>2me+N|6jL1= zLlj#fI|H^;I$+9MYFekecWKIHQ82Fdv}9KPe0(`Mu!N*bZ(jNk3_lwS>jgU&-GG^l zSfy6R?NZ9o%|i0b-7Za^tr!qJJ?VECqZx6FHNgsUM8mgy-R~oq%L0?l!vgB5M3AmV zHMDD5#vC}JyGHCW@Me@3O1wdqx7p5=PT?v=y)Kg_4l{&FPFfP8uHm32<C2o5p$?;5 z7}o0}Vr+YLsoT{osZ%AdD;VH;$t)9=gf-g{k$J6*>EjnqY{l5^mh_U_ZLNidg3n&t z9&UjEI-20qIe}$?uNc<>0zAb$Y31=NJcW>&G-+|2D$~~AdMvW9R$4Q%c+oCw3r0$; z$NEy`EwEaw?48r+5Wyjnu>}Y}i?bJnV~7`k9}fhS5lH^?*1Y+@lImzu4sM}IwYejo zGFI5b;lR#H7Vg<)^~<jAxFTYtK}6>nwkef+n1<77szx6U4_W1*=<32b>zTBxPK<7* zdGwk(7jWqt*4EhOTC%Pn*<#`Arig{NM2)>ERf@JTgjnNKQI56jh9(JORyA;2ro^~x zQ&pwD^XU%oc|es9HmC!e0@0@<7O#z;7sKi7!%KG4We7t}Vr#7sNk-@#W1E`dR>16b zg_`DKTbm7Olznw37mr6RZNkwm*1<Ut<fl1K*>fEBK1p);EqV}MTAzfJx)2<esp`>~ ziCnvhX9yUCk>nZKCIL+WrT2a?nmZ|0V`CC)3L?VT!?uk*J)I1udfk~OT?j6m?~#uB z{%qH)WzH90PnTgvVMD}>%NegB!1ak)@_ugVHZTf#t7BM|vR}UMr6tcE(WsU(_O#Is z$8NFK(oAMI6J;IP9g>lR#k95LmSak|ZR0iS!;ZGI&8|T>v5x|C4hG85v^%G&D23XW zwX_hfS2J=X*h)lmX)bQJvh}v(u^7fCCyD3)1eQ6aliNW|>UJZ8M{2rSMlVumMdT-1 z3P!cM=47-b>g_DMLkS6X&0!6WdN-MkZ)v#0KpA7$?>uvRYi+M<J~EpWV(J=^0NRg? zdCm2t(VDjP8v$Z;!4t$!6iu}5!l8>Rj-uUjRJ7LF9O9n3^|MM&Y)RLlF{{^EV*%E7 zB^bcYpyw*Ac{KZPCkYP5c6!L-T)jR-EMqjct8R?JdcuchBc{$I6I8q~mD=g0mbX`k zy#?WU;5gcZ#<G=c&K6USB0WT{;>WXLT8$+t4cgWWfJ(TN-rLq+VQDSoK%;UN6u@)C z(1<dn%+u`1le|Pu*Tz|>;w&iJHgao-OElTHv0ghxzjscbo9_30LVWnk1FUb|rMZ60 z904v#NZ%CR8+?`s?kT0HT`E;vRJDNGKp|!-qZy1+!(OjzTdmp*&YKuzbpkrH(A2rs zX2fB=eKcHcv^_7a?g4buV3(`kf%g30iQIHll0XESxqNkOLQAfdMAD%y(OEHVu>~Z3 zvL+r3g15eSq{2gG-6yOR!6Jex>DxyHLrRsspUbcuQj{T`h9V5Q244W6c|uW?9dk-k zVce<<LCGmd5j(`#vSJoBmXC8Y?6ocJ*Bbj+)AZ+ht_NQX0dO{C2GoVhQBVpZN9To_ zqIi2K6x-ph=JNYY*Jpfq`c0RWCQRc*j`R_%UMPWW%~VA1LoHh|)woxalN1#A(oA-p z&X2Ec3!QvY3$kA_whry@+b(y|#fumDv|q1pVUBH~NM<&js@<xW+qYg-qT>QaGDm-< zQ_~S5Bo5vjxv}2!X3dy{ghwRG+4Al6F5BG@81C@eM~cc{5=oLwHkISMklF2}<+Sv> zT=%{GRLjy=*EaN})#J9^&*w(+-Hs>@lq1s~J+&j@`Z`v<{bPQ!o$tGx$^8FUCD&BC z`$Ki3!J2CwoxT7z;T~r0LNWGUz2479EBOw_N{45UxogRnGt~#SIZX3{j@a~iz(R9m zuLCbxcdY)lN_a|hez71@JKwR{o*Y_qC4|$;CekUK@oKE?v3}d#*iqCi>l)UeYfe6w zfKd6~BiHFZ-vi8w!%Y_u`(tJz?dy5RD(s4{mU4*s6h+poFUBp>$w4LQ^x<d2NNX<^ zCl0Pe%xo+x(=RCNEvhxnvQ|9_qe7WQ$;_`(%~f+;Uc0NjNl5c#Qetsb7E+s}x<aKL zu|}O@R%2UaH4PzWw6Y$BT<g{qqQ|Q$(yNivI<<yM+AYo!kr81LP?xH7D+b1PwY3Q6 zw{yIcE`>!|YYRPlW++YV58vNEF(7xwhtcor-uQy!H%75+^0qcA=|xU!Mnhiv&k0M& z&C^y)wb;bGQpok4iMm9&5^aq9P2|i01UkQ#gmk>lb@6!_==hkkFxKs^+RU7+wdifu zO>b^A^QgMsYJ$-7h!6ugg|UN30zx{*8g6RMswA~dMr}e;VU&|DUALx{tnMRmRud{+ zysSZBmWHjBQr_EH6HBZsHkTOMlTEZ84a;3?S{H&AvcQ4ex!jPl9i=gqXi__Dnk{Nl zaygc&GBuaecA;XgFnZapma8^r_0N0c`SYo}(54HcLdRWG8jjVW?3a5w`DQtCg!E)| zbP=1*vQHfr*y`*<Z1aH{C`&fUqA(&h65L++IeT+zRWtC;ZiC=O^~LEV%?BIEW)np0 zSwVCQIAU5<&2v<4GrYwjKIEoA%hl})#~A^dx=o_!7MP{YtkQ%4oPoeXcin#O-^uj( z=p-Z+9$24qX!JhS_-bG%R%q25T;dp6Ffe&yqpxXAHM$xkymKDQwyB&;N?NHH#jJ$9 z<uX8q*GoSiO}!hIn+=#tnHG#Bi-o5|R%{8RSK&F~MBznRH=N|WtiOABG%z;JY|FTX zjlFnYNl7lMTGXkP){I!GE>eUw&b402yId)qJY#ttBcYQ=lO?ydCWyyeYYm5ObpmB9 z)$6u1RoZH^ZeoQvvXGG_31Y19fh-svDa-AmM1%&kRZatF4ulR`^@+8#b1y?yhcvF! z7P<fd6ge~k)Fv}xv?7cgLehI%z(6&KUS;jN!wfM4<*1QSLCs@rT^_Z?Vx?3n7qw#9 zcqy&gl)EKIRLZI-qN~=#owpG!+YVM`x;S?N+BBm=oU)DYT2BYO<_uX8y7){1q3R=E zLcGn=++0bnxUjbd&BVCA_4j4)uC^NGjZ(r|!)D`Ic5^InW~G-svzfi!iBpM<=+-r` zHsQKeR7I57HtQzsE_zK}TJFOZ_Quy3pqGNm%abyaWo*NC?Txy<vI&jjNGVHew=Xj7 zV!@Ug-U_AKX*5}e(Ap(1@ow&NrbCK2=zVT1NaXHLF=ZUUcv)Q~g3t?gkh5&VO!YIw zcqOB28DOi`WIU%3y7@ba61+~ewwB4xRY1v?Z9uZ<H)?1*31aD#jWw{eB#66POd$DV zZMxAM`eq(N9+@$X<uX@KZl89_GHh3=ke<7?Lb8>T`zBOej55kwVXLOytZAbu6br1O z8_zAqyF7<>XOqfKmD>}r$I#3H)PXv0_;ij%vXTz^-yBEAu-_xMg_Ab2<`lY`7>Z%6 z+GJLHGn|sVi#p6@&em<QwwxPh640xgR;jEvJ9<XB){@Z}RgF$>Osa#~>ocvPn}<71 z(sObqjJ9wieIv<vx=Uh}w@bsp2<))qh@jjlb7^L+W3C5YQP^JVpINZzF@<P59oWe@ zn|79pv?$zbEU;Ic<E|#M9W#3i(Qx_d7Hp2$8`&KSxU!>KjF9(FQX_TKx2<GeG~1l= zaPKRe7aGdsl$f@+t!0*H!JH$*Mx$0&40-|)eO^Xl%XQ*dx)oAfBx^fZ-XmP?v0AMp zQMi<_TWJYuj61TjwxU7V(*ayHx4j8HM1(vZ&YM}Ty-kWlbs%en!7h#9SUGlXwd-Y= zLFN<pjb=_uUuqoo7G4>2a?!jlyJ9UR%)COUhFH7Kwp=@#thwk>nb52Yh9=KrrR}c~ zw1;9X){_ese8_p2p1*HS={nuqDtvHjlqtB7SfXl@M!Hy!gPc8;<?YHws)RCah3TnH zk?79XT3Il<>EYV8O4K)F4Ct+`%?>DxmL$nLP_g=DngB%*%?c=$rEN<ua&JkAB+ts> z<~UN(V-uyrD)3d}#+Ff2vrd;QThuDCqQ{8*!Wvs^6oJbM-U})0)jdb0!cR;>T&uJx zsn|lBO>o>A+*yVYBem^DsU@vBbg(RsSQy7yl8q*I+RI*awry#PtwP4;EVdEjjc7wi zlIn2`nVgeJq%<)6$-hO&CCw+}rHIBE;vt=$G(SDNbLTc9QJsj%E8CzmlyJz?MV-rn z3hMb2S8VyDNooua#%s_gJV?kat1;upr=Y<~Th)yAbZcdn;NIF5)-kbEI+v^wGm0jr zS*NeF!D546t0)cNinL}D8r6(U1`}N}?Axs+yIEIT8G0&<o7AdJSh=kRZ(A?DEi)Af ztD>XC#<OpEAw$e>gBBPyER3<RqzXzqR*l9`hhC^n5~9k(+_3R&JZMPXc;e#M#<SEL zN~lnjlp|tT`C_JY2D;qjDK?pvHZj!PheS<cor+0oa*)-rwx%e#+nKgdnX9D?=!Tea zt(wv}vWF6ofpKMtFINu}XRjDUNs9?cNKeV7L^AtY671<_?#a;!nG{M*LYoCHa+1eZ z8xYy+16#_mm5m2kbGuh^y_aKH(LNN=Uf%=fA2a3;rJW{WhH6XAIDc6>Q=e;2Qz5ib z<f$JLlO_<~COrlM=f>5ezrxk{u00#5y{d<l<Qz?CBvWO2L<Ox_sseNmlb38RE9<6x zHZFAbagi=9)eB+$4Yo1s##*RLAa-D-<PRM<JSV8|7oc(?e3UmdxM41~em{@t`3}zh z-CY*x_di)qwQ2~S?~Xz``+S~k`aX+34VXTSgh!%9&}DT#{XM>(x^>U?V(2O4su9E7 zo@KiuBs~xe=oR1O@;L24$$6VCLxP$0)aK;%Y0HV#ip}$~r>|n@#(KE*n+OXcp_B@8 zeD?sYw=31Nl+fcL9XxA(#=lRUmJ;QfNGat>htX-f_;OZDt;a$=e`c9x<i3BW^#9kd z+0c36i)!i)J?}iH={#$OrK!|07w-AIfO>#$ygSKlbB3nTgtT!X#%397#yGt7YEO0G z=gm5s>$<qfd|c3K9v(VV1lk=2%3oXoJ(y;{c_=Ct)59B2V})6&HkEXtu6y>E(BC~0 z+TPeM*&amVQfYL{w<~*TdU}1cKN4fG`QBSFbZRBp@l~qF5hABjPV7(Bq`P^CQV#SM zwW^h?<)=lgod{;j7KgAk6Ch^rNyn-X&cGj90)iei?^48+#qMa?(pwmHO;~j9v=o@m zf`v!~E}S-H7gbwTQq*cz>JTjp{B~SJLiM022laO78Wc!vd8nO*<GXM?33<D?j^pI> zjwtkJiZy|9v_`~f3ereKASLwQh&56}Qc=seot3XCQIj$RNFHQD%P{W44FojK>@h#d zTxqPBt*t)Skf>FnKWKM0IuUn~F~bT*GA)3tAs`K68>Pj?P|<9NT1#3jR>f*{SS>k_ z+K}Aycd_ig^{waO?)Q2;P!>D<od*QSM^~FDF**W2IPS^s@C4~zWVPe7{<D7Hl{)oL zbBAt{J1O^lN3i#I^VaA_y@fo>sAQ9_`gE~PivTfkLEP664Ak#X%_KbZtf*ZINLZ?~ z>wwt^sZ)=PICP+By&~{A^57i3hc~C8y%v66^t^q{xk-<F_Q~JV;kV;s<LcAq@zf5F z4Y*^o=1Z}X@H7u!b)tTfNa<Z+9KEv;EdeRV+ub1thMpeY&Yy;UJq^bj)h2pli}p*5 z{ybA|HTG*ZJjb98Dzxx$?>t@JB2+xVjWA39_m>Y%#tU2e1+C)fJyJvXq$^QYg-!J2 zP3HSVS`mfGMI-`BK%@=m)(Hu@Te$m{UB5OwJ9K5oqqbQ9&8F`>)7PfNn$0kkr5MyC zT2iLw6?hRmN09`Mn)Enc$HO#fjZhUNYt4qb9irpCP&df!w#!n~Kyt6b0>P{$G$SlB z26kFTG6-UUj3s%9CX+6Vpf4!XIv)J$S)???GXTjujDeZ@9nelzX(<C}{?{PY{e_yU zX`gyGOz~NWsERNZK4i3+4ZgA3_Rl3?#(R98dgDw|<>dK%`kT{=)y(a0xp@J0@aKc8 zRE3*~4aW8jiZisjl9&e;Kv_w!GrPq6k?B%PR3nm>yo8JgQMwKFVF$g^n&3|ZzexLy z2kP}Ig?9Z>zcAB{5Dy?ZOYTAY_ujVdTn#6&l$HbA@VMYV8tGrh+~fFko<F|fJ$m(* zpCwPy_}F4oh!#8)r8vKv@;H)zm(qOigVcN~!DZc{)-@`u=yOc3O2&Py-)~9k$>KBK zsqGjS%#zWGVbK_o%uQJ;v4_`&GXkiS@$R$6nCHc;!&8BuGf0z0##Z6Qrf7$ljH=-U znxRcv&aH0$9=a#52ZWhulBvIPeU5$pPrK$mmOf9la410-^5F!ev9Y}UY!QdcKDFWD zCdB1-%6sr{tK|89Prvt@rhZ%@6F});MoO_ZamLI<5YZ8{#GS@7(U$WF{!4W&=)^C6 z5)yus-cfwZPt<4Uy`}pz?)p!6kJc<BH2I{*j{>Z3t*1M)ZksrtG`=67E<pM0WXs;y z-hDbfpiM@(msr45J2W4kF=x+;pASc2uuU&OxC{wOrJFXgS#{X5>us-61mJ-sZf>d= znZJoXjqV5JY+svFY+`4Xg}Mo;^>yvuWpUda+OW09X?CfWGZ1Yskegqx-LM(cd!=7e z7SaNII~W&R_)e0crf6RbBHP~XpRv=X>_TbsmE}Htzi;z<@cR2twh>}5qOC%~6<LQj zQ1nf<c;{yhrc@U_jhGhMEV<B^Bv|m2+98HiW;rc+p4KllJy~TVDg<cvX;9un=cgGY z_02}c^>LBp0f7P@ypZvLS?i5dm#cvhFC)(LWf3?;=sRvs&Q4s$wB*7^W?6HMp4QmQ zY?zoi1vv9C5W_Gen)jxG215)yFgaAH!03!6toGFHX^3e_D>t>`urzI$a?zHKJCcbW zyJ1>99J_A5mG*^$2oQl53=b@}6OoS$$W5cYd_eDU<I_1|soKk=$oTi36-I9lrqy8K z=E_(KaXoK2>$%q2+u8QV;^=%LENSz>;+2e^9WoJEYi1jzn+U2<qWFxgeP^sUk3+jN z(%)NQpcdI_t(6QLn+V(R>_`*Nuo3_@@!A}s#ABGnN=6}3g&SorEmzGTQ^tK@GsI8; z50xBNM6sF6uM+t?1Y@ByvQn(mI}*=(9`*L>5bFv!7pyOF2Fo10_Hky3qmHKQI}D6l zNK(foZjT1dQC$W($=)u*oukVdzCJkl_Xl;s@}M6s#_QLQ&p#bcvd_JPYYIZr4W+!V zN;mM>@~HEim9k?ym$G9aNe?C|vC!KU5<<Eu&q;-TW_56M08XB*p0rS63~nVhKvuDB zPVzOO^f-2meWe^mdCoeU@~-rJU8rTH(b_P?bk<5rsuC(G!PRdWT{fW|QttRJJ>;uB zyIT#AND}6Rb0s>q_JB#6%-seo6H6FmluuaW)6%zKLV5SSY_iwXZu++q#Vj}rVJsk% z1ofV@7GB$Wmz84Ao>_^~#@Sq;ZBj5VP_>w8OhlFV?G0-JDxk#nvdf*jO`(K9>~ciI z?Q!Mt$(K5zSqpCyJeb>D$o9TBwX*ta9;8-MVp0=0&XtBiU_jy;GJ8gx5%hd)<yPq6 zFhPvh+gzk=rL473Sz<+yme62*H1WNL@+URxLO&&T>h5-$%No|PcAHuh4y<l{=3~|* zTbb`)Zj|UvZS)~qSoHx3np*adX0$qJ1j13a?EyP$rHqufcEYd@eEWJ|dbEd_S{+_{ zTUatLxqV~U4QASHtVxb`x0TQsf;r7e6A&?~h1RENVztr|vkVwbuthcmpJxHT0zmz0 zuI*SC+%pJDIE%RAu$X}YCcIkG#?Kca1b3Oo9I06|cU!Ls%+7ReCWh6yVi4*Ol!zXz z+(II21KKv*VqLCilXcqbwKD~jw+~=bWXVJ~bwN;_h1*7V4x@OSJzQs+4K(h!E&Fo^ zm5u^Oxd9_a&XJx{QCez(mm#GRBf!N>G`XA%F3^TVTNiY2X~Qxl3-Gi*zPoR*RqLpi z6oPX~jFu{d2RXRT&E`aDKTqtk>F%*-Hj96PIve%P00;@>^AC$c@<uIe<og6(d)Ym& zve<j_ya(8>6<pnJt0*qkZsmrzTzyL%-m~8ID*Zn^a7`)VtcHxR-53xuUz=#+#HF4i zK6;i1aH2$>x7xJW2vRx*#L~2-)+W?Q*+RWoJx+AgnJuNzpw!iJ?UuAF#wM2+O&uOK zGQ(b0vYz0{<35%&JqWd{T9}5Z9#_nP4YF05L3+TkQ(GN`hMJdKZQ=~drsgEN;ev67 zd^sRvBf>%MN<cU(771)6m6y`{ddJ6?w8TiqSm80PuaBQkwe!D&_rDNu9>>HZ>Z#`V zoe5m(V;KgYonmi8DbU1P9lYB{Geve=sK#j)GV6;lYZh9tu?ieOmi4D34lJ&_vlB9s zRwmrY6{HszelF?Hq~I>URNU}<eVwg8Og<WCu<=e0U$mQgTaX}oO!W0{S8kw4yYYEH z0SWV8s(N-}FHrk`LIcle__y`nqJ>W}o}JW#mt1}PH|#iul*yj7wn_Xg!+8R^@4WPh z{r;pPo*igMX+<O>uj(oAXGPt7p!zU(ArwkZ^hu<h+|Q!Z*3_u&`~2{fi7n2Du*Mxv z)`26y@7QNxNxZO)l9FmgY~M>0*m=?&pmOK4v!7Ls+HKT+$!1Gn_BrAc)ZU)G#EEbT drWm7}*7LjmsPpiYSJ2<~F64@Ep&<O3y&h`lnfm|$ literal 0 HcmV?d00001 diff --git a/tests/data/references.yml b/tests/data/references.yml index 6ab753a8..4a18ca69 100644 --- a/tests/data/references.yml +++ b/tests/data/references.yml @@ -44,6 +44,31 @@ COL1A1: - XP_005257115 - - NM_000088 - NP_000079 +PIK3R2: + accession: UD_144959560058 + checksum: f696ee19bba83e899ed8c0f2c2f2ebc4 + filename: UD_144959560058.gb.bz2 + links: + - - XM_005259824 + - XP_005259881 + - - XM_005259825 + - XP_005259882 + - - NM_015016 + - NP_055831 + - - XM_005259822 + - XP_005259879 + - - XM_005259828 + - null + - - XM_005259823 + - XP_005259880 + - - XM_005259827 + - XP_005259884 + - - XM_005259826 + - XP_005259883 + - - NR_073517 + - null + - - NM_005027 + - NP_005018 DMD: accession: UD_139262478721 checksum: d41d8cd98f00b204e9800998ecf8427e diff --git a/tests/test_parsers_genbank.py b/tests/test_parsers_genbank.py index f997e89c..4c27c9a2 100644 --- a/tests/test_parsers_genbank.py +++ b/tests/test_parsers_genbank.py @@ -37,17 +37,47 @@ def test_product_lists_mismatch(parser, products, expected): assert parser._find_mismatch(products) == expected +@with_references('AB026906.1') +def test_include_cds_without_mrna(settings, references, parser): + """ + Annotated CDS without mRNA feature should be included since Mutalyzer can + construct the RNA from the CDS. + """ + # Contains one gene with only a CDS annotated, no mRNA. + accession = references[0].accession + filename = os.path.join(settings.CACHE_DIR, '%s.gb.bz2' % accession) + record = parser.create_record(filename) + assert record.geneList[0].transcriptList[0].name == '001' + + @with_references('A1BG') -def test_only_complete_genes_included(settings, references, parser): +def test_only_complete_mrna_included(settings, references, parser): """ - Incomplete genes from the reference file should be ignored. + Incomplete transcripts from the reference file should be ignored. """ - # contains A1BG (complete) and A1BG-AS1, ZNF497, LOC100419840 - # (incomplete). + # Contains A1BG (two complete transcripts) and A1BG-AS1, ZNF497, + # LOC100419840 (no complete transcripts). accession = references[0].accession filename = os.path.join(settings.CACHE_DIR, '%s.gb.bz2' % accession) record = parser.create_record(filename) assert [g.name for g in record.geneList] == ['A1BG'] + assert len(record.geneList[0].transcriptList) == 2 + + +@with_references('PIK3R2') +def test_complete_and_incomplete_mrna(settings, references, parser): + """ + Incomplete transcripts from the reference file should be ignored, but the + gene should be included if it contains another complete transcript. + """ + # Contains MAST3 without complete transcripts and PIK3R2 with one complete + # and one incomplete transcript. + accession = references[0].accession + filename = os.path.join(settings.CACHE_DIR, '%s.gb.bz2' % accession) + record = parser.create_record(filename) + assert [g.name for g in record.geneList] == ['PIK3R2'] + assert len(record.geneList[0].transcriptList) == 1 + @with_references('ADAC') def test_no_version(settings, references, parser): -- GitLab