From 8fac2dc7918918e2afab066e2c088f726daa57f6 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Fri, 18 Dec 2015 15:48:40 +0100
Subject: [PATCH] Keep incomplete genes with complete features

With this change, the GenBank parser no longer discards incomplete genes
outright, but keeps them as long as they have at least one complete
feature annotated.

For example, the PIK3R2 gene is annotated on NC_000019.9 (or a slice) as
4973..>22328 with two RNA entries. One of these, however, is complete,
so it would be a shame to discard the entire gene.
---
 mutalyzer/parsers/genbank.py      | 162 ++++++++++++++++--------------
 tests/data/UD_144959560058.gb.bz2 | Bin 0 -> 17122 bytes
 tests/data/references.yml         |  25 +++++
 tests/test_parsers_genbank.py     |  38 ++++++-
 4 files changed, 146 insertions(+), 79 deletions(-)
 create mode 100644 tests/data/UD_144959560058.gb.bz2

diff --git a/mutalyzer/parsers/genbank.py b/mutalyzer/parsers/genbank.py
index 266def8f..65df3512 100644
--- a/mutalyzer/parsers/genbank.py
+++ b/mutalyzer/parsers/genbank.py
@@ -57,12 +57,14 @@ class GBparser():
     """
     @todo: documentation
     """
-    def __location2pos(self, location):
+    def __location2pos(self, location, require_exact=True):
         """
         Convert a location object to a tuple of integers.
 
         @arg location: A location object (see the BioPython documentation)
         @type location: location object
+        @arg require_exact: Require exact positions.
+        @type require_exact: bool
 
         @return: A tuple of integers
         @rtype: list
@@ -70,10 +72,10 @@ class GBparser():
 
         ret = []
 
-        if not unicode(location.start).isdigit() or \
-           not unicode(location.end).isdigit() :
-            return None
-        #if
+        if require_exact:
+            if not unicode(location.start).isdigit() or \
+               not unicode(location.end).isdigit() :
+                return None
 
         ret.append(location.start.position + 1)
         ret.append(location.end.position)
@@ -81,12 +83,14 @@ class GBparser():
         return ret
     #__location2pos
 
-    def __locationList2posList(self, locationList):
+    def __location2posList(self, location, require_exact=True):
         """
-        Convert a list of locations to a list of integers.
+        Convert a location object to a list of integers.
 
-        @arg locationList: A list of locations (see the BioPython documentation)
-        @type locationList: list (location objects)
+        @arg location: A location object (see the BioPython documentation)
+        @type location: location object
+        @arg require_exact: Require exact positions.
+        @type require_exact: bool
 
         @return: A list (of even length) of integers
         @rtype: list (integers)
@@ -94,25 +98,29 @@ class GBparser():
 
         ret = []
 
-        if not unicode(locationList.location.start).isdigit() or \
-           not unicode(locationList.location.end).isdigit() :
-            return None
+        if require_exact:
+            if not unicode(location.start).isdigit() or \
+               not unicode(location.end).isdigit() :
+                return None
         #if
 
-        for i in locationList.sub_features :
-            if i.ref : # This is a workaround for a bug in BioPython.
-                ret = None
-                break
-            #if
-            temp = self.__location2pos(i.location)
-            if temp :
-                ret.append(temp[0])
-                ret.append(temp[1])
+        for part in location.parts[::location.strand]:
+            pos = self.__location2pos(part, require_exact=require_exact)
+            if not pos:
+                return None
+
+            ret.append(pos[0])
+            ret.append(pos[1])
             #if
         #for
 
+        if not ret:
+            # No subfeatures found, in that case just use the feature itself
+            # as if it were its only subfeature.
+            ret = self.__location2pos(location, require_exact=require_exact)
+
         return ret
-    #__locationList2posList
+    #__location2posList
 
     def _find_mismatch(self, sentences):
         """
@@ -227,11 +235,20 @@ class GBparser():
                         accession, int(version), match_version=False)[0]
                 except ncbi.NoLinkError:
                     pass
-            i.positionList = self.__locationList2posList(i)
+            i.original_location = i.location
+            if i.ref:
+                # This is a workaround for a bug in BioPython.
+                # But seriously I have no idea for which bug and couldn't find
+                # any hints in the commit history. So I just copied it over
+                # with the last changes to this code, but it can probably be
+                # removed.
+                i.positionList = None
+            else:
+                i.positionList = self.__location2posList(i.location)
             i.location = self.__location2pos(i.location) #FIXME
             #if not i.positionList : # FIXME ???
             #    i.positionList = i.location
-            if i.positionList or i.location :
+            if i.positionList :
                 i.usable = True
             else :
                 i.usable = False
@@ -305,6 +322,13 @@ class GBparser():
         mrnaList = mrna.positionList
         if not mrnaList :
             mrnaList = mrna.location
+        if not mrnaList :
+            # If the mRNA doesn't have exact positions (e.g., it's annotated
+            # at `join(<1..11,214..548,851..4143)`), we still want to use the
+            # part that is in this reference for matching.
+            mrnaList = self.__location2posList(mrna.original_location,
+                                               require_exact=False)
+
         cdsList = cds.positionList
         if not cdsList :
             cdsList = cds.location
@@ -493,12 +517,6 @@ class GBparser():
                 #if
 
                 if i.qualifiers.has_key("gene") :
-                    if not unicode(i.location.start).isdigit() or \
-                       not unicode(i.location.end).isdigit():
-                        # Feature is not completely in reference. Either start
-                        # or end is not a Bio.SeqFeature.ExactPosition.
-                        continue
-
                     geneName = i.qualifiers["gene"][0]
                     if i.type == "gene" :
                         if not geneDict.has_key(geneName) :
@@ -509,14 +527,6 @@ class GBparser():
                             myGene.location = self.__location2pos(i.location)
                             geneDict[geneName] = tempGene(geneName)
                         #if
-                    else:
-                        if geneName not in geneDict:
-                            # We should have seen a gene entry for this gene
-                            # by now. Could be that it was skipped because it
-                            # was not completely in reference (see check
-                            # above). In that case we just ignore any of its
-                            # features.
-                            continue
                     #if
 
                     if i.type in ["mRNA", "misc_RNA", "ncRNA", "rRNA", "tRNA",
@@ -537,8 +547,15 @@ class GBparser():
                 myGene = geneDict[j]
                 self.link(myGene.rnaList, myGene.cdsList)
                 for i in myGene.rnaList :
+                    myRealGene = record.findGene(i.gene)
+                    version = myRealGene.newLocusTag()
+                    # TODO: Here we discard transcripts that are not complete
+                    # in this reference, but it might be nicer to still keep
+                    # them so that we can (for example) show them in the
+                    # legend. Of course they should still not be allowed to be
+                    # selected in the variant description.
+                    # (Same for leftover CDS features below.)
                     if i.usable :
-                        myRealGene = record.findGene(i.gene)
                         if i.locus_tag :
                             # Note: We use the last three characters of the
                             # locus_tag as a unique transcript version id.
@@ -550,14 +567,13 @@ class GBparser():
                             # underscore. Or prepended with a letter. We
                             # really want a number, so 'fix' this by only
                             # looking for a numeric part.
+                            # (Same for leftover CDS features below.)
                             try:
                                 version = LOCUS_TAG_VERSION.findall(
                                     i.locus_tag)[0].zfill(3)
                             except IndexError:
-                                version = '000'
-                            myTranscript = Locus(version)
-                        else :
-                            myTranscript = Locus(myRealGene.newLocusTag())
+                                pass
+                        myTranscript = Locus(version)
                         myTranscript.mRNA = PList()
                         myTranscript.mRNA.positionList = i.positionList
                         myTranscript.mRNA.location = i.location
@@ -580,38 +596,33 @@ class GBparser():
                         myRealGene.transcriptList.append(myTranscript)
                     #if
                 #for
+
+                # We now look for leftover CDS entries that were not linked to
+                # any transcript. We add them and the RNA will be constructed
+                # for them later.
+                # This does mean that these transcripts always come last (and
+                # are shown last in for example the legend).
                 for i in myGene.cdsList :
-                    if not i.linked and \
-                       (i.usable or not geneDict[myGene.name].rnaList) :
+                    if not i.linked:
                         myRealGene = record.findGene(i.gene)
-                        if i.locus_tag :
-                            # Note: We use the last three characters of the
-                            # locus_tag as a unique transcript version id.
-                            # This is also used to for the protein-transcript
-                            # link table.
-                            # Normally, locus_tag ends with three digits, but
-                            # for some (e.g. mobA on NC_011228, a plasmid) it
-                            # ends with two digits prepended with an
-                            # underscore. Or prepended with a letter. We
-                            # really want a number, so 'fix' this by only
-                            # looking for a numeric part.
-                            try:
-                                version = LOCUS_TAG_VERSION.findall(
-                                    i.locus_tag)[0].zfill(3)
-                            except IndexError:
-                                version = '000'
+                        version = myRealGene.newLocusTag()
+                        if i.usable:
+                            if i.locus_tag :
+                                try:
+                                    version = LOCUS_TAG_VERSION.findall(
+                                        i.locus_tag)[0].zfill(3)
+                                except IndexError:
+                                    pass
                             myTranscript = Locus(version)
-                        else :
-                            myTranscript = Locus(myRealGene.newLocusTag())
-                        myTranscript.CDS = PList()
-                        myTranscript.CDS.positionList = i.positionList
-                        myTranscript.CDS.location = i.location
-                        myTranscript.proteinID = i.protein_id
-                        myTranscript.proteinProduct = i.product
-                        if i.qualifiers.has_key("transl_table") :
-                            myTranscript.txTable = \
-                                int(i.qualifiers["transl_table"][0])
-                        myRealGene.transcriptList.append(myTranscript)
+                            myTranscript.CDS = PList()
+                            myTranscript.CDS.positionList = i.positionList
+                            myTranscript.CDS.location = i.location
+                            myTranscript.proteinID = i.protein_id
+                            myTranscript.proteinProduct = i.product
+                            if i.qualifiers.has_key("transl_table") :
+                                myTranscript.txTable = \
+                                    int(i.qualifiers["transl_table"][0])
+                            myRealGene.transcriptList.append(myTranscript)
                         #if
                     #if
                 #for
@@ -655,9 +666,10 @@ class GBparser():
                 #if
             #if
         #else
-        for i in record.geneList :
-            if not i.transcriptList :
-                record.geneList.remove(i)
+
+        # Discard genes for which we haven't constructed any transcripts.
+        record.geneList = [gene for gene in record.geneList
+                           if gene.transcriptList]
 
         return record
     #create_record
diff --git a/tests/data/UD_144959560058.gb.bz2 b/tests/data/UD_144959560058.gb.bz2
new file mode 100644
index 0000000000000000000000000000000000000000..72e9dd0f00e3dda68e2e7d6d21d32719938d67bf
GIT binary patch
literal 17122
zcmV(-K-|AVT4*^jL0KkKS$<63j{q!Se}I4RAOKqD|NlS#|N6iG|L|aE`_Sp;Wygf7
zs=zfs6j!0`m2rVi$KPN8D5FmS1E2r^004(=jWPhB0001>1b_u3wg3Y_1b_m8pay;K
zsXoBK4uL=#JB*iFP&KBF!Ojg1ncWEGkz){0l7nEvmMyV{hK`v~8O8v0wx9v;2}nKa
zl=Pw+TQyW0Y=K%5s#KlFS5$4FFtpLxfS?`6q<}8=@3SOYVWHltrLY0@cF+J-YFeW}
z0MNVZy#j^ndEf~l06_!<$jF&76VhO6Wj!*OGMP^lAF6mM>VBZp$fYEKBPq2s(U8+X
z02u~=00000l=4)R%1l$jMvRRIs2XSh0000000~p_N~x+(YJdO$00EFPWB>pF00@*#
z2%A9-04A9UgvO=-r=XrBWMT{u0a8Q&KtN3jX+24h)6{4^Kr{dV00I2J)~EC-`@6b1
z*D!=N{}wT0O}>M;Lz#*xdb8bd-vatd1ce|{P>hmA6<LI0UuL1AUJ$E^p}VA+<g}_2
zDlKx25b~lzvR$%ff$n5PqF!F<C{`3indo5HEoWKG*RJnpj_xBcdEj8NXQ9%Crj(Pd
z;y@+>13qoQdjW40eNJzOz}t)0Fi?;$33qYb_V)APXLF><mMshz(J0!6izQ@C>T$7;
zq8;2XCRC42M!spf9x1ZzmDbK{5;BEZWm)ZiQSI&^dLE#rDkLR}5oy;1xB^_W%d~7(
zz`e6BC9OKnQYhKXj<y*RR_)JAVz$n-I?K03^H+%~{A~sYuhQto2xuHqA6rq+>FLw|
z7O<AK6roZ>S4WR;LDSsk{+_S%o5PC!eoB534g4q$Xf}vr3c)|@0=Lxc$Hegp4zN7F
zNWVU~Sr@VadJtD6G=ZIdC)GRO5_{=2oV11KxcJ?C{x9439!nD^cRw1IrOJ^NE^A-;
zu~K6ie$U&TO(<wL2|JUue|c8^7lN)k-rL67$dYf7pw|1kwk%4TYgQ)z$5NQb44F1H
zk5iM4p{1dlgllV(t~f}(oLg%dT5W<v8Nm(y68^w@yHAw?+?TKh4{V6^4}#rty??1}
z-kN@1zlWEz>G5H2B`KHyKwBz{`*HVbs)rfZ-K9==rO{jNrSjju0msKY_qP{;L=<MI
zk0Xc#me};BGf#|dz6E7(pP!ZmA(+wcXD(t$z?R}K`YUs@Uc$YL(AnzXzWkx|4mTIK
zV`UPe_!Q|RoNvq3M50Wu3pTL_nX)qaM%|CCmBs<+#iEpppn**XvY=EToLq7cqE(x+
z*_IhLJ`JKnQ93IEPL&uTgqh)<13XH3Y*36GBD9Kj;y7JIaV?rPSswf^4JkDwRe;o^
z?b<29<gZbpQbz@|)r5!`6T(2raA+Apy4ov*5aRGjB$7!b7jE7gQ}y%vt==a7-p1Yg
z|2Mz8f52#VC)^ZaO21<Lfd299A1`}+C$r*N<8AFFIH9F#O48qkr@eDw8h~pTC7W+o
zz8l--NNHx;S4&Ou`jbAg#e&i+zFY4w8psm*mp0jL`;M9yUl{#g`+Zz_LHdtd^{tu5
z6@jFB18%2OyQo0WCVM(1;LG-08T9+TFSzdw_INPrvs*Sh<=PT^+T~t8To=WCqY$Mb
zN#xKt?B}sA9$-gT6r0AKAWf*qF^RX~`Dyt6Ps{I?VVTIZNC{}mK4xY+&jdrLkV!~q
z>_RPnBMy@Qsud|`do(#JMW&3~H?G!;6M%tD8Dy?{8&uU7nRO}&0zIksdGYT<$0hUa
zHEeR;xqa*4e7?lQ%^BBiH;+nh4(pV;iD5Rj)kf>LvxyB75q%D@C`#G)?@6$DB;9p%
z6R=q*jt4}!39ZFGS2Nz4BNPH%Q?0%~Nxn1P^0-J2bOs%a60=!I=W7@a*kE1EfM6mk
z*4?@=Hk!|ud(wxZ6t`rqCU#hm;}H)hhm&*L3=EKQ{pnJbPj-kMh#Btf>7FWF*4Irj
zb;Q&wTHNVy(j`MWZqd5f%#{<c-@KQ%sq1%2kw}#B;y?;QOwUEhQZF@_XOu)d1Ih@K
znn;%NT!`P_WTuyV(zWJ6(&_Q;--X?E#n2^=j!U7fq|Cx?#`a;8jZaXBlJs(Q9G?5f
zGR;Dubnw?Q*sZIq+PQ9{#$8>ktQYcZ_af?e3bV$>DGv)OOOeKTwo)9M1t;3K<MXbm
zK32le9jtjVDXCUk(pYl2(^(z|(s}fEf4A4~e}8-58*Jei_Ds7o<(zkFvoa{z)Jx?J
zi#hc-9PI^$ZPx(5bL@GKv0bpshnWgkHj@Uf#AdCK&>GxmOdQ9tuMx{`>Z5jU=Hn@C
zp-9VXeWRf`o?o^3?)mLw^Urx3J>YHgW4XskoW9&^Iuyl`Un^E22Zx7ta{J+oa7pr$
z9w{)0!d>xXBc>v~8HBLR>R>zqFZO%xq)AU1S{{#AS|{vG*XX$p5!cB9?xJ7UE1)Uj
zmu5IKc0!J}i2cs$a$~uOiaa&&%EmfSx$)D%yEZeyH}M&L=z;8}wc1^>x>DHLSc%@Y
zhK=@00SqSUh!3;d-jpWT_(v(3eQ*WQWg`Hv#jqOol^v{`Ge9P?(dQi;+`{dsW_;FO
z^rJ#StO74J>N#`Tmfry4zq!5>=fpERu<Y>f6s)TY$A2gjW2p1?aOuMCT)^)ccd0)|
z==~p`^jr5`ar(2`XznBO$!p#N*PQC;;icC!w#G!HpT;HcdR`^-9PCD|d^bw%w)5cp
zFGck6#liN4fj>{~av!m}A;v@>%i%#2$>?AYvXktugiWbmmhjDrPSOxyBp3%kd=jH%
zKdV0SBS~6WyQR%F*ajWxXRWJk77~4c_<y2FBz>pZ&-f5$Mqv4a;~RqyQ~gqpeqM84
zWt$I2pAz0sG<)*T(OIS*w)d0jb0~(GDh<mBylHWI<m+<Im2%K(a9cncpWnOl-=DMA
z|4&pkf7h;!Z-|fgFOCadvHNlU`>IXGXL`8C@}|;@`&&AE`6X)WjMBJuc>MFmdsFV7
z&gx`4>luq>=w7v_%Q@ejwYi9X80h($X4}6ui|rE2^iQ1Wr5xpU>b2r|ZJNb1!-rJW
zFVy#zk<qUWjMoyYal2ZF3h{%Mu48J(EYpo7(Hlx5T`KZ=ZNgPyny;-m#Qn`(A5Hnq
zqPl8U4!h@uyy#C(t#zJWusN&i3_jdj5j`qbsmyD9+eV*DQYBo)x*JPL+Z1wFu`Z&o
zTo&aPHaIr+KN+sy9`7uV0I9oi9zrG?I2=54kL~-h=JDy}rs;i_bf_~2vmays1Lgm&
z$<F`2vai!WzaLNG6W)aXob9HYW*YeZnqrlw@!l;u(`mMvS^vKYSeBLJoi>zIX=+OP
zNA%m2tv#pu<(ubiEuK)?;}etUH8YJOoSG#O6ythdM&XO_r}b&Yty=JWe}|KATXW?%
zmiueZZFy=oyT#jyc%%^zA;0r!MJHNL-CEXit+&4mZ<|%*&CjCzA6+e2zV*XxSBr40
z;@vuLZK)|sLfl~G6|YAcH%CVG)-PJ|y?Nc`c*%Rp9kS09;`XiW{&eMxM=lt*O?qXT
zayU|Ec9$J#6ya=by<(fVQ&wzo!#8dAJxqF^Ti$(-wZIPyl0GK}#FH{qAe+0yt6AQ*
zy>Cm>^t~RjC;L(bI2=n1b?epP&&$iJojTf9CUm(rZr=jZ?e21gC*?c5b6+-MyJwDG
zZc}P$w)e*zzO<uedr~x%uI)U{^|MANJN$h|{{Qv;d;emp-31%aB8OUZcC%KASadyB
zYh>w({?X6UR@e02{+i8|#2AQqQkKhJQ?(wf*m~N`*J<6N^R?QsIm5K#PifY1Xc}{*
z-X%G`sMUw8Qgw@zYmU&v6}ZKt7rfXd7p18?ULwPTGMX6}T+$0@X2DruhI?y@PR3eM
zooHNPEXBfEwqdP0X0?gc?%|UK6y=AVYidrFh+u3LtV;>~FbD8KAG$A7FwJ&%8QSVn
zc!-omt)K+4WgOfaL0l}`8-<XdtA_CCLNs~Uulx(|CGt%ZT{Rnxc+Hn^M3Yr$R^+x(
zEV)uvBeuI%xtCh53c3lX%JD7H+3eKR(zpu2mR5uzv6W42y7JAN+9@>#!Z?Qdoeez`
zL%eOYG&bB!ODS%$=9bx;#`1{~Ig)h}9v$9LVIX~8=mstE70U|CWm@57em~}V>c+JG
zB&Wty(`~j_Ka(joXq>YI+1RN^P}F5MQe`GoX*My6<7<CBT%%0^{L}GGFD)Q7Y9^v;
zG<J?jaMSplaZ46;hPq=6sLF|_H0l;PCakHLocF&i@08ilq!LVM%PnyxDps(nnz-#Y
zvYMhs9a)LDCcA13QI-n1wDqStUe+$n@w9s7(ZIDYA$$u)=++rXXdBAkGIiym{Tg!D
zd=A>?iVHAk%8YHudolSt)w!S>35&A3DX>E-2I@{6{NVkixl@AXpyD)|wEqi8j&{FJ
zyJr9A4-L`MJ=1ed&50)2q?oG`O~eiL)|_WJd(urj@Znj~my25RtIAOJoZ{A;MK0}U
zp+<t&tUO-POU_ScYPF%-Rw;TcYTB_@RbZ1SWxj;qzLKVx*48k``q7@v(!-!A?wL`R
zB4`LxW=y8UlQRr7GMLhA?bkjP)|=wvF|VhJv8^Yn(tHY11P?6)np)PC(tLKF8A+~9
z5|ByUDN4CoRwX)OwX?_~Dpi#kPIIX+kV{7-^avdWmc#oxI@|(|(3Yyv)U2+wrrhI&
z_uqxM$xfmPiAwSb<pFzIl$b<nr5Zh{oMN}Dg;46@Czc{2FA<vKG**h%IH0tuPH^E=
z1U|UFwZ5-gKkgw!J11hf*0j?PYXiHxxNY->v%IH^ha)I+Dr#!ihtm_5)}?i*P>~u_
zG_ibQSgvVWX6<`5YSWw(yF)xkrOMKjm8(cEddhaa?^cRLS}zxz=+?ZcInmVv@}`#(
zlwC>|B=yBOW_@+O(vkU26sHJ>!{{4Ecbx0BrA?us+Hf8*S#6AF*RYc?%ndx|Q?~e)
z_NjLQR#4$?^Z%0D7T0cOg%*L5%A8(PwzjNOg!5NS6VegXJG93Nj0EEq(wMB3oy4C`
z0VWcTxOTp|<()UJm~q@~9(IY_U8`+fm>rp`j36wbS4X7hdfBG-dPBICfX(fY!UKVb
zOs7Rq9_|BO@mWe)*4f2%3*s)&^QK!DIv(0>Y+W^^@|e<Sd10FrvsApLXQZrNmU-S*
zr0rW!cZ*iVXF19)>pCDX@ShDl%Nfn8(syaBy{SvnHmfr;$weO%o;A#^mtb}su*w=N
z@0{kd#cLJkc9xWt=&eaLas`$*9-^h$jSRZd($f>Yr=1}-B@|M5J5n<{S6TT><{KAM
zl!ue3t3BziwY6M#W_Yc$T`P&HE5eqsh<0~sMD@0qoLTJ>i85|#b|fkw(lwlbfrU3{
zx>t;}Pg`Eus92=yD$dfy?P}VNVpN(W(Q4Y#yk3-?QH#aMu{qYz^@p>~-qN*bPL`YA
zCsvz2CntD-!&cL_F^l1()MjfjC=0diD5T<3(-W>P_abWYy7PslCV={a;2kr&Yb)n%
zGg$!c%|l1p_Py_HO+FaAJSYclVT-}Sj}Gr_=?YFfWi+8;`b4d#3|3+b-muA&ySsF@
zF`DN~-zx9ByLTfvrnl2eQC;<Um?ab*L8_MxGqrcEww2pml@y^#<`huPIq7?5(Uh_9
z0Adn{vU;fY|0DLL_G~JX;c8S^{g&hO2fX#5N+5l-Xb@lTUpgg&)6x4LWAi)(=$|J$
z`0gA&pS<92qlevvuDmX#bMuG5JHB7s2nGLn03rZt+f)eE5)}yZrOO4QPGBTb1*nJ4
zbNzm!%luEad&_TpiFO8r_o8fm@~KKiQl(0%C72(#ftaWbfG3t0&}yro3<rWf+X|nB
zsDXkmO@;x84U+8jgZuly$PWF(_aOLXBJKfTf23ePlYxd~0no7NrSQ~T>5Pv9(^1m_
zd3#BP4Mz4C(yvL^-e1Ro&}oL+(8f!->x?^$_P#>ZCR8m$z}y&AmNg;LgD}ZB7T65E
zoC$&F0zs!c3@{cJlH;a$W9B<_%wA$R$ARBbzC$6_PMt>ZTzMTb;AJOl{Y4Idx4VR;
zadv@Kb%CnEp?5GE0n?a~jE-`L3c|8OtT|3uFAZk@-r!>-14N&~g<&L$SEk!lDN#yC
z2#dy|&*guW-}l3zwwcc(4<CTsdk((C^xktPo_>caoJv|x$gum|zJKie=MNJt<E3u3
zl~qXpwH8#jZM1$fwwH6?rg}{W9}+%lsTV&nAEu&)S`CI?y(IOx;!L9JLj<i-?FM)c
zQP_1Kr1z6dM~O1uhsPtO<i}Yx;A4g&!h)UQ-yL=rlT3!1?whn2f1BEN7n84+!vSc(
zTwyr<b?c9bj9hUgrVpf@<nOS{87NTC%KxpCJIRfOttNeB@-iCgm{1IsyMSf`ds5vN
zZuJm3WQp35TdbE-sa^T}Iy<_$^qx<zy;T)RyB(}Ey^@pd?lvAhQ@gHv4{FMjL9q7i
zDowRbm|-Qf8+%|rdi3B_oD9Ye1x1G#NxgLFJm@sweC97B3<q9DLmkGb;AAR?7Z?nN
zNbEKk#<*pG>^|ck0@fSbft|*qL1EJz{29kiHOymSdGq9EI_5jV_zm#nV%G|W3|x;h
zagf4G4DuFOQRP0;b>MB|$2yID#kQfyTK#4@51Bfr`%Sy5lQEiDbR2rroQ_8f1xc=2
zWS+i9Ir>MIJ&(OUs(y6!@6+q=N~-!9Q2UMti8^>4hFNYbJB!Kr>3Q%n8VA7V2{+r+
zeQGj{R~Obx;w+Y}1_OawVbkPb7<u^Sg=9Joon+?w4H^yP&wWLQ&QvJGb{P#t53iq<
zewLP~s#jkedwQ0yEH-|>KV?*!zSGlhe;CJsmpn=AJMA<T(fQ&_CH;qqJbi~v*BOs-
z(0#Y5GEbZ}!pEIE!nNMQ!^KIExUg<F)MJQIu97+qe@B{wEA<|l^@iOAq_3f%=Hku<
zgPcd1fbcNLXo%lY-i8>DSX;isY$WNG#`?+VFw|UWdKZRWI29%XVcTJ#;&qN`oY-tV
z1+W+n;_`S-sOw4VVRgkxLm#1Wen*kBTB%v6WY9AhM|r5I*PN<M$C*0bD=s_=liTd_
z_lT+gbrJJeQdJ+f$G4hg^s1#%RXmuE2R|O~s!e{rN5IHX{bD@Cage}zj$(T?7&=YG
zs91aTykzh)lTnR^idbqmI`$aDZ@#Iz<#IKfjR!%LnQM3vYzGh(tuf$7h9`0wp;$@!
zMw7X-hgSCikHmcTBhR?bPfzGXd=H+U;=X%Sp2K^N<2@wXF^2=-T1p=;C2%P@lTOIX
zpxku0{Ey5>QJ-&Z$JU!xFx}3Tn(}k7yvxv5oQ|=Zu*7UCPIDOdl2~6EVCA%wnkBd$
z2cO{Xdwa!HN~KjQ6;(v_ygq&{*@rzX)5W{&Z&5ToMsV%vLZt2Dafi3-n9gO*hpt1X
z79C;Q+1z6$8e}~}@@uaG!`&KYn@77P$7<{`8#S=JI>QL-)Mz&slYxz7E<QE5yx(!W
za>^=)BV09LGaAWAU6>4(Z8&2g+>Ib%tdiI;%_viDTAm&jMnXank#k?V1rv7gCjG!!
z>H;qS8b>wLk+p^+pyC-Ol5xWcIH92Nu*Q426qGj^#;bH5-3GAR>V=14%$oLDgV<g>
zj3f2!CU;^pYC9~Pbs6F|81OY71}sNGb%%Z-z|3dzJN`Q0ofrmi&V^W5zTK|rcAbTW
zjbzh#D^Ed2vUKyJUs109CG_qrXE4_G-w}YTjHJQ!cx{&*yA3dtO~yHCWWj<q84Pcj
z+*28~rWH$yi*3$(;42P;A*jUO!qTLdg$f;{`qXnKeTO3nFVbi&kXlZzg27T@fY5av
z1{$`k{cYPm+KdKb{PF9fJ4tBg$i!rv`sba7L&&Z-`+bz8{nVd-eOParxsGWD5hRe2
zff457MCsq;19S?ZS_?}~B;)ZsdJnz_zNt@f?mWrFMwIFNdX0xq$E=)UG&CJ5;_~6-
zU_61O`7Gi&AtZ?*BRiy^NW7Ahi%#xn@u>~lKw%_b#&E$qLMCHV{HffpK0s#!4h_9u
z!QFAl{ELsxeg$EBH6Ioqk;I)xLFb_R{~c|R=r4tGHOOy<J#!s>+h93Pf}mss1AGn-
z`4^Tvxw~4~2z1-K$(-5H*fPhf%QLWN;V9;tvzd~dk4j{jkj;?AfWimnDmI7bwmym(
zr?+8otF9wa^f1_eB%0%x=fKp%3ygENm=5E7Ml8i|z_9ihVll0MBEx}0H0(Q%L74ph
z!I-~F7N3sY1?#6HA;yKN`wB^oU^Nz@nB_dOlV24jq`jn<Ew)-zm&++GGm`jKRICGh
zs-ay+afTb2zT%V9jdJ6Wt^(r;FAs6wtqTiDH`ID@)Yx0wX(qCU<2f>EGsI&jd&$o@
z3`t6<Don<aX*voT$*!ZQ&5O`b&}lI6?qHhCBIZMS86@0TbJlUDl53F6VqcM#<F~I@
z@X1_Hs(!RBQD5M&?c;u)oc7A0&7|{yu*hgKh92Y2zJjtI+za{+cF%F?te!=r_R4hE
zA-tV)9lLNg#BIR(9Y#-)N!tC#$6@BV-gXz099JJB#kxs5iw^7scf+>d%4NTrU$b=H
z&kdJlt<ScFwqw!Il0r!!$S1foXPeBac$`}S&98$2*kvEn>vvidXUOmCklk^R*nDC9
zH6_P6N5cjB41Za`{x6K~KNsmdkK8i(k6!mqKGJp6cOE^aEauXA^w41=A=BAgj3mzG
zVICsRc;kGu&sEJXFiGH0=Iy0P%$N^ZGqAieVk|732J&S?89NNch9jmk+xGv%_oLlI
z+FDG8zDIG}*9=1l#9sCt#{>3B<18VJU9W6p)p6aTp_tlhCOHc7rD`9bp@7b8FB5hj
zvu{Spi*aDcWHguHd`wLB8_8`qq^UI+qvm!Nj>AE|Soh1GC&p=hx{GlSi8|yk&<MDj
z;CZ^B_*$Xek_3dn2Sn%NrtXW0vTvKXazHi98{J)UL6IQ|7({MN`SXF0KEj8@F_YY7
zCsEi`{(8d^)Mxmeha<4gK3!z21;Y4O&sj3eXPC4%A8}G)Crl)iHWMJyh0{vJTrd`t
zG#u<bMmb5QS%v$IWO6yvySE+2gQ9bsnYSDcy>J*!hBJ9E<YqUMZZ#i%v4YZAP}iW(
z!0eV949DwStEQsK$jRZ@WHXb{Vl5{F0pAUVyGcC;Lq_eEOhyuDDWskC8A~ZuB~bLr
zWixi@JB-Fdz1d4iu91U*j5d=S8u~CAiQ3$Y6FdfDgUG;SIU3nI++I-RQ(^b;F~J)N
z_!*4`1E{#RLZr(WKc#6WBe=><y~fv0?xDckdb|SQG`!C=2@g83X%LfSDHNJc8~1Cp
zskfhdeG~6dq#C=to5^_C)3ER~u)nO<Pi09-eX<x01$7$IDuz}VN3CfktXyI@ABF>H
zUmnBE>%Z45=3?5?he<wjX(v1l1&)<FjYd~-VPx1;bdx-jLAJ&eubywjVwm(CdWslj
zC8Wo%PDX=0B<eBpDLIo~qj8|9q5!Bq`T?jD-rP8ctFPivyI6yQPuYd9qpg>H+Gm$u
z-96v4yI&Reb*g81_8Y^0eQrJn!ov!WhEj3s4%><s?Iv1}*gsFs<B_(a_zu!sXgK&6
z94A3Z%ih>vJT=Z1OG%((-(l+}qmgm8IMkbuDS0&>1rE{Hp~nKk(zvuu_+}_tNrj~A
zj505mBLUD<n{MtYFPGq3LE~ZAVlFox^Z9F^kBX|K#C}Ka?UZ+Ywe{_e9^)rrzC#)0
zTzc!b(xp{YPPCKqlD52`ZobLUue>KCL63Tq-`-6=h9kJv(s}tDCT>NcfVB#~OKrCc
z<Sm2=%TO^O99wF+>rHO+wmo$7?Aq?mbCC$LY&-qq&_EyuNPeHH3VTFG|L{Phq?l4y
zzj58vY-ZXt?VD`v8Id5We814w5*ji3+on|PijlSIds$()(%d^?)2@-KWsK;?-9`{9
zSxP9vrLBNPelqVdm&KJFoQT&9GYH8-!mVqlLnX1c+(wZ#C2ifYViap^M6o3$GtD`K
z<0w)CUKwwzwJ%zMHcq1wDKiniI-On3uv~~8$B1W#W=r&apWf%)tfl?`56|w3+V;Hg
zeQj1|G&tkFUZd{@SwV%8D9bNaf7j1DF<&}cpG?}%V_G)MI~vg(aocE;NL7=@7q?U;
zbgdArKW*0L$($q7tF{DjwsA>wF|n|Yt^|ZknaqpIBaN#SGrMh`)vP&PbZ)YFV>Xc?
z3ibUN-w-3g>O2p|Q>7-JiX?PIVO$q7^iIbauQ3=YW(%}z3#J^2C^qYkq`_}kk($PB
zN{P?Cyn5eIAez8NAkBN(lpw@cP?5I9x6l?ar$t7WSjal;z~n&U6J}j#(2|<Lsg=o?
zmua%hGy7Xufj)=6e7<!c8nVna$7oDtfJBn#Cu0$fmz?Vo6qwoIl=@nDJwY)%(pl>m
zMl@9;?h9#aH&}K!2~FpF52W&T9*p_fBR10Z+dDg(c1x6SOA|{SaV~<>zZcnYUy5um
zB9O(DhO{Ja4JX&VGrGc6MV`FMwTA>-oJm7MrRXP(J+1czc}Z-c7{n1`Sg<G5+UYii
zm*(;4alFPWtc<&8%CNLZZFgC-t6JK|R@7RnVno~!M>w~mFL8HbH@4$R1uXb#LkUSf
zeBSpmv7RXsRe|#9nS#`7P9a1(1h5I=I~5+p-DYVCoiHdv%=e+=layaV5#?dRVb<0*
z=-cC8Y?6CN%+kcR*rMDu=Ss{wLA(;0l(GcCPojGqke@v%WDksQw(W>pwau~XnMpD4
zTb5hm$wm@>rynysu?0R4Pn=?Gr($bv?J*&A%mmYIiI%mGtV!wlezf~(?|pL^+bb;-
zh7PI)Q)~~r&cnI$JCb9>Ye}U@9h(~tW#L%qt8M}(IY@o}p&z>G=jX~u%E6UvnK^)j
z6RvtS>Pu*?ph=D;p-68sZ#;RwPivii*5}<_<oH-x4(%E^@XqO>!c0PT8Qq@1?>NkZ
zZ#~|etH@ZYM%HZ(WqmNUDwTI*FGWzPCC%MwIm49*>pGV+nUJLz!e7qZeq#PQsq;HF
zhBPsCG$t&gI+I#n=L{@lW%_l*BpK~?n;2HD==3g;Pe!)eJ7<pLuEx{idMx`el5RNB
zc7{v2-*0!1Z$R2yi>j7bpv!u+jjJgx9(hIAorck6<4-X**4o2CRKulZ;lDNW1zE3q
zGaW4%i=nPs+B=9vQ(jn46+}0&8`0`g8HCV-Sq=53_o9uEv7;^H-1W@HY_Q89IAU`N
zHhZd=bUy>iB5}~R-Mfk8%4MiS&MyR$MU?hwte$4qtfeU(mD*!565DNVakjSo{ol3i
zJ;eJA3G`$w;-$3vJ*D7Gbf6X{W}xO<YCRtNT9nsm&Ow$<W76d;tpqjNyG;i$5=(VU
zT}f=OClq+ay=oAZwqj#}(gbSbt%Mpp4KHl&*eTI%v|Y5<c=a%nnAr&`vz{S%3x->0
zLnlVIRuwXp3g^&m9GN>KU6W=7Ot)8x#vXAiud{&Jm}1Hjthh>gwnH&IF%Tiz6#hTR
zKG(qXd>?qhp7)CzONz4j<{x;E5a-LG&9d7Jv<bPeHY_(;Ll&`NHR_2DJ4OqIVhBp?
zx2)GqE2|Jvv{sY}Y+k6$wbe&xhn?-_5q4}^%^lmLjPDGGXCmKu`FEbLH2fv`>E4n&
z+F;?&LtJ-w|Bq^4FG^iLeLE|}NZoaLI@iqqG`=3rUoVGyVJ_<R0q_v~J)I`}u^koX
zP8~=`WV#>~)d;4a3D(d1B}}%yxcZifY?Bo9f0sxOgdTf)d3K|_Z)}&Mety|>`d(f=
z%AY6mQOySrEz;qBK1jQuiE^>S&}h3ik68X7but$<@On=PHPC*rJ)F3EW98{wsVYJ{
zY%wd%RS}&TY%S8z^6#4P^X}GL?swfEtLpsUZ-L(5F2eA#)_x6^E}JSaX$vj9+UEd5
ze>?bd=g*$`HYSx#K{6dnDMG!5tVGUhk%ih8EJKNP(-tw=S<?`dsvA(2*l}qTgVZ)6
zLRi7<3dO{K7q7j1nV9{E{s%qKVJx6G^65MnFwWLzVTg!`ahQE&4GhfALSlXz%aKNw
z)VTagcb1W&&qX@d&ZjJ^NO1xT6?+V>w6hmrurb)JW!msTVHK8i16#&w!zr%T?zJ|#
zZT+9O=iMStejOwF(1(aWO79;(eH1FQLzWxoySYP-kwb6B?@8<Uw>;*}dY=w_)xCXX
z%PA1E_P@dWZ%?Do!|18F+hhy{5@0kSmp?b?4=@zpr|IwT>OGt_LL;jCd3j*5tjxiE
z&3sJO+&fQ|ybvUCq)T7yy)o@P?c3D<^F967S*DhNkNAh-u>sqFKG+-w(*2*CNbc(9
zM_R?N#ZBWH?i%H@DGlUxwuVgAB#EfDWn(QT(D;~ByyX;JTMJr+(t<j&w$?C-bWW+E
z*0sf<O6rDD9hN1FC2U%W?Tk@}qbt`*HD|9^THN}wMkKXn*42j|wq9*rk#^^C&yxkL
zh2dFXsM&@}QS~&7w%J9-&x+?8v{e)vXQx70E3X9@2LQ*a+f8={*N8QrwzE01Exzfu
zZ9W#v%ErQ)g<gBO!jn(6<j!b)V>PN(#Vxtm$b#D(yl0(tvTfNipAS57SN3~-UEgEu
zdwYCC@3n539;aJblGge--!z{2PKkCgnoPS5GpCcEWKk|B)+WJXh{du`m?L?_`D#`r
zdX5%(#)-l++fyq$7|z9-G#?nQp|TWibg?ocX4_VlTWyqrVzx@LW^6|VdX6!sg%aCC
zo4}PZNrjn<&hKv-eFDknl4ix*it%>tHP{}(VJ^pX$g{a_ea_bL2`X46^K6r<z>F<B
zjoTZHle*4zw#gH)zULPRgw`}(WwP??ODxdYC}c9&>^E&4&#OG~t&FU-m)*7iF#&6}
z*|56o<4pE*6dI#4;4y4eSVrHPdRd0h<`zTHdrF612tt)EC?vNJCG1PiE||nvExnDX
zseOr1MTDP(=drXSqcuS~lvB+hirNH<t9xgaak9;Ao2#v*grv&xILH++J(r<VN2LPh
zCakA*jAnF6Wt&j$(&uxaQc@H%a?Qbq+g^vdojIPz8ykAK%ELyrFQ}7>26tLY)=5X#
zxEsU}I@m^IsOw^FYGQ4!#f|G)EX~z0#M!ea%VF&+4hDKQZj3C7`Jx_XbIPr}=aEwx
zAjbXwjQKIXbtqPYdawwu2g^(tHos@JkDqOs1;*lIW@&AftTyN_CDg`^f~d+&;$$hF
z3Yc9rUa7TfFxfv&_k>`1;?E}|s?Dg%vnky=NLtWsIkBo*)c0$#t(R8pquMH*rOe#B
z38zhlFH*i#AjZBrHMrh=-DIh7ml)YcmCm-*GB$Iejy1Z*&e+80)cItq-$u{5dPI_I
z<n}nyOJT6<Y$7<%t9xTM9XQEI{B_~Ugvu9Kv5pmz(^z&ixSL|ib52&!jfqrCfH}qO
zZ7|T<OC>i)ZP3(&R78O`<t<_&a#*t*>Kkc}TN)A|Oii`c^)x!&6)roXcr($?y3Lch
zD8{)=;}%>7v`oj>z2YH-L&%J7X0VxjC5Df!tVZjb0M%<I+)>eC&r!<RFdhSYjKr-M
zq8empq={wbx;<lQ=arb{o}z^H$hmUMwOUokP&>3`>EvwPAhDfG(O67b>q*Siu&EIw
zlI)YSyLWV>vM?<yfov5<h;>vNTwJ=^wsNDhr%ah`ZHj71Q7D!+owu>2me+N|6jL1=
zLlj#fI|H^;I$+9MYFekecWKIHQ82Fdv}9KPe0(`Mu!N*bZ(jNk3_lwS>jgU&-GG^l
zSfy6R?NZ9o%|i0b-7Za^tr!qJJ?VECqZx6FHNgsUM8mgy-R~oq%L0?l!vgB5M3AmV
zHMDD5#vC}JyGHCW@Me@3O1wdqx7p5=PT?v=y)Kg_4l{&FPFfP8uHm32<C2o5p$?;5
z7}o0}Vr+YLsoT{osZ%AdD;VH;$t)9=gf-g{k$J6*>EjnqY{l5^mh_U_ZLNidg3n&t
z9&UjEI-20qIe}$?uNc<>0zAb$Y31=NJcW>&G-+|2D$~~AdMvW9R$4Q%c+oCw3r0$;
z$NEy`EwEaw?48r+5Wyjnu>}Y}i?bJnV~7`k9}fhS5lH^?*1Y+@lImzu4sM}IwYejo
zGFI5b;lR#H7Vg<)^~<jAxFTYtK}6>nwkef+n1<77szx6U4_W1*=<32b>zTBxPK<7*
zdGwk(7jWqt*4EhOTC%Pn*<#`Arig{NM2)>ERf@JTgjnNKQI56jh9(JORyA;2ro^~x
zQ&pwD^XU%oc|es9HmC!e0@0@<7O#z;7sKi7!%KG4We7t}Vr#7sNk-@#W1E`dR>16b
zg_`DKTbm7Olznw37mr6RZNkwm*1<Ut<fl1K*>fEBK1p);EqV}MTAzfJx)2<esp`>~
ziCnvhX9yUCk>nZKCIL+WrT2a?nmZ|0V`CC)3L?VT!?uk*J)I1udfk~OT?j6m?~#uB
z{%qH)WzH90PnTgvVMD}>%NegB!1ak)@_ugVHZTf#t7BM|vR}UMr6tcE(WsU(_O#Is
z$8NFK(oAMI6J;IP9g>lR#k95LmSak|ZR0iS!;ZGI&8|T>v5x|C4hG85v^%G&D23XW
zwX_hfS2J=X*h)lmX)bQJvh}v(u^7fCCyD3)1eQ6aliNW|>UJZ8M{2rSMlVumMdT-1
z3P!cM=47-b>g_DMLkS6X&0!6WdN-MkZ)v#0KpA7$?>uvRYi+M<J~EpWV(J=^0NRg?
zdCm2t(VDjP8v$Z;!4t$!6iu}5!l8>Rj-uUjRJ7LF9O9n3^|MM&Y)RLlF{{^EV*%E7
zB^bcYpyw*Ac{KZPCkYP5c6!L-T)jR-EMqjct8R?JdcuchBc{$I6I8q~mD=g0mbX`k
zy#?WU;5gcZ#<G=c&K6USB0WT{;>WXLT8$+t4cgWWfJ(TN-rLq+VQDSoK%;UN6u@)C
z(1<dn%+u`1le|Pu*Tz|>;w&iJHgao-OElTHv0ghxzjscbo9_30LVWnk1FUb|rMZ60
z904v#NZ%CR8+?`s?kT0HT`E;vRJDNGKp|!-qZy1+!(OjzTdmp*&YKuzbpkrH(A2rs
zX2fB=eKcHcv^_7a?g4buV3(`kf%g30iQIHll0XESxqNkOLQAfdMAD%y(OEHVu>~Z3
zvL+r3g15eSq{2gG-6yOR!6Jex>DxyHLrRsspUbcuQj{T`h9V5Q244W6c|uW?9dk-k
zVce<<LCGmd5j(`#vSJoBmXC8Y?6ocJ*Bbj+)AZ+ht_NQX0dO{C2GoVhQBVpZN9To_
zqIi2K6x-ph=JNYY*Jpfq`c0RWCQRc*j`R_%UMPWW%~VA1LoHh|)woxalN1#A(oA-p
z&X2Ec3!QvY3$kA_whry@+b(y|#fumDv|q1pVUBH~NM<&js@<xW+qYg-qT>QaGDm-<
zQ_~S5Bo5vjxv}2!X3dy{ghwRG+4Al6F5BG@81C@eM~cc{5=oLwHkISMklF2}<+Sv>
zT=%{GRLjy=*EaN})#J9^&*w(+-Hs>@lq1s~J+&j@`Z`v<{bPQ!o$tGx$^8FUCD&BC
z`$Ki3!J2CwoxT7z;T~r0LNWGUz2479EBOw_N{45UxogRnGt~#SIZX3{j@a~iz(R9m
zuLCbxcdY)lN_a|hez71@JKwR{o*Y_qC4|$;CekUK@oKE?v3}d#*iqCi>l)UeYfe6w
zfKd6~BiHFZ-vi8w!%Y_u`(tJz?dy5RD(s4{mU4*s6h+poFUBp>$w4LQ^x<d2NNX<^
zCl0Pe%xo+x(=RCNEvhxnvQ|9_qe7WQ$;_`(%~f+;Uc0NjNl5c#Qetsb7E+s}x<aKL
zu|}O@R%2UaH4PzWw6Y$BT<g{qqQ|Q$(yNivI<<yM+AYo!kr81LP?xH7D+b1PwY3Q6
zw{yIcE`>!|YYRPlW++YV58vNEF(7xwhtcor-uQy!H%75+^0qcA=|xU!Mnhiv&k0M&
z&C^y)wb;bGQpok4iMm9&5^aq9P2|i01UkQ#gmk>lb@6!_==hkkFxKs^+RU7+wdifu
zO>b^A^QgMsYJ$-7h!6ugg|UN30zx{*8g6RMswA~dMr}e;VU&|DUALx{tnMRmRud{+
zysSZBmWHjBQr_EH6HBZsHkTOMlTEZ84a;3?S{H&AvcQ4ex!jPl9i=gqXi__Dnk{Nl
zaygc&GBuaecA;XgFnZapma8^r_0N0c`SYo}(54HcLdRWG8jjVW?3a5w`DQtCg!E)|
zbP=1*vQHfr*y`*<Z1aH{C`&fUqA(&h65L++IeT+zRWtC;ZiC=O^~LEV%?BIEW)np0
zSwVCQIAU5<&2v<4GrYwjKIEoA%hl})#~A^dx=o_!7MP{YtkQ%4oPoeXcin#O-^uj(
z=p-Z+9$24qX!JhS_-bG%R%q25T;dp6Ffe&yqpxXAHM$xkymKDQwyB&;N?NHH#jJ$9
z<uX8q*GoSiO}!hIn+=#tnHG#Bi-o5|R%{8RSK&F~MBznRH=N|WtiOABG%z;JY|FTX
zjlFnYNl7lMTGXkP){I!GE>eUw&b402yId)qJY#ttBcYQ=lO?ydCWyyeYYm5ObpmB9
z)$6u1RoZH^ZeoQvvXGG_31Y19fh-svDa-AmM1%&kRZatF4ulR`^@+8#b1y?yhcvF!
z7P<fd6ge~k)Fv}xv?7cgLehI%z(6&KUS;jN!wfM4<*1QSLCs@rT^_Z?Vx?3n7qw#9
zcqy&gl)EKIRLZI-qN~=#owpG!+YVM`x;S?N+BBm=oU)DYT2BYO<_uX8y7){1q3R=E
zLcGn=++0bnxUjbd&BVCA_4j4)uC^NGjZ(r|!)D`Ic5^InW~G-svzfi!iBpM<=+-r`
zHsQKeR7I57HtQzsE_zK}TJFOZ_Quy3pqGNm%abyaWo*NC?Txy<vI&jjNGVHew=Xj7
zV!@Ug-U_AKX*5}e(Ap(1@ow&NrbCK2=zVT1NaXHLF=ZUUcv)Q~g3t?gkh5&VO!YIw
zcqOB28DOi`WIU%3y7@ba61+~ewwB4xRY1v?Z9uZ<H)?1*31aD#jWw{eB#66POd$DV
zZMxAM`eq(N9+@$X<uX@KZl89_GHh3=ke<7?Lb8>T`zBOej55kwVXLOytZAbu6br1O
z8_zAqyF7<>XOqfKmD>}r$I#3H)PXv0_;ij%vXTz^-yBEAu-_xMg_Ab2<`lY`7>Z%6
z+GJLHGn|sVi#p6@&em<QwwxPh640xgR;jEvJ9<XB){@Z}RgF$>Osa#~>ocvPn}<71
z(sObqjJ9wieIv<vx=Uh}w@bsp2<))qh@jjlb7^L+W3C5YQP^JVpINZzF@<P59oWe@
zn|79pv?$zbEU;Ic<E|#M9W#3i(Qx_d7Hp2$8`&KSxU!>KjF9(FQX_TKx2<GeG~1l=
zaPKRe7aGdsl$f@+t!0*H!JH$*Mx$0&40-|)eO^Xl%XQ*dx)oAfBx^fZ-XmP?v0AMp
zQMi<_TWJYuj61TjwxU7V(*ayHx4j8HM1(vZ&YM}Ty-kWlbs%en!7h#9SUGlXwd-Y=
zLFN<pjb=_uUuqoo7G4>2a?!jlyJ9UR%)COUhFH7Kwp=@#thwk>nb52Yh9=KrrR}c~
zw1;9X){_ese8_p2p1*HS={nuqDtvHjlqtB7SfXl@M!Hy!gPc8;<?YHws)RCah3TnH
zk?79XT3Il<>EYV8O4K)F4Ct+`%?>DxmL$nLP_g=DngB%*%?c=$rEN<ua&JkAB+ts>
z<~UN(V-uyrD)3d}#+Ff2vrd;QThuDCqQ{8*!Wvs^6oJbM-U})0)jdb0!cR;>T&uJx
zsn|lBO>o>A+*yVYBem^DsU@vBbg(RsSQy7yl8q*I+RI*awry#PtwP4;EVdEjjc7wi
zlIn2`nVgeJq%<)6$-hO&CCw+}rHIBE;vt=$G(SDNbLTc9QJsj%E8CzmlyJz?MV-rn
z3hMb2S8VyDNooua#%s_gJV?kat1;upr=Y<~Th)yAbZcdn;NIF5)-kbEI+v^wGm0jr
zS*NeF!D546t0)cNinL}D8r6(U1`}N}?Axs+yIEIT8G0&<o7AdJSh=kRZ(A?DEi)Af
ztD>XC#<OpEAw$e>gBBPyER3<RqzXzqR*l9`hhC^n5~9k(+_3R&JZMPXc;e#M#<SEL
zN~lnjlp|tT`C_JY2D;qjDK?pvHZj!PheS<cor+0oa*)-rwx%e#+nKgdnX9D?=!Tea
zt(wv}vWF6ofpKMtFINu}XRjDUNs9?cNKeV7L^AtY671<_?#a;!nG{M*LYoCHa+1eZ
z8xYy+16#_mm5m2kbGuh^y_aKH(LNN=Uf%=fA2a3;rJW{WhH6XAIDc6>Q=e;2Qz5ib
z<f$JLlO_<~COrlM=f>5ezrxk{u00#5y{d<l<Qz?CBvWO2L<Ox_sseNmlb38RE9<6x
zHZFAbagi=9)eB+$4Yo1s##*RLAa-D-<PRM<JSV8|7oc(?e3UmdxM41~em{@t`3}zh
z-CY*x_di)qwQ2~S?~Xz``+S~k`aX+34VXTSgh!%9&}DT#{XM>(x^>U?V(2O4su9E7
zo@KiuBs~xe=oR1O@;L24$$6VCLxP$0)aK;%Y0HV#ip}$~r>|n@#(KE*n+OXcp_B@8
zeD?sYw=31Nl+fcL9XxA(#=lRUmJ;QfNGat>htX-f_;OZDt;a$=e`c9x<i3BW^#9kd
z+0c36i)!i)J?}iH={#$OrK!|07w-AIfO>#$ygSKlbB3nTgtT!X#%397#yGt7YEO0G
z=gm5s>$<qfd|c3K9v(VV1lk=2%3oXoJ(y;{c_=Ct)59B2V})6&HkEXtu6y>E(BC~0
z+TPeM*&amVQfYL{w<~*TdU}1cKN4fG`QBSFbZRBp@l~qF5hABjPV7(Bq`P^CQV#SM
zwW^h?<)=lgod{;j7KgAk6Ch^rNyn-X&cGj90)iei?^48+#qMa?(pwmHO;~j9v=o@m
zf`v!~E}S-H7gbwTQq*cz>JTjp{B~SJLiM022laO78Wc!vd8nO*<GXM?33<D?j^pI>
zjwtkJiZy|9v_`~f3ereKASLwQh&56}Qc=seot3XCQIj$RNFHQD%P{W44FojK>@h#d
zTxqPBt*t)Skf>FnKWKM0IuUn~F~bT*GA)3tAs`K68>Pj?P|<9NT1#3jR>f*{SS>k_
z+K}Aycd_ig^{waO?)Q2;P!>D<od*QSM^~FDF**W2IPS^s@C4~zWVPe7{<D7Hl{)oL
zbBAt{J1O^lN3i#I^VaA_y@fo>sAQ9_`gE~PivTfkLEP664Ak#X%_KbZtf*ZINLZ?~
z>wwt^sZ)=PICP+By&~{A^57i3hc~C8y%v66^t^q{xk-<F_Q~JV;kV;s<LcAq@zf5F
z4Y*^o=1Z}X@H7u!b)tTfNa<Z+9KEv;EdeRV+ub1thMpeY&Yy;UJq^bj)h2pli}p*5
z{ybA|HTG*ZJjb98Dzxx$?>t@JB2+xVjWA39_m>Y%#tU2e1+C)fJyJvXq$^QYg-!J2
zP3HSVS`mfGMI-`BK%@=m)(Hu@Te$m{UB5OwJ9K5oqqbQ9&8F`>)7PfNn$0kkr5MyC
zT2iLw6?hRmN09`Mn)Enc$HO#fjZhUNYt4qb9irpCP&df!w#!n~Kyt6b0>P{$G$SlB
z26kFTG6-UUj3s%9CX+6Vpf4!XIv)J$S)???GXTjujDeZ@9nelzX(<C}{?{PY{e_yU
zX`gyGOz~NWsERNZK4i3+4ZgA3_Rl3?#(R98dgDw|<>dK%`kT{=)y(a0xp@J0@aKc8
zRE3*~4aW8jiZisjl9&e;Kv_w!GrPq6k?B%PR3nm>yo8JgQMwKFVF$g^n&3|ZzexLy
z2kP}Ig?9Z>zcAB{5Dy?ZOYTAY_ujVdTn#6&l$HbA@VMYV8tGrh+~fFko<F|fJ$m(*
zpCwPy_}F4oh!#8)r8vKv@;H)zm(qOigVcN~!DZc{)-@`u=yOc3O2&Py-)~9k$>KBK
zsqGjS%#zWGVbK_o%uQJ;v4_`&GXkiS@$R$6nCHc;!&8BuGf0z0##Z6Qrf7$ljH=-U
znxRcv&aH0$9=a#52ZWhulBvIPeU5$pPrK$mmOf9la410-^5F!ev9Y}UY!QdcKDFWD
zCdB1-%6sr{tK|89Prvt@rhZ%@6F});MoO_ZamLI<5YZ8{#GS@7(U$WF{!4W&=)^C6
z5)yus-cfwZPt<4Uy`}pz?)p!6kJc<BH2I{*j{>Z3t*1M)ZksrtG`=67E<pM0WXs;y
z-hDbfpiM@(msr45J2W4kF=x+;pASc2uuU&OxC{wOrJFXgS#{X5>us-61mJ-sZf>d=
znZJoXjqV5JY+svFY+`4Xg}Mo;^>yvuWpUda+OW09X?CfWGZ1Yskegqx-LM(cd!=7e
z7SaNII~W&R_)e0crf6RbBHP~XpRv=X>_TbsmE}Htzi;z<@cR2twh>}5qOC%~6<LQj
zQ1nf<c;{yhrc@U_jhGhMEV<B^Bv|m2+98HiW;rc+p4KllJy~TVDg<cvX;9un=cgGY
z_02}c^>LBp0f7P@ypZvLS?i5dm#cvhFC)(LWf3?;=sRvs&Q4s$wB*7^W?6HMp4QmQ
zY?zoi1vv9C5W_Gen)jxG215)yFgaAH!03!6toGFHX^3e_D>t>`urzI$a?zHKJCcbW
zyJ1>99J_A5mG*^$2oQl53=b@}6OoS$$W5cYd_eDU<I_1|soKk=$oTi36-I9lrqy8K
z=E_(KaXoK2>$%q2+u8QV;^=%LENSz>;+2e^9WoJEYi1jzn+U2<qWFxgeP^sUk3+jN
z(%)NQpcdI_t(6QLn+V(R>_`*Nuo3_@@!A}s#ABGnN=6}3g&SorEmzGTQ^tK@GsI8;
z50xBNM6sF6uM+t?1Y@ByvQn(mI}*=(9`*L>5bFv!7pyOF2Fo10_Hky3qmHKQI}D6l
zNK(foZjT1dQC$W($=)u*oukVdzCJkl_Xl;s@}M6s#_QLQ&p#bcvd_JPYYIZr4W+!V
zN;mM>@~HEim9k?ym$G9aNe?C|vC!KU5<<Eu&q;-TW_56M08XB*p0rS63~nVhKvuDB
zPVzOO^f-2meWe^mdCoeU@~-rJU8rTH(b_P?bk<5rsuC(G!PRdWT{fW|QttRJJ>;uB
zyIT#AND}6Rb0s>q_JB#6%-seo6H6FmluuaW)6%zKLV5SSY_iwXZu++q#Vj}rVJsk%
z1ofV@7GB$Wmz84Ao>_^~#@Sq;ZBj5VP_>w8OhlFV?G0-JDxk#nvdf*jO`(K9>~ciI
z?Q!Mt$(K5zSqpCyJeb>D$o9TBwX*ta9;8-MVp0=0&XtBiU_jy;GJ8gx5%hd)<yPq6
zFhPvh+gzk=rL473Sz<+yme62*H1WNL@+URxLO&&T>h5-$%No|PcAHuh4y<l{=3~|*
zTbb`)Zj|UvZS)~qSoHx3np*adX0$qJ1j13a?EyP$rHqufcEYd@eEWJ|dbEd_S{+_{
zTUatLxqV~U4QASHtVxb`x0TQsf;r7e6A&?~h1RENVztr|vkVwbuthcmpJxHT0zmz0
zuI*SC+%pJDIE%RAu$X}YCcIkG#?Kca1b3Oo9I06|cU!Ls%+7ReCWh6yVi4*Ol!zXz
z+(II21KKv*VqLCilXcqbwKD~jw+~=bWXVJ~bwN;_h1*7V4x@OSJzQs+4K(h!E&Fo^
zm5u^Oxd9_a&XJx{QCez(mm#GRBf!N>G`XA%F3^TVTNiY2X~Qxl3-Gi*zPoR*RqLpi
z6oPX~jFu{d2RXRT&E`aDKTqtk>F%*-Hj96PIve%P00;@>^AC$c@<uIe<og6(d)Ym&
zve<j_ya(8>6<pnJt0*qkZsmrzTzyL%-m~8ID*Zn^a7`)VtcHxR-53xuUz=#+#HF4i
zK6;i1aH2$>x7xJW2vRx*#L~2-)+W?Q*+RWoJx+AgnJuNzpw!iJ?UuAF#wM2+O&uOK
zGQ(b0vYz0{<35%&JqWd{T9}5Z9#_nP4YF05L3+TkQ(GN`hMJdKZQ=~drsgEN;ev67
zd^sRvBf>%MN<cU(771)6m6y`{ddJ6?w8TiqSm80PuaBQkwe!D&_rDNu9>>HZ>Z#`V
zoe5m(V;KgYonmi8DbU1P9lYB{Geve=sK#j)GV6;lYZh9tu?ieOmi4D34lJ&_vlB9s
zRwmrY6{HszelF?Hq~I>URNU}<eVwg8Og<WCu<=e0U$mQgTaX}oO!W0{S8kw4yYYEH
z0SWV8s(N-}FHrk`LIcle__y`nqJ>W}o}JW#mt1}PH|#iul*yj7wn_Xg!+8R^@4WPh
z{r;pPo*igMX+<O>uj(oAXGPt7p!zU(ArwkZ^hu<h+|Q!Z*3_u&`~2{fi7n2Du*Mxv
z)`26y@7QNxNxZO)l9FmgY~M>0*m=?&pmOK4v!7Ls+HKT+$!1Gn_BrAc)ZU)G#EEbT
drWm7}*7LjmsPpiYSJ2<~F64@Ep&<O3y&h`lnfm|$

literal 0
HcmV?d00001

diff --git a/tests/data/references.yml b/tests/data/references.yml
index 6ab753a8..4a18ca69 100644
--- a/tests/data/references.yml
+++ b/tests/data/references.yml
@@ -44,6 +44,31 @@ COL1A1:
     - XP_005257115
   - - NM_000088
     - NP_000079
+PIK3R2:
+  accession: UD_144959560058
+  checksum: f696ee19bba83e899ed8c0f2c2f2ebc4
+  filename: UD_144959560058.gb.bz2
+  links:
+  - - XM_005259824
+    - XP_005259881
+  - - XM_005259825
+    - XP_005259882
+  - - NM_015016
+    - NP_055831
+  - - XM_005259822
+    - XP_005259879
+  - - XM_005259828
+    - null
+  - - XM_005259823
+    - XP_005259880
+  - - XM_005259827
+    - XP_005259884
+  - - XM_005259826
+    - XP_005259883
+  - - NR_073517
+    - null
+  - - NM_005027
+    - NP_005018
 DMD:
   accession: UD_139262478721
   checksum: d41d8cd98f00b204e9800998ecf8427e
diff --git a/tests/test_parsers_genbank.py b/tests/test_parsers_genbank.py
index f997e89c..4c27c9a2 100644
--- a/tests/test_parsers_genbank.py
+++ b/tests/test_parsers_genbank.py
@@ -37,17 +37,47 @@ def test_product_lists_mismatch(parser, products, expected):
     assert parser._find_mismatch(products) == expected
 
 
+@with_references('AB026906.1')
+def test_include_cds_without_mrna(settings, references, parser):
+    """
+    Annotated CDS without mRNA feature should be included since Mutalyzer can
+    construct the RNA from the CDS.
+    """
+    # Contains one gene with only a CDS annotated, no mRNA.
+    accession = references[0].accession
+    filename = os.path.join(settings.CACHE_DIR, '%s.gb.bz2' % accession)
+    record = parser.create_record(filename)
+    assert record.geneList[0].transcriptList[0].name == '001'
+
+
 @with_references('A1BG')
-def test_only_complete_genes_included(settings, references, parser):
+def test_only_complete_mrna_included(settings, references, parser):
     """
-    Incomplete genes from the reference file should be ignored.
+    Incomplete transcripts from the reference file should be ignored.
     """
-    # contains A1BG (complete) and A1BG-AS1, ZNF497, LOC100419840
-    # (incomplete).
+    # Contains A1BG (two complete transcripts) and A1BG-AS1, ZNF497,
+    # LOC100419840 (no complete transcripts).
     accession = references[0].accession
     filename = os.path.join(settings.CACHE_DIR, '%s.gb.bz2' % accession)
     record = parser.create_record(filename)
     assert [g.name for g in record.geneList] == ['A1BG']
+    assert len(record.geneList[0].transcriptList) == 2
+
+
+@with_references('PIK3R2')
+def test_complete_and_incomplete_mrna(settings, references, parser):
+    """
+    Incomplete transcripts from the reference file should be ignored, but the
+    gene should be included if it contains another complete transcript.
+    """
+    # Contains MAST3 without complete transcripts and PIK3R2 with one complete
+    # and one incomplete transcript.
+    accession = references[0].accession
+    filename = os.path.join(settings.CACHE_DIR, '%s.gb.bz2' % accession)
+    record = parser.create_record(filename)
+    assert [g.name for g in record.geneList] == ['PIK3R2']
+    assert len(record.geneList[0].transcriptList) == 1
+
 
 @with_references('ADAC')
 def test_no_version(settings, references, parser):
-- 
GitLab