From d2f9169094e8283f2622ba438a1af2859b164b13 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Tue, 14 Jul 2015 17:53:57 +0200
Subject: [PATCH] Uncertain stop codon in protein descriptions (fs and ext)

When a variant results in a frame shift or extension and we don't
see a new stop codon in the RNA, the protein description should use
the notation for an uncertain stop codon, e.g., `p.(Gln730Profs*?)`
instead of `p.(Gln730Profs*96)` where 96 is just the last codon in
our transcript [1].

To detect this, we now use `to_stop=False` in our `.translate()`
calls, since that will explicitly return `*` characters for stop
codons.

We also slightly fix the coloring of changes in the protein sequence,
where previously changed stop codon characters were not included.

[1] http://www.hgvs.org/mutnomen/FAQ.html#nostop
---
 mutalyzer/util.py             |  87 ++++++++++++++++++++++------------
 mutalyzer/variantchecker.py   |  60 +++++++++++++----------
 tests/data/NM_001199.3.gb.bz2 | Bin 0 -> 5216 bytes
 tests/fixtures.py             |   2 +
 tests/test_variantchecker.py  |  33 +++++++++++++
 5 files changed, 129 insertions(+), 53 deletions(-)
 create mode 100644 tests/data/NM_001199.3.gb.bz2

diff --git a/mutalyzer/util.py b/mutalyzer/util.py
index ae709477..93d6de8f 100644
--- a/mutalyzer/util.py
+++ b/mutalyzer/util.py
@@ -407,18 +407,22 @@ def read_dna(handle):
     return ''.join(x for x in unicode(handle.read()).upper() if x in 'ATCG')
 
 
-def in_frame_description(s1, s2) :
+def in_frame_description(s1, s2):
     """
     Give a description of an inframe difference of two proteins. Also give
     the position at which the proteins start to differ and the positions at
     which they are the same again.
 
-        >>> in_frame_description('MTAPQQMT', 'MTAQQMT')
+        >>> in_frame_description('MTAPQQMT*', 'MTAQQMT*')
         ('p.(Pro4del)', 3, 4, 3)
-        >>> in_frame_description('MTAPQQMT', 'MTAQMT')
+        >>> in_frame_description('MTAPQQMT*', 'MTAQMT*')
         ('p.(Pro4_Gln5del)', 3, 5, 3)
-        >>> in_frame_description('MTAPQQT', 'MTAQQMT')
+        >>> in_frame_description('MTAPQQT*', 'MTAQQMT*')
         ('p.(Pro4_Gln6delinsGlnGlnMet)', 3, 6, 6)
+        >>> in_frame_description('MTAPQQMT*', 'MTAPQQMTMQ*')
+        ('p.(*9Metext*2)', 8, 9, 11)
+        >>> in_frame_description('MTAPQQMT*', 'MTAPQQMTMQ')
+        ('p.(*9Metext*?)', 8, 8, 10)
 
     @arg s1: The original protein.
     @type s1: unicode
@@ -439,6 +443,10 @@ def in_frame_description(s1, s2) :
         # Nothing happened.
         return ('p.(=)', 0, 0, 0)
 
+    s2_stop = '*' in s2
+    s1 = s1.rstrip('*')
+    s2 = s2.rstrip('*')
+
     lcp = len(longest_common_prefix(s1, s2))
     lcs = len(longest_common_suffix(s1[lcp:], s2[lcp:]))
     s1_end = len(s1) - lcs
@@ -447,9 +455,13 @@ def in_frame_description(s1, s2) :
     # Insertion / Duplication / Extention.
     if not s1_end - lcp:
         if len(s1) == lcp:
-            return ('p.(*%i%sext*%i)' % \
-                    (len(s1) + 1, seq3(s2[len(s1)]), abs(len(s1) - len(s2))),
-                    len(s1), len(s1), len(s2))
+            # http://www.hgvs.org/mutnomen/FAQ.html#nostop
+            stop = unicode(abs(len(s1) - len(s2))) if s2_stop else '?'
+
+            return ('p.(*%i%sext*%s)' % \
+                    (len(s1) + 1, seq3(s2[len(s1)]), stop),
+                    len(s1), len(s1) + 1, len(s2) + (1 if s2_stop else 0))
+
         ins_length = s2_end - lcp
 
         if lcp - ins_length >= 0 and s1[lcp - ins_length:lcp] == s2[lcp:s2_end]:
@@ -472,7 +484,7 @@ def in_frame_description(s1, s2) :
     if not s2_end - lcp:
         if len(s2) == lcp:
             return ('p.(%s%i*)' % (seq3(s1[len(s2)]), len(s2) + 1),
-                    0, 0, 0)
+                    lcp, len(s1) + 1, len(s2) + 1)
 
         if lcp + 1 == s1_end:
             return ('p.(%s%idel)' % (seq3(s1[lcp]), lcp + 1),
@@ -506,12 +518,14 @@ def out_of_frame_description(s1, s2):
     Also give the position at which the proteins start to differ and the
     end positions (to be compatible with the in_frame_description function).
 
-        >>> out_of_frame_description('MTAPQQMT', 'MTAQQMT')
-        ('p.(Pro4Glnfs*5)', 3, 8, 7)
-        >>> out_of_frame_description('MTAPQQMT', 'MTAQMT')
-        ('p.(Pro4Glnfs*4)', 3, 8, 6)
-        >>> out_of_frame_description('MTAPQQT', 'MTAQQMT')
-        ('p.(Pro4Glnfs*5)', 3, 7, 7)
+        >>> out_of_frame_description('MTAPQQMT*', 'MTAQQMT*')
+        ('p.(Pro4Glnfs*5)', 3, 9, 8)
+        >>> out_of_frame_description('MTAPQQMT*', 'MTAQMT*')
+        ('p.(Pro4Glnfs*4)', 3, 9, 7)
+        >>> out_of_frame_description('MTAPQQT*', 'MTAQQMT*')
+        ('p.(Pro4Glnfs*5)', 3, 8, 8)
+        >>> out_of_frame_description('MTAPQQT*', 'MTAQQMT')
+        ('p.(Pro4Glnfs*?)', 3, 8, 7)
 
     @arg s1: The original protein.
     @type s1: unicode
@@ -527,33 +541,44 @@ def out_of_frame_description(s1, s2):
 
     @todo: More intelligently handle longest_common_prefix().
     """
-    lcp = len(longest_common_prefix(s1, s2))
+    s1_seq = s1.rstrip('*')
+    s2_seq = s2.rstrip('*')
+    lcp = len(longest_common_prefix(s1_seq, s2_seq))
 
-    if lcp == len(s2): # NonSense mutation.
-        if lcp == len(s1): # Is this correct?
+    if lcp == len(s2_seq): # NonSense mutation.
+        if lcp == len(s1_seq): # Is this correct?
             return ('p.(=)', 0, 0, 0)
         return ('p.(%s%i*)' % (seq3(s1[lcp]), lcp + 1), lcp, len(s1), lcp)
-    if lcp == len(s1) :
-        return ('p.(*%i%sext*%i)' % \
-                (len(s1) + 1, seq3(s2[len(s1)]), abs(len(s1) - len(s2))),
-                len(s1), len(s1), len(s2))
-    return ('p.(%s%i%sfs*%i)' % \
-            (seq3(s1[lcp]), lcp + 1, seq3(s2[lcp]), len(s2) - lcp + 1),
+    if lcp == len(s1_seq):
+        # http://www.hgvs.org/mutnomen/FAQ.html#nostop
+        stop = unicode(abs(len(s1_seq) - len(s2_seq))) if '*' in s2 else '?'
+
+        return ('p.(*%i%sext*%s)' % \
+                (len(s1_seq) + 1, seq3(s2[len(s1_seq)]), stop),
+                len(s1_seq), len(s1), len(s2))
+
+    # http://www.hgvs.org/mutnomen/FAQ.html#nostop
+    stop = unicode(len(s2_seq) - lcp + 1) if '*' in s2 else '?'
+
+    return ('p.(%s%i%sfs*%s)' % \
+            (seq3(s1[lcp]), lcp + 1, seq3(s2[lcp]), stop),
             lcp, len(s1), len(s2))
 #out_of_frame_description
 
 
-def protein_description(cds_stop, s1, s2) :
+def protein_description(cds_stop, s1, s2):
     """
     Wrapper function for the in_frame_description() and
     out_of_frame_description() functions. It uses the value cds_stop to
     decide which one to call.
 
-        >>> protein_description(34, 'MTAPQQMT', 'MTAQQMT')
-        ('p.(Pro4Glnfs*5)', 3, 8, 7)
-        >>> protein_description(33, 'MTAPQQMT', 'MTAQQMT')
+        >>> protein_description(34, 'MTAPQQMT*', 'MTAQQMT*')
+        ('p.(Pro4Glnfs*5)', 3, 9, 8)
+        >>> protein_description(34, 'MTAPQQMT*', 'MTAQQMT')
+        ('p.(Pro4Glnfs*?)', 3, 9, 7)
+        >>> protein_description(33, 'MTAPQQMT*', 'MTAQQMT*')
         ('p.(Pro4del)', 3, 4, 3)
-        >>> protein_description(33, 'MTAPQQMT', 'TTAQQMT')
+        >>> protein_description(33, 'MTAPQQMT*', 'TTAQQMT*')
         ('p.?', 0, 4, 3)
 
     @arg cds_stop: Position of the stop codon in c. notation (CDS length).
@@ -639,10 +664,14 @@ def _insert_tag(s, pos1, pos2, tag1, tag2):
         if 0 <= pos1 < block:
             # Insert tag1.
             output = output[:pos1] + tag1 + output[pos1:]
-        if 0 <= pos2 < block:
+        if 0 < pos2 < block:
             # Insert tag2.
             output = output[:-(block - pos2)] + tag2 \
                      + output[-(block - pos2):]
+        if pos2 == block:
+            # Insert tag2. Special case, since s[:-0] would yield the empty
+            # string.
+            output = output + tag2
 
     return output
 #_insert_tag
diff --git a/mutalyzer/variantchecker.py b/mutalyzer/variantchecker.py
index 3f0ee422..e385c336 100644
--- a/mutalyzer/variantchecker.py
+++ b/mutalyzer/variantchecker.py
@@ -1349,22 +1349,33 @@ def _add_transcript_info(mutator, transcript, output):
             cds_original = cds_original.reverse_complement()
             cds_variant = cds_variant.reverse_complement()
 
-        if '*' in cds_original.translate(table=transcript.txTable)[:-1]:
+        protein_original = cds_original.translate(table=transcript.txTable)
+
+        if not protein_original.endswith('*'):
+            output.addMessage(__file__, 3, 'ESTOP',
+                              'No stop codon found.')
+            return
+
+        if '*' in protein_original[:-1]:
             output.addMessage(__file__, 3, 'ESTOP',
                               'In frame stop codon found.')
             return
 
-        protein_original = cds_original.translate(table=transcript.txTable,
-                                                  to_stop=True)
-        protein_variant = cds_variant.translate(table=transcript.txTable,
-                                                to_stop=True)
+        protein_variant = cds_variant.translate(table=transcript.txTable)
+
+        # Up to and including the first '*', or the entire string.
+        try:
+            stop = unicode(protein_variant).index('*')
+            protein_variant = protein_variant[:stop + 1]
+        except ValueError:
+            pass
 
         # Note: addOutput('origCDS', ...) was first before the possible
         #       reverse complement operation above.
         output.addOutput('origCDS', unicode(cds_original))
-        output.addOutput("newCDS", unicode(cds_variant[:(len(protein_variant) + 1) * 3]))
+        output.addOutput("newCDS", unicode(cds_variant[:len(protein_variant) * 3]))
 
-        output.addOutput('oldprotein', unicode(protein_original) + '*')
+        output.addOutput('oldprotein', unicode(protein_original))
 
         # Todo: Don't generate the fancy HTML protein views here, do this in
         # website.py.
@@ -1373,9 +1384,9 @@ def _add_transcript_info(mutator, transcript, output):
         if not protein_variant or unicode(protein_variant[0]) != 'M':
             # Todo: Protein differences are not color-coded,
             # use something like below in protein_description().
-            util.print_protein_html(unicode(protein_original) + '*', 0, 0,
+            util.print_protein_html(unicode(protein_original), 0, 0,
                                     output, 'oldProteinFancy')
-            util.print_protein_html(unicode(protein_original) + '*', 0, 0,
+            util.print_protein_html(unicode(protein_original), 0, 0,
                                     output, 'oldProteinFancyText', text=True)
             if unicode(cds_variant[0:3]) in \
                    CodonTable.unambiguous_dna_by_id[transcript.txTable].start_codons:
@@ -1386,10 +1397,10 @@ def _add_transcript_info(mutator, transcript, output):
                 output.addOutput('altStart', unicode(cds_variant[0:3]))
                 if unicode(protein_original[1:]) != unicode(protein_variant[1:]):
                     output.addOutput('altProtein',
-                                     'M' + unicode(protein_variant[1:]) + '*')
-                    util.print_protein_html('M' + unicode(protein_variant[1:]) + '*', 0,
+                                     'M' + unicode(protein_variant[1:]))
+                    util.print_protein_html('M' + unicode(protein_variant[1:]), 0,
                         0, output, 'altProteinFancy')
-                    util.print_protein_html('M' + unicode(protein_variant[1:]) + '*', 0,
+                    util.print_protein_html('M' + unicode(protein_variant[1:]), 0,
                         0, output, 'altProteinFancyText', text=True)
             else :
                 output.addOutput('newprotein', '?')
@@ -1405,18 +1416,15 @@ def _add_transcript_info(mutator, transcript, output):
                                             unicode(protein_original),
                                             unicode(protein_variant))
 
-            # This is never used.
-            output.addOutput('myProteinDescription', descr)
-
-            util.print_protein_html(unicode(protein_original) + '*', first,
+            util.print_protein_html(unicode(protein_original), first,
                 last_original, output, 'oldProteinFancy')
-            util.print_protein_html(unicode(protein_original) + '*', first,
+            util.print_protein_html(unicode(protein_original), first,
                 last_original, output, 'oldProteinFancyText', text=True)
             if unicode(protein_original) != unicode(protein_variant):
-                output.addOutput('newprotein', unicode(protein_variant) + '*')
-                util.print_protein_html(unicode(protein_variant) + '*', first,
+                output.addOutput('newprotein', unicode(protein_variant))
+                util.print_protein_html(unicode(protein_variant), first,
                     last_variant, output, 'newProteinFancy')
-                util.print_protein_html(unicode(protein_variant) + '*', first,
+                util.print_protein_html(unicode(protein_variant), first,
                     last_variant, output, 'newProteinFancyText', text=True)
 #_add_transcript_info
 
@@ -1808,8 +1816,7 @@ def check_variant(description, output):
             if not len(cds_original) % 3:
                 try:
                     # FIXME this is a bit of a rancid fix.
-                    protein_original = cds_original.translate(
-                        table=transcript.txTable, cds=True, to_stop=True)
+                    protein_original = cds_original.translate(table=transcript.txTable, cds=True)
                 except CodonTable.TranslationError:
                     if transcript.current:
                         output.addMessage(
@@ -1825,8 +1832,13 @@ def check_variant(description, output):
                             % (gene.name, transcript.name))
                     transcript.proteinDescription = 'p.?'
                 else:
-                    protein_variant = cds_variant.translate(
-                        table=transcript.txTable, to_stop=True)
+                    protein_variant = cds_variant.translate(table=transcript.txTable)
+                    # Up to and including the first '*', or the entire string.
+                    try:
+                        stop = unicode(protein_variant).index('*')
+                        protein_variant = protein_variant[:stop + 1]
+                    except ValueError:
+                        pass
                     try:
                         cds_length = util.cds_length(
                             mutator.shift_sites(transcript.CDS.positionList))
diff --git a/tests/data/NM_001199.3.gb.bz2 b/tests/data/NM_001199.3.gb.bz2
new file mode 100644
index 0000000000000000000000000000000000000000..a7dd692a2d5eac5e71aa433be19ae21ecbc639d0
GIT binary patch
literal 5216
zcmV-m6rbxtT4*^jL0KkKStEvK5C9A$e}I4dAOKkB|N1}w|N6iG|L|ZJ{Oo)#@a|*L
zT;XFt1sRkLvqK;&eeeUlJ-PHyTQ#}^ITWDJE7@~Xss@f(PiFH5f&yp&5Scv_LNsW?
zQQ|X6Jx?UaHjGU}K-BaTKur@(NT;d#gHULCk5kGtVhsQQ1Jg;M8lKToO%p)G0iX>U
z0000002%-#JrYQXq|l89fN7vI4FCYqpa1}98cGI$03M(K00x?AkN^Mx03<{p2m}pN
zXlbf=qthc(Hll4o<pWIs01tTmiu&iMA7Rrq_rJ<hO^Ob;Gcax>z%+2Bh6I-8W?}Lt
zH<vrgnnY$YNfX|rv9(ohnSwUl)8ZxR*7|$A8yg!pXHS>4$?0m<_RhvvQPzSVNWfmR
ze451zLVpGqyUP7q$N}Bw?9{2Atwzq*xt+~>%-zM45F!m?5b*e!`#OF+K0nz06MV47
z{D2^XUqR%sY8!>xxPl&Tw1RSKhof7@(}st)amA-10Emc-a2q|J<3LNHzljZVXYcN!
zk%${qYOEBy_(gV`*%wDmP*0^OV8#tLk<`*l0ITe#`f{+67XwxK(Ax&s-<uYb);n$y
zLRhXc=eM_AHQ~bmMq(N+XWOz02xDSQ8xYr>qZ@f%vPPN$?Pzh(#wJzBiH0%kOu$71
z4JkHuekNV*W~1w9g?yi5>X>uOcs5Mpp>HoYjq`tQ+UbPiC4Ih+VD*uU!_Kt@g~k<_
zkHUp}Of<b5aI=IWX)HGQK-nF(wV&Kb1x4%_U!HTI@{p!wvP!YPwVcRoiLd?Bw)^y|
zdgTIXj~r#X*{E^OXU#GIkP+)3OJ&#CU@Oll#WlceuWInRA^wT^O2ClZ0V6UE(x}I@
z#{A9raGX(zp!A-3+hI9!_aX-pqCU;hT4Kd<m||5GWPOi$iJYpARY2+QOUq+h<??lY
zpFYmNSIN^+hSeN`rh1$mnhWqT5N4RD!LLp_8x<Maa{;n}Baxhu5e^Ys>D>>&bC|6G
zCZsHhq9JJ@Ya<6G$wF08cm-InGe!4(-WMCbL{e!|P@*CWGKFZJ=8aIrNZ<H7d(mMG
zjH0xhTpq<J8Bmdcj7W~iEQ>3EjHQ&!XVOir5?tBz6@t!71y&4*!Byea%gmr8%uYDe
zn*|vQeE1srDlJ+fWOhi&9)GXJD}me~63|`t7EpwTK>+qF3qR7;2I-Ni!T>e~NfzO3
zd8W0tKs72x^ND@EgZhK$Zgn1EgVbUuN+LFd0+DUAEp5T1MwLz7y)J@usF3mLO=Vs7
zXwJz_`k2Matk0|A9==dWIS+3iqx$=Pn%o$j<0|s}RRK`}ma%8n&0p~Zk?<qz_>yb%
zgs=l-Xkk(4<9ygfdO1e&+i;q2VIL>g(Yy#4Cw{K}UN(FCjPgGY`Pi(4SmT@Mzh7Ux
zaQl*{dyG#$bTTwW#Qp7<&#w)m#LYIQmG0c*7ZLKk5s)?k3FaML5LNp9MMKz~R@?bg
zJ|j+vvJA)fUl*pI7l)yy#_($gtS)u>V0me`cAaW{?cm_oYk7q|`(o!Qu+*6qYkvYI
zFbv3*=)f?AY5+JtckBMzv;r?_84;cHtqdIE$khkRKiC9D<;|kn-t>mJ2qflEpt(@M
zsx;gPb(kT8swGxWMfpBIp8v|5$T&91ZIqG=Q{WYcg;-Ub#NE*g%8wHu|8|nQA|wcL
zAj}Ze4FCli<h6Z*8<5fKAE(Z9_QFZ@I&lCAUUsF8$NQYS1L7U5p;!&O74~0yi(0Vg
z5t`wDcmiDY0=!POza|Xp2&AM$_+jLCSchFGXKCE7;;vv?UpL$x=ic%%6vvHt++<RG
zN-D(oE6GYQ8hLTX+=@?O=Du~lE44LJa5mX$-z2gmgF{rH%PXY%OgMa2^6D)$*e5uE
z#50>wwGQ^#HX(hr5*e=o<4*{jzeIS*32>8pJY%X9QNU5i@abdO&d$!2so!a-$Mx{H
zH5d42;`<qb0px1n0H6tWCC=MJ>mY;)y*s@6oSx<Jg%N%o-q@k9mFF5%QDG89%ZoY#
zfhjCvs}!IeC2|;5cQn>S*g#};NU9*7msgGM`%>({ETOPYu$wmRqUK~$?AL4O=AH8p
zKWJp7D%&_(Ct;9pog~GkB1jVGy|zZk!KHGw5Q0FkMID-oi4CPo1SUZwdV(n&l9{6K
zgy9<ibioh+HBUmh1$!>miCrd^n^Kh8+t!g(tqydPYh|X$iPX>v3Z~eN8|mthP(5)q
z*<di9dPtziTL1*~Kw8a2L!#T13}jn$D%C){x}SYkw@nd%WIIg<r}7N?Q}TY#qsQxY
zi@!<mBm>HNPw)6W4;zME&)YZ@&;tYFjnTe*U@ZVwfUVL3t+`|FpHl&Qn@j6ue#rqa
zFpK~y21uR*s{dU*YSbAQy*%FK?|#hP`}}_(v54I+GNcgFcLt!J^PXq$Ao^YNMii>1
zkU|x7H28G5gbbtW@%{9>_K1XiU@rmy_l<j*ey7}Z>K$M%ar5VOsa&ZSun;z{BlZ}?
zYlg=hb%g*H-wUg~pa5H~I9J=t$0o!`$YNngg8&6*8wMlobS#PAOq~odISb>M;>;N{
z=%Hipd*BkFV+m2N&NR`5nys2CxKdCADIyS2xF0ZM>i=S22Ld0RVU(TVf#wirhBL;>
zTn(=TS3xM2P*bq=PzW73J~J~Y#xSU5oYJySIvm%Gi*c9w5_ktYr;WUoW(YnFw;7oy
zqtS294pp_#b#Y+iV08CFLL%dRqkjqYM2frbylB&Hu~2w_2flpq9QV4%3czi?cqW3x
zT^kK|#rfS@#awLIet?D8!xz~c^yt&P#NADzB&JQ9Rm+P7E*gxz$1#B2IDiTxAy=G!
zG*f)QTNPeQu-vqAU>;11XHt~5ZH0!WZ3G<c%C$>y$DCVS^fjhh&Oi)zm<vKH&i$uB
z-F5Hp)(2&cyRhzY5ttF@OJ<syx~u|(ybxQa64U@GSO6!&R4@=Vy%`Di-wH*Xv3W}e
zT^pN<**#WV86ibVAlff5N73X`mZ&0yJnn*vhYWy1TL&m1$zIoJAGAD(0I9i=LY;0h
zEpCg#1{K5!hnNdRK!KW_yOdq5**m9@TBY!+dmT`oLY-ODBNljx#03cpRQPt=x2Q%u
z#8au9`Si(?Qh*=?&$bXQI1hb*LC>dVHsI@i{(Spwvf58)W|z>|Q0UVaQnS&n3y6i8
ze?;hv>IiNz`tCunO+mi;Ze?kOnC8xA<ve4}4Tz4t{ymUX@D^WncX`z-77iSzv=EM?
z0+2z|GO3}md$OlKY8_qf@c|?#Xm;QN7@+e&&*oQx8@N2<2u*-SZAUX*Acm{p_d-p}
zHZ6{EAl_u$ZLpFV0uJdt=GYi@FeR(%_2G@!owK;b7jjv`x50{$%-XZrY(;U?sA&48
zODcs7xq^$0&0_S~2TA-3n5MP#(Ql-WtjqumqrXD6gfrJ+u`e24=eHhELTg$GTJ!)C
zf)AMR$~vP-7Izv>aPaN6NgH2w;ygk-yBLm~#CX=*i}h>D$Yaf;FTL44^gC1`I!P&$
z4qCS`DZyoXZq|@k%mSNI-0NFjrDs1jE+iw%es}2a-#QQ9owSS3d>~9T>aZZez;XnW
zXiU)Ok;8RW2kCh0>3|Bqa7By~763CD*n#h*cc-E(NvZB`Zp|A3goK2N>mJL)hdkp?
z;{9b#`JUfyobh)Q%MkEnix(l*?^jFCMq4Zh3?)+8X%<wpix+Lw*A`!y-1Pn4{rkxH
zFshM~FNRn$qgL1v5riQ}KK;)W^KhjeQ>n9SEPM`gn<%m!Jr|U1ET00Ay0VtRMeDZu
zJ<i$NPSfGnzdU*+L#Hb;B*-Lj5<Nm?BEl;(TVXVpBriF)c)`f9utzLvG8H^BL{QjG
zDQ}gwNMht2YY?8U<f{n1(yLB?!`}6K`}t77sb)X%&3gdYz#H5E8H$O5l`tMY-eehX
zd0Ap?I<Llol5l6Ss1SB>B=vmP`(`lYyFVM>vWZwUWypyuK<)01!<#TQ=qFPHkg_p=
zqMfedaN}O;#Rl*~L+P$yqah=t)e%NUw^!IGc5#ml(*U!eHX}@!z=oofy+!hE7Bw~j
zRahb#4kkb443QdY3&~a}gkKWW46`4KdL*`@PxT1&{rkE#Ui*k=h$@$%IqTVnqcaao
z^oVEX_*=dLazwRN8sjYyvf`FDR2F+%yQ4XVGd3psQ+<wPypV%!jmX4Ej2bNp*cc*`
zP=#U|>gLI|-AQbjS(_|{E;Ai!G;Az`Y+7uk!6b*NoIZnxTsrDn%{{5kZ#+)v&EknC
zL4=()%O$0VlG$kx@IuR2C{)lCtgwnYR;0t2z&g}4a!D+ulLfI#iEL~e3dy+cYL%pv
z)nqE&cXieiO_r%mhUV?f)zetrCW$WHK0mGoZ*1t74x`Y-hiF1pS~5J=>R4H6BFH64
zuC^g8B3+c(lOj!Mo3lvkBaMnhSkM)^MOBJeEXI8L3rojWXNRdcMaF`6MM*d)f_);m
z(_yj1Et3H<OLAd39~Yf;$CYr66~^U~uZ@L@O^(fJ3e??8HkWm_uxjZA(KU2oHXx;i
zimMFmVl{Gu)p7+Bl4c-)vwQezUjA$kCk?@Do~4ABB{R`x=#*pFC3Gu>QvSLaxiEMS
zeJQq_?G=D!-DW-2ZOTCH*+K6$ohPSI&z1(&DkvGnT_xDeO!DkZk>|CY*j`Q;(DgF{
z{y18ID!{-hT7pZ-lv5nxuUx^6H!HEBNKEH#`9W@&0QyttVR{3HtEbJ>Y{L8e=F`!w
z%RZa?39n}jEY3tAH;cc+5dg%}6KO1~Cj1Ue$gl?6qfV(@)cHY{Id7M{ohGK&tx^mk
zzlQt3sY%&Oa<8tI?T6ETqAE~=Ds44jM(UO`-geeXAy)3c7nlQ>vbgOIh5+{UpgXdm
zHb6zZw7Fe^DiHzLkZ{O%%o51N^^o>@-Z5fzUTLnAO4#mF6n6+4V^>l>EJ`qvXHqDZ
zcfB`eklGs)kZftfAe1es+!7JMxEi1_WadSRERe#GN~rC1g|gb)R=JhOImEn-3i-yj
zlCoP$M$)5M8rWHCannRxU9~Mu(PYgdcN~<J&E15RklQNK)OKApZFZWQy5%C3gpw-?
z7Y$5X6(bpH8gmJX%-Onwqioh>l{QVu={`xG$ObtD4D)ngbaqbGcbZX>Ld$Ha*tW9W
zq?JcoZm?sD$gE_dU=_zLb)q&qVMLoQ#~qNkiq~BhCIsWSx^hTNl3cNpvao2aPZq7y
zs|l+jU4$v+<eoiv`c3cF0CIPH21mjziDtW|!6?nyxkq<KEmvBt@nN${%U%!}BSwzm
zJGrv<^S(LQ>^&?a<hVFtL4549LEVlL7<(r(94yA$`DZ^MEicZc_qqc;_BtQR%s88$
zYhznHUNTIjdSarAXi$cTG>e7t5yX$c;RG?nqthyY(aP}bcc1`t7yy0Raz=|T{xKQo
zMF=XGFkr|$A3~F79OMAD0^9DSPd9q$A%DZf;4cOb8b}oN8-;$(%(7VwfT0o+O_!ng
zJa-#-(>8?oJ5MbQZv~BJL8gtYtH^2{TU^hVJ>z<dSuBA;1dwDEf*W}H6=oPj0y;GC
z_lJyL^(Nr3LdIYLfNk|*D7mIyXw>R!IIY<`?hk1<ncZN=3(f%$=}f-h7QZH3ZF%Jj
z%J)8v7<s-|k5pQMo9}?cV1^MLfCIrpn;@M6AQnjsPC1_15Ink#^1epE2xG#7^(9*S
z6y@1=8Um^<#YhY107$QvNZmf6N~?fWM8YqLNF|2o=$c_fLx!7a5eXv{hzHy*0qMNr
zS`c4W9`0Q2SrT}ja0#WIO{PF7gL&vKrekjB19FrNd>jBvp%F18OK)?a3m-5k%63rV
z@aac?553ykt;-?PPJ~K2@c@Kg18Cd<O%-%|P`qq%R^y0iCp!f**~?boe8SeDYk%Mj
z!Lb`}AxklSkY2NHc^SCA`>LD(UW0t^?<&lW!h&zLpsv6Iw$%_AWY1T%!U502dED0%
ztHKEoZZ?s%@XG19di0$7Ps$mdV<S_SdEX{rsbK2i9b3#-a0NYgMoUkN$IMu2B<&gO
zVpgAhKHf;VIl0JnwMUzK_()8UHZ;c0=O)HF_c*EF+;}$4K1&#6n8qvWYsGmE<Dg2F
zZ9Sq7JDGd$K@`Y)hnT!0K|ij+v8e+9&G9`ugMsX+aWEjD{%iqNx2zD`GA1OCYr1+q
zCY+EFF!UU+Jznnge6Ibh-Ww)Ogt0)&tpr*XYck}X>f<H+QDHz}+~Qa2lPd_a>zX^D
z@;Zi8GrWEI&K@zxeg@hG3n!AC@hY}igJFqcDdjY&V!NSRF`~dM*7d-_#Bmvwe3)sO
z(Pb(ku8J@sj>Kj@Hh^HwLIfpIN=Vj<7iPmENC3lzPbDi1;8cy~8A2q<Aqd9XrZ7e%
zmTL0WaxxvXAAi+$+blA{%tLeN+>e8&HJa?^@Zj?on6%mqVXbbq404HMW}~Z0qgz_#
zV<aRbP@zXOhcqciNEydFEHw?X5P;US=r)$<SkD3Lo^K~o3iT`JHYB9o#<I&j9Gp#z
zi|CiGQt}%%Riim*4<KsTCnQ>7M{1Si5Xb<^LKvbk7&OG(ku;KM$)QgTyBTuwG%dw8
zN^NLtLgcU8uB5RL;j5TQX-Re8%2yb==_nm`FuE~oDsM5~L-R(Kt4f0Sur}P;>Drud
z*RAU}cv!<zH8fNRpvhFwu2kM=_v=CD>YMQN^NNN1wivrRgMQd567YdatKM74M>!7J
z0e@ZdQQCr*YUh3pL-w}`ws7;ouXC$Y7#?cWtHKFt$`BIK;}U%wvy*C_``{(vXt)K%
a0?(CiI#tKAY>W+m_`8xR!i0ewGcbVX_s|sp

literal 0
HcmV?d00001

diff --git a/tests/fixtures.py b/tests/fixtures.py
index 652d2f86..f579bcd9 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -51,6 +51,8 @@ REFERENCES = {
     'NM_002001.2':  {'filename':   'NM_002001.2.gb.bz2',
                      'checksum':   '7fd5aa4fe864fd5193f224fca8cea70d',
                      'geninfo_id': '31317229'},
+    'NM_001199.3':  {'filename':   'NM_001199.3.gb.bz2',
+                     'checksum':   'e750b6dcead66b8bb953ce445bcd3093'},
     'NG_008939.1':  {'filename':   'NG_008939.1.gb.bz2',
                      'checksum':   '114a03e16ad2f63531d796c2fb0d7039',
                      'geninfo_id': '211938431',
diff --git a/tests/test_variantchecker.py b/tests/test_variantchecker.py
index 735ff27b..8f0eea3c 100644
--- a/tests/test_variantchecker.py
+++ b/tests/test_variantchecker.py
@@ -1316,3 +1316,36 @@ class TestVariantchecker(MutalyzerTest):
         check_variant('NM_003002.2:c.1del', self.output)
         w_exon_annotation = self.output.getMessagesWithErrorCode('WEXON_ANNOTATION')
         assert len(w_exon_annotation) == 0
+
+    @fix(cache('NM_001199.3'))
+    def test_fs_no_stop(self):
+        """
+        Frame shift yielding no stop codon should be described with
+        uncertainty of the stop codon.
+
+        http://www.hgvs.org/mutnomen/FAQ.html#nostop
+        """
+        check_variant('NM_001199.3(BMP1):c.2188dup', self.output)
+        assert 'NM_001199.3(BMP1_i001):p.(Gln730Profs*?)' in self.output.getOutput('protDescriptions')
+
+    @fix(cache('NM_000193.2'))
+    def test_ext_no_stop(self):
+        """
+        Extension yielding no stop codon should be described with
+        uncertainty of the stop codon.
+
+        http://www.hgvs.org/mutnomen/FAQ.html#nostop
+        """
+        check_variant('NM_000193.2:c.1388G>C', self.output)
+        assert 'NM_000193.2(SHH_i001):p.(*463Serext*?)' in self.output.getOutput('protDescriptions')
+
+    @fix(cache('NM_000193.2'))
+    def test_fs_ext_no_stop(self):
+        """
+        Extension yielding no stop codon should be described with
+        uncertainty of the stop codon.
+
+        http://www.hgvs.org/mutnomen/FAQ.html#nostop
+        """
+        check_variant('NM_000193.2:c.1388_1389insC', self.output)
+        assert 'NM_000193.2(SHH_i001):p.(*463Cysext*?)' in self.output.getOutput('protDescriptions')
-- 
GitLab