From d2f9169094e8283f2622ba438a1af2859b164b13 Mon Sep 17 00:00:00 2001 From: Martijn Vermaat <martijn@vermaat.name> Date: Tue, 14 Jul 2015 17:53:57 +0200 Subject: [PATCH] Uncertain stop codon in protein descriptions (fs and ext) When a variant results in a frame shift or extension and we don't see a new stop codon in the RNA, the protein description should use the notation for an uncertain stop codon, e.g., `p.(Gln730Profs*?)` instead of `p.(Gln730Profs*96)` where 96 is just the last codon in our transcript [1]. To detect this, we now use `to_stop=False` in our `.translate()` calls, since that will explicitly return `*` characters for stop codons. We also slightly fix the coloring of changes in the protein sequence where previously changed stop codon characters were not included. [1] http://www.hgvs.org/mutnomen/FAQ.html#nostop --- mutalyzer/util.py | 87 ++++++++++++++++++++++------------ mutalyzer/variantchecker.py | 60 +++++++++++++---------- tests/data/NM_001199.3.gb.bz2 | Bin 0 -> 5216 bytes tests/fixtures.py | 2 + tests/test_variantchecker.py | 33 +++++++++++++ 5 files changed, 129 insertions(+), 53 deletions(-) create mode 100644 tests/data/NM_001199.3.gb.bz2 diff --git a/mutalyzer/util.py b/mutalyzer/util.py index ae709477..93d6de8f 100644 --- a/mutalyzer/util.py +++ b/mutalyzer/util.py @@ -407,18 +407,22 @@ def read_dna(handle): return ''.join(x for x in unicode(handle.read()).upper() if x in 'ATCG') -def in_frame_description(s1, s2) : +def in_frame_description(s1, s2): """ Give a description of an inframe difference of two proteins. Also give the position at which the proteins start to differ and the positions at which they are the same again. 
- >>> in_frame_description('MTAPQQMT', 'MTAQQMT') + >>> in_frame_description('MTAPQQMT*', 'MTAQQMT*') ('p.(Pro4del)', 3, 4, 3) - >>> in_frame_description('MTAPQQMT', 'MTAQMT') + >>> in_frame_description('MTAPQQMT*', 'MTAQMT*') ('p.(Pro4_Gln5del)', 3, 5, 3) - >>> in_frame_description('MTAPQQT', 'MTAQQMT') + >>> in_frame_description('MTAPQQT*', 'MTAQQMT*') ('p.(Pro4_Gln6delinsGlnGlnMet)', 3, 6, 6) + >>> in_frame_description('MTAPQQMT*', 'MTAPQQMTMQ*') + ('p.(*9Metext*2)', 8, 9, 11) + >>> in_frame_description('MTAPQQMT*', 'MTAPQQMTMQ') + ('p.(*9Metext*?)', 8, 8, 10) @arg s1: The original protein. @type s1: unicode @@ -439,6 +443,10 @@ def in_frame_description(s1, s2) : # Nothing happened. return ('p.(=)', 0, 0, 0) + s2_stop = '*' in s2 + s1 = s1.rstrip('*') + s2 = s2.rstrip('*') + lcp = len(longest_common_prefix(s1, s2)) lcs = len(longest_common_suffix(s1[lcp:], s2[lcp:])) s1_end = len(s1) - lcs @@ -447,9 +455,13 @@ def in_frame_description(s1, s2) : # Insertion / Duplication / Extention. if not s1_end - lcp: if len(s1) == lcp: - return ('p.(*%i%sext*%i)' % \ - (len(s1) + 1, seq3(s2[len(s1)]), abs(len(s1) - len(s2))), - len(s1), len(s1), len(s2)) + # http://www.hgvs.org/mutnomen/FAQ.html#nostop + stop = unicode(abs(len(s1) - len(s2))) if s2_stop else '?' + + return ('p.(*%i%sext*%s)' % \ + (len(s1) + 1, seq3(s2[len(s1)]), stop), + len(s1), len(s1) + 1, len(s2) + (1 if s2_stop else 0)) + ins_length = s2_end - lcp if lcp - ins_length >= 0 and s1[lcp - ins_length:lcp] == s2[lcp:s2_end]: @@ -472,7 +484,7 @@ def in_frame_description(s1, s2) : if not s2_end - lcp: if len(s2) == lcp: return ('p.(%s%i*)' % (seq3(s1[len(s2)]), len(s2) + 1), - 0, 0, 0) + lcp, len(s1) + 1, len(s2) + 1) if lcp + 1 == s1_end: return ('p.(%s%idel)' % (seq3(s1[lcp]), lcp + 1), @@ -506,12 +518,14 @@ def out_of_frame_description(s1, s2): Also give the position at which the proteins start to differ and the end positions (to be compatible with the in_frame_description function). 
- >>> out_of_frame_description('MTAPQQMT', 'MTAQQMT') - ('p.(Pro4Glnfs*5)', 3, 8, 7) - >>> out_of_frame_description('MTAPQQMT', 'MTAQMT') - ('p.(Pro4Glnfs*4)', 3, 8, 6) - >>> out_of_frame_description('MTAPQQT', 'MTAQQMT') - ('p.(Pro4Glnfs*5)', 3, 7, 7) + >>> out_of_frame_description('MTAPQQMT*', 'MTAQQMT*') + ('p.(Pro4Glnfs*5)', 3, 9, 8) + >>> out_of_frame_description('MTAPQQMT*', 'MTAQMT*') + ('p.(Pro4Glnfs*4)', 3, 9, 7) + >>> out_of_frame_description('MTAPQQT*', 'MTAQQMT*') + ('p.(Pro4Glnfs*5)', 3, 8, 8) + >>> out_of_frame_description('MTAPQQT*', 'MTAQQMT') + ('p.(Pro4Glnfs*?)', 3, 8, 7) @arg s1: The original protein. @type s1: unicode @@ -527,33 +541,44 @@ def out_of_frame_description(s1, s2): @todo: More intelligently handle longest_common_prefix(). """ - lcp = len(longest_common_prefix(s1, s2)) + s1_seq = s1.rstrip('*') + s2_seq = s2.rstrip('*') + lcp = len(longest_common_prefix(s1_seq, s2_seq)) - if lcp == len(s2): # NonSense mutation. - if lcp == len(s1): # Is this correct? + if lcp == len(s2_seq): # NonSense mutation. + if lcp == len(s1_seq): # Is this correct? return ('p.(=)', 0, 0, 0) return ('p.(%s%i*)' % (seq3(s1[lcp]), lcp + 1), lcp, len(s1), lcp) - if lcp == len(s1) : - return ('p.(*%i%sext*%i)' % \ - (len(s1) + 1, seq3(s2[len(s1)]), abs(len(s1) - len(s2))), - len(s1), len(s1), len(s2)) - return ('p.(%s%i%sfs*%i)' % \ - (seq3(s1[lcp]), lcp + 1, seq3(s2[lcp]), len(s2) - lcp + 1), + if lcp == len(s1_seq): + # http://www.hgvs.org/mutnomen/FAQ.html#nostop + stop = unicode(abs(len(s1_seq) - len(s2_seq))) if '*' in s2 else '?' + + return ('p.(*%i%sext*%s)' % \ + (len(s1_seq) + 1, seq3(s2[len(s1_seq)]), stop), + len(s1_seq), len(s1), len(s2)) + + # http://www.hgvs.org/mutnomen/FAQ.html#nostop + stop = unicode(len(s2_seq) - lcp + 1) if '*' in s2 else '?' 
+ + return ('p.(%s%i%sfs*%s)' % \ + (seq3(s1[lcp]), lcp + 1, seq3(s2[lcp]), stop), lcp, len(s1), len(s2)) #out_of_frame_description -def protein_description(cds_stop, s1, s2) : +def protein_description(cds_stop, s1, s2): """ Wrapper function for the in_frame_description() and out_of_frame_description() functions. It uses the value cds_stop to decide which one to call. - >>> protein_description(34, 'MTAPQQMT', 'MTAQQMT') - ('p.(Pro4Glnfs*5)', 3, 8, 7) - >>> protein_description(33, 'MTAPQQMT', 'MTAQQMT') + >>> protein_description(34, 'MTAPQQMT*', 'MTAQQMT*') + ('p.(Pro4Glnfs*5)', 3, 9, 8) + >>> protein_description(34, 'MTAPQQMT*', 'MTAQQMT') + ('p.(Pro4Glnfs*?)', 3, 9, 7) + >>> protein_description(33, 'MTAPQQMT*', 'MTAQQMT*') ('p.(Pro4del)', 3, 4, 3) - >>> protein_description(33, 'MTAPQQMT', 'TTAQQMT') + >>> protein_description(33, 'MTAPQQMT*', 'TTAQQMT*') ('p.?', 0, 4, 3) @arg cds_stop: Position of the stop codon in c. notation (CDS length). @@ -639,10 +664,14 @@ def _insert_tag(s, pos1, pos2, tag1, tag2): if 0 <= pos1 < block: # Insert tag1. output = output[:pos1] + tag1 + output[pos1:] - if 0 <= pos2 < block: + if 0 < pos2 < block: # Insert tag2. output = output[:-(block - pos2)] + tag2 \ + output[-(block - pos2):] + if pos2 == block: + # Insert tag2. Special case, since s[:-0] would yield the empty + # string. 
+ output = output + tag2 return output #_insert_tag diff --git a/mutalyzer/variantchecker.py b/mutalyzer/variantchecker.py index 3f0ee422..e385c336 100644 --- a/mutalyzer/variantchecker.py +++ b/mutalyzer/variantchecker.py @@ -1349,22 +1349,33 @@ def _add_transcript_info(mutator, transcript, output): cds_original = cds_original.reverse_complement() cds_variant = cds_variant.reverse_complement() - if '*' in cds_original.translate(table=transcript.txTable)[:-1]: + protein_original = cds_original.translate(table=transcript.txTable) + + if not protein_original.endswith('*'): + output.addMessage(__file__, 3, 'ESTOP', + 'No stop codon found.') + return + + if '*' in protein_original[:-1]: output.addMessage(__file__, 3, 'ESTOP', 'In frame stop codon found.') return - protein_original = cds_original.translate(table=transcript.txTable, - to_stop=True) - protein_variant = cds_variant.translate(table=transcript.txTable, - to_stop=True) + protein_variant = cds_variant.translate(table=transcript.txTable) + + # Up to and including the first '*', or the entire string. + try: + stop = unicode(protein_variant).index('*') + protein_variant = protein_variant[:stop + 1] + except ValueError: + pass # Note: addOutput('origCDS', ...) was first before the possible # reverse complement operation above. output.addOutput('origCDS', unicode(cds_original)) - output.addOutput("newCDS", unicode(cds_variant[:(len(protein_variant) + 1) * 3])) + output.addOutput("newCDS", unicode(cds_variant[:len(protein_variant) * 3])) - output.addOutput('oldprotein', unicode(protein_original) + '*') + output.addOutput('oldprotein', unicode(protein_original)) # Todo: Don't generate the fancy HTML protein views here, do this in # website.py. @@ -1373,9 +1384,9 @@ def _add_transcript_info(mutator, transcript, output): if not protein_variant or unicode(protein_variant[0]) != 'M': # Todo: Protein differences are not color-coded, # use something like below in protein_description(). 
- util.print_protein_html(unicode(protein_original) + '*', 0, 0, + util.print_protein_html(unicode(protein_original), 0, 0, output, 'oldProteinFancy') - util.print_protein_html(unicode(protein_original) + '*', 0, 0, + util.print_protein_html(unicode(protein_original), 0, 0, output, 'oldProteinFancyText', text=True) if unicode(cds_variant[0:3]) in \ CodonTable.unambiguous_dna_by_id[transcript.txTable].start_codons: @@ -1386,10 +1397,10 @@ def _add_transcript_info(mutator, transcript, output): output.addOutput('altStart', unicode(cds_variant[0:3])) if unicode(protein_original[1:]) != unicode(protein_variant[1:]): output.addOutput('altProtein', - 'M' + unicode(protein_variant[1:]) + '*') - util.print_protein_html('M' + unicode(protein_variant[1:]) + '*', 0, + 'M' + unicode(protein_variant[1:])) + util.print_protein_html('M' + unicode(protein_variant[1:]), 0, 0, output, 'altProteinFancy') - util.print_protein_html('M' + unicode(protein_variant[1:]) + '*', 0, + util.print_protein_html('M' + unicode(protein_variant[1:]), 0, 0, output, 'altProteinFancyText', text=True) else : output.addOutput('newprotein', '?') @@ -1405,18 +1416,15 @@ def _add_transcript_info(mutator, transcript, output): unicode(protein_original), unicode(protein_variant)) - # This is never used. 
- output.addOutput('myProteinDescription', descr) - - util.print_protein_html(unicode(protein_original) + '*', first, + util.print_protein_html(unicode(protein_original), first, last_original, output, 'oldProteinFancy') - util.print_protein_html(unicode(protein_original) + '*', first, + util.print_protein_html(unicode(protein_original), first, last_original, output, 'oldProteinFancyText', text=True) if unicode(protein_original) != unicode(protein_variant): - output.addOutput('newprotein', unicode(protein_variant) + '*') - util.print_protein_html(unicode(protein_variant) + '*', first, + output.addOutput('newprotein', unicode(protein_variant)) + util.print_protein_html(unicode(protein_variant), first, last_variant, output, 'newProteinFancy') - util.print_protein_html(unicode(protein_variant) + '*', first, + util.print_protein_html(unicode(protein_variant), first, last_variant, output, 'newProteinFancyText', text=True) #_add_transcript_info @@ -1808,8 +1816,7 @@ def check_variant(description, output): if not len(cds_original) % 3: try: # FIXME this is a bit of a rancid fix. - protein_original = cds_original.translate( - table=transcript.txTable, cds=True, to_stop=True) + protein_original = cds_original.translate(table=transcript.txTable, cds=True) except CodonTable.TranslationError: if transcript.current: output.addMessage( @@ -1825,8 +1832,13 @@ def check_variant(description, output): % (gene.name, transcript.name)) transcript.proteinDescription = 'p.?' else: - protein_variant = cds_variant.translate( - table=transcript.txTable, to_stop=True) + protein_variant = cds_variant.translate(table=transcript.txTable) + # Up to and including the first '*', or the entire string. 
+ try: + stop = unicode(protein_variant).index('*') + protein_variant = protein_variant[:stop + 1] + except ValueError: + pass try: cds_length = util.cds_length( mutator.shift_sites(transcript.CDS.positionList)) diff --git a/tests/data/NM_001199.3.gb.bz2 b/tests/data/NM_001199.3.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..a7dd692a2d5eac5e71aa433be19ae21ecbc639d0 GIT binary patch literal 5216 zcmV-m6rbxtT4*^jL0KkKStEvK5C9A$e}I4dAOKkB|N1}w|N6iG|L|ZJ{Oo)#@a|*L zT;XFt1sRkLvqK;&eeeUlJ-PHyTQ#}^ITWDJE7@~Xss@f(PiFH5f&yp&5Scv_LNsW? zQQ|X6Jx?UaHjGU}K-BaTKur@(NT;d#gHULCk5kGtVhsQQ1Jg;M8lKToO%p)G0iX>U z0000002%-#JrYQXq|l89fN7vI4FCYqpa1}98cGI$03M(K00x?AkN^Mx03<{p2m}pN zXlbf=qthc(Hll4o<pWIs01tTmiu&iMA7Rrq_rJ<hO^Ob;Gcax>z%+2Bh6I-8W?}Lt zH<vrgnnY$YNfX|rv9(ohnSwUl)8ZxR*7|$A8yg!pXHS>4$?0m<_RhvvQPzSVNWfmR ze451zLVpGqyUP7q$N}Bw?9{2Atwzq*xt+~>%-zM45F!m?5b*e!`#OF+K0nz06MV47 z{D2^XUqR%sY8!>xxPl&Tw1RSKhof7@(}st)amA-10Emc-a2q|J<3LNHzljZVXYcN! zk%${qYOEBy_(gV`*%wDmP*0^OV8#tLk<`*l0ITe#`f{+67XwxK(Ax&s-<uYb);n$y zLRhXc=eM_AHQ~bmMq(N+XWOz02xDSQ8xYr>qZ@f%vPPN$?Pzh(#wJzBiH0%kOu$71 z4JkHuekNV*W~1w9g?yi5>X>uOcs5Mpp>HoYjq`tQ+UbPiC4Ih+VD*uU!_Kt@g~k<_ zkHUp}Of<b5aI=IWX)HGQK-nF(wV&Kb1x4%_U!HTI@{p!wvP!YPwVcRoiLd?Bw)^y| zdgTIXj~r#X*{E^OXU#GIkP+)3OJ&#CU@Oll#WlceuWInRA^wT^O2ClZ0V6UE(x}I@ z#{A9raGX(zp!A-3+hI9!_aX-pqCU;hT4Kd<m||5GWPOi$iJYpARY2+QOUq+h<??lY zpFYmNSIN^+hSeN`rh1$mnhWqT5N4RD!LLp_8x<Maa{;n}Baxhu5e^Ys>D>>&bC|6G zCZsHhq9JJ@Ya<6G$wF08cm-InGe!4(-WMCbL{e!|P@*CWGKFZJ=8aIrNZ<H7d(mMG zjH0xhTpq<J8Bmdcj7W~iEQ>3EjHQ&!XVOir5?tBz6@t!71y&4*!Byea%gmr8%uYDe zn*|vQeE1srDlJ+fWOhi&9)GXJD}me~63|`t7EpwTK>+qF3qR7;2I-Ni!T>e~NfzO3 zd8W0tKs72x^ND@EgZhK$Zgn1EgVbUuN+LFd0+DUAEp5T1MwLz7y)J@usF3mLO=Vs7 zXwJz_`k2Matk0|A9==dWIS+3iqx$=Pn%o$j<0|s}RRK`}ma%8n&0p~Zk?<qz_>yb% zgs=l-Xkk(4<9ygfdO1e&+i;q2VIL>g(Yy#4Cw{K}UN(FCjPgGY`Pi(4SmT@Mzh7Ux zaQl*{dyG#$bTTwW#Qp7<&#w)m#LYIQmG0c*7ZLKk5s)?k3FaML5LNp9MMKz~R@?bg zJ|j+vvJA)fUl*pI7l)yy#_($gtS)u>V0me`cAaW{?cm_oYk7q|`(o!Qu+*6qYkvYI 
zFbv3*=)f?AY5+JtckBMzv;r?_84;cHtqdIE$khkRKiC9D<;|kn-t>mJ2qflEpt(@M zsx;gPb(kT8swGxWMfpBIp8v|5$T&91ZIqG=Q{WYcg;-Ub#NE*g%8wHu|8|nQA|wcL zAj}Ze4FCli<h6Z*8<5fKAE(Z9_QFZ@I&lCAUUsF8$NQYS1L7U5p;!&O74~0yi(0Vg z5t`wDcmiDY0=!POza|Xp2&AM$_+jLCSchFGXKCE7;;vv?UpL$x=ic%%6vvHt++<RG zN-D(oE6GYQ8hLTX+=@?O=Du~lE44LJa5mX$-z2gmgF{rH%PXY%OgMa2^6D)$*e5uE z#50>wwGQ^#HX(hr5*e=o<4*{jzeIS*32>8pJY%X9QNU5i@abdO&d$!2so!a-$Mx{H zH5d42;`<qb0px1n0H6tWCC=MJ>mY;)y*s@6oSx<Jg%N%o-q@k9mFF5%QDG89%ZoY# zfhjCvs}!IeC2|;5cQn>S*g#};NU9*7msgGM`%>({ETOPYu$wmRqUK~$?AL4O=AH8p zKWJp7D%&_(Ct;9pog~GkB1jVGy|zZk!KHGw5Q0FkMID-oi4CPo1SUZwdV(n&l9{6K zgy9<ibioh+HBUmh1$!>miCrd^n^Kh8+t!g(tqydPYh|X$iPX>v3Z~eN8|mthP(5)q z*<di9dPtziTL1*~Kw8a2L!#T13}jn$D%C){x}SYkw@nd%WIIg<r}7N?Q}TY#qsQxY zi@!<mBm>HNPw)6W4;zME&)YZ@&;tYFjnTe*U@ZVwfUVL3t+`|FpHl&Qn@j6ue#rqa zFpK~y21uR*s{dU*YSbAQy*%FK?|#hP`}}_(v54I+GNcgFcLt!J^PXq$Ao^YNMii>1 zkU|x7H28G5gbbtW@%{9>_K1XiU@rmy_l<j*ey7}Z>K$M%ar5VOsa&ZSun;z{BlZ}? zYlg=hb%g*H-wUg~pa5H~I9J=t$0o!`$YNngg8&6*8wMlobS#PAOq~odISb>M;>;N{ z=%Hipd*BkFV+m2N&NR`5nys2CxKdCADIyS2xF0ZM>i=S22Ld0RVU(TVf#wirhBL;> zTn(=TS3xM2P*bq=PzW73J~J~Y#xSU5oYJySIvm%Gi*c9w5_ktYr;WUoW(YnFw;7oy zqtS294pp_#b#Y+iV08CFLL%dRqkjqYM2frbylB&Hu~2w_2flpq9QV4%3czi?cqW3x zT^kK|#rfS@#awLIet?D8!xz~c^yt&P#NADzB&JQ9Rm+P7E*gxz$1#B2IDiTxAy=G! zG*f)QTNPeQu-vqAU>;11XHt~5ZH0!WZ3G<c%C$>y$DCVS^fjhh&Oi)zm<vKH&i$uB z-F5Hp)(2&cyRhzY5ttF@OJ<syx~u|(ybxQa64U@GSO6!&R4@=Vy%`Di-wH*Xv3W}e zT^pN<**#WV86ibVAlff5N73X`mZ&0yJnn*vhYWy1TL&m1$zIoJAGAD(0I9i=LY;0h zEpCg#1{K5!hnNdRK!KW_yOdq5**m9@TBY!+dmT`oLY-ODBNljx#03cpRQPt=x2Q%u z#8au9`Si(?Qh*=?&$bXQI1hb*LC>dVHsI@i{(Spwvf58)W|z>|Q0UVaQnS&n3y6i8 ze?;hv>IiNz`tCunO+mi;Ze?kOnC8xA<ve4}4Tz4t{ymUX@D^WncX`z-77iSzv=EM? 
z0+2z|GO3}md$OlKY8_qf@c|?#Xm;QN7@+e&&*oQx8@N2<2u*-SZAUX*Acm{p_d-p} zHZ6{EAl_u$ZLpFV0uJdt=GYi@FeR(%_2G@!owK;b7jjv`x50{$%-XZrY(;U?sA&48 zODcs7xq^$0&0_S~2TA-3n5MP#(Ql-WtjqumqrXD6gfrJ+u`e24=eHhELTg$GTJ!)C zf)AMR$~vP-7Izv>aPaN6NgH2w;ygk-yBLm~#CX=*i}h>D$Yaf;FTL44^gC1`I!P&$ z4qCS`DZyoXZq|@k%mSNI-0NFjrDs1jE+iw%es}2a-#QQ9owSS3d>~9T>aZZez;XnW zXiU)Ok;8RW2kCh0>3|Bqa7By~763CD*n#h*cc-E(NvZB`Zp|A3goK2N>mJL)hdkp? z;{9b#`JUfyobh)Q%MkEnix(l*?^jFCMq4Zh3?)+8X%<wpix+Lw*A`!y-1Pn4{rkxH zFshM~FNRn$qgL1v5riQ}KK;)W^KhjeQ>n9SEPM`gn<%m!Jr|U1ET00Ay0VtRMeDZu zJ<i$NPSfGnzdU*+L#Hb;B*-Lj5<Nm?BEl;(TVXVpBriF)c)`f9utzLvG8H^BL{QjG zDQ}gwNMht2YY?8U<f{n1(yLB?!`}6K`}t77sb)X%&3gdYz#H5E8H$O5l`tMY-eehX zd0Ap?I<Llol5l6Ss1SB>B=vmP`(`lYyFVM>vWZwUWypyuK<)01!<#TQ=qFPHkg_p= zqMfedaN}O;#Rl*~L+P$yqah=t)e%NUw^!IGc5#ml(*U!eHX}@!z=oofy+!hE7Bw~j zRahb#4kkb443QdY3&~a}gkKWW46`4KdL*`@PxT1&{rkE#Ui*k=h$@$%IqTVnqcaao z^oVEX_*=dLazwRN8sjYyvf`FDR2F+%yQ4XVGd3psQ+<wPypV%!jmX4Ej2bNp*cc*` zP=#U|>gLI|-AQbjS(_|{E;Ai!G;Az`Y+7uk!6b*NoIZnxTsrDn%{{5kZ#+)v&EknC zL4=()%O$0VlG$kx@IuR2C{)lCtgwnYR;0t2z&g}4a!D+ulLfI#iEL~e3dy+cYL%pv z)nqE&cXieiO_r%mhUV?f)zetrCW$WHK0mGoZ*1t74x`Y-hiF1pS~5J=>R4H6BFH64 zuC^g8B3+c(lOj!Mo3lvkBaMnhSkM)^MOBJeEXI8L3rojWXNRdcMaF`6MM*d)f_);m z(_yj1Et3H<OLAd39~Yf;$CYr66~^U~uZ@L@O^(fJ3e??8HkWm_uxjZA(KU2oHXx;i zimMFmVl{Gu)p7+Bl4c-)vwQezUjA$kCk?@Do~4ABB{R`x=#*pFC3Gu>QvSLaxiEMS zeJQq_?G=D!-DW-2ZOTCH*+K6$ohPSI&z1(&DkvGnT_xDeO!DkZk>|CY*j`Q;(DgF{ z{y18ID!{-hT7pZ-lv5nxuUx^6H!HEBNKEH#`9W@&0QyttVR{3HtEbJ>Y{L8e=F`!w z%RZa?39n}jEY3tAH;cc+5dg%}6KO1~Cj1Ue$gl?6qfV(@)cHY{Id7M{ohGK&tx^mk zzlQt3sY%&Oa<8tI?T6ETqAE~=Ds44jM(UO`-geeXAy)3c7nlQ>vbgOIh5+{UpgXdm zHb6zZw7Fe^DiHzLkZ{O%%o51N^^o>@-Z5fzUTLnAO4#mF6n6+4V^>l>EJ`qvXHqDZ zcfB`eklGs)kZftfAe1es+!7JMxEi1_WadSRERe#GN~rC1g|gb)R=JhOImEn-3i-yj zlCoP$M$)5M8rWHCannRxU9~Mu(PYgdcN~<J&E15RklQNK)OKApZFZWQy5%C3gpw-? 
z7Y$5X6(bpH8gmJX%-Onwqioh>l{QVu={`xG$ObtD4D)ngbaqbGcbZX>Ld$Ha*tW9W zq?JcoZm?sD$gE_dU=_zLb)q&qVMLoQ#~qNkiq~BhCIsWSx^hTNl3cNpvao2aPZq7y zs|l+jU4$v+<eoiv`c3cF0CIPH21mjziDtW|!6?nyxkq<KEmvBt@nN${%U%!}BSwzm zJGrv<^S(LQ>^&?a<hVFtL4549LEVlL7<(r(94yA$`DZ^MEicZc_qqc;_BtQR%s88$ zYhznHUNTIjdSarAXi$cTG>e7t5yX$c;RG?nqthyY(aP}bcc1`t7yy0Raz=|T{xKQo zMF=XGFkr|$A3~F79OMAD0^9DSPd9q$A%DZf;4cOb8b}oN8-;$(%(7VwfT0o+O_!ng zJa-#-(>8?oJ5MbQZv~BJL8gtYtH^2{TU^hVJ>z<dSuBA;1dwDEf*W}H6=oPj0y;GC z_lJyL^(Nr3LdIYLfNk|*D7mIyXw>R!IIY<`?hk1<ncZN=3(f%$=}f-h7QZH3ZF%Jj z%J)8v7<s-|k5pQMo9}?cV1^MLfCIrpn;@M6AQnjsPC1_15Ink#^1epE2xG#7^(9*S z6y@1=8Um^<#YhY107$QvNZmf6N~?fWM8YqLNF|2o=$c_fLx!7a5eXv{hzHy*0qMNr zS`c4W9`0Q2SrT}ja0#WIO{PF7gL&vKrekjB19FrNd>jBvp%F18OK)?a3m-5k%63rV z@aac?553ykt;-?PPJ~K2@c@Kg18Cd<O%-%|P`qq%R^y0iCp!f**~?boe8SeDYk%Mj z!Lb`}AxklSkY2NHc^SCA`>LD(UW0t^?<&lW!h&zLpsv6Iw$%_AWY1T%!U502dED0% ztHKEoZZ?s%@XG19di0$7Ps$mdV<S_SdEX{rsbK2i9b3#-a0NYgMoUkN$IMu2B<&gO zVpgAhKHf;VIl0JnwMUzK_()8UHZ;c0=O)HF_c*EF+;}$4K1n8qvWYsGmE<Dg2F zZ9Sq7JDGd$K@`Y)hnT!0K|ij+v8e+9&G9`ugMsX+aWEjD{%iqNx2zD`GA1OCYr1+q zCY+EFF!UU+Jznnge6Ibh-Ww)Ogt0)&tpr*XYck}X>f<H+QDHz}+~Qa2lPd_a>zX^D z@;Zi8GrWEI&K@zxeg@hG3n!AC@hY}igJFqcDdjY&V!NSRF`~dM*7d-_#Bmvwe3)sO z(Pb(ku8J@sj>Kj@Hh^HwLIfpIN=Vj<7iPmENC3lzPbDi1;8cy~8A2q<Aqd9XrZ7e% zmTL0WaxxvXAAi+$+blA{%tLeN+>e8&HJa?^@Zj?on6%mqVXbbq404HMW}~Z0qgz_# zV<aRbP@zXOhcqciNEydFEHw?X5P;US=r)$<SkD3Lo^K~o3iT`JHYB9o#<I&j9Gp#z zi|CiGQt}%%Riim*4<KsTCnQ>7M{1Si5Xb<^LKvbk7&OG(ku;KM$)QgTyBTuwG%dw8 zN^NLtLgcU8uB5RL;j5TQX-Re8%2yb==_nm`FuE~oDsM5~L-R(Kt4f0Sur}P;>Drud z*RAU}cv!<zH8fNRpvhFwu2kM=_v=CD>YMQN^NNN1wivrRgMQd567YdatKM74M>!7J z0e@ZdQQCr*YUh3pL-w}`ws7;ouXC$Y7#?cWtHKFt$`BIK;}U%wvy*C_``{(vXt)K% a0?(CiI#tKAY>W+m_`8xR!i0ewGcbVX_s|sp literal 0 HcmV?d00001 diff --git a/tests/fixtures.py b/tests/fixtures.py index 652d2f86..f579bcd9 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -51,6 +51,8 @@ REFERENCES = { 'NM_002001.2': {'filename': 'NM_002001.2.gb.bz2', 'checksum': '7fd5aa4fe864fd5193f224fca8cea70d', 
'geninfo_id': '31317229'}, + 'NM_001199.3': {'filename': 'NM_001199.3.gb.bz2', + 'checksum': 'e750b6dcead66b8bb953ce445bcd3093'}, 'NG_008939.1': {'filename': 'NG_008939.1.gb.bz2', 'checksum': '114a03e16ad2f63531d796c2fb0d7039', 'geninfo_id': '211938431', diff --git a/tests/test_variantchecker.py b/tests/test_variantchecker.py index 735ff27b..8f0eea3c 100644 --- a/tests/test_variantchecker.py +++ b/tests/test_variantchecker.py @@ -1316,3 +1316,36 @@ class TestVariantchecker(MutalyzerTest): check_variant('NM_003002.2:c.1del', self.output) w_exon_annotation = self.output.getMessagesWithErrorCode('WEXON_ANNOTATION') assert len(w_exon_annotation) == 0 + + @fix(cache('NM_001199.3')) + def test_fs_no_stop(self): + """ + Frame shift yielding no stop codon should be described with + uncertainty of the stop codon. + + http://www.hgvs.org/mutnomen/FAQ.html#nostop + """ + check_variant('NM_001199.3(BMP1):c.2188dup', self.output) + assert 'NM_001199.3(BMP1_i001):p.(Gln730Profs*?)' in self.output.getOutput('protDescriptions') + + @fix(cache('NM_000193.2')) + def test_ext_no_stop(self): + """ + Extension yielding no stop codon should be described with + uncertainty of the stop codon. + + http://www.hgvs.org/mutnomen/FAQ.html#nostop + """ + check_variant('NM_000193.2:c.1388G>C', self.output) + assert 'NM_000193.2(SHH_i001):p.(*463Serext*?)' in self.output.getOutput('protDescriptions') + + @fix(cache('NM_000193.2')) + def test_fs_ext_no_stop(self): + """ + Extension yielding no stop codon should be described with + uncertainty of the stop codon. + + http://www.hgvs.org/mutnomen/FAQ.html#nostop + """ + check_variant('NM_000193.2:c.1388_1389insC', self.output) + assert 'NM_000193.2(SHH_i001):p.(*463Cysext*?)' in self.output.getOutput('protDescriptions') -- GitLab