From 9191352b7192b9b01f82a87505bbd6ab2ef8ffa0 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <>
Date: Wed, 10 Feb 2016 14:26:27 +0100
Subject: [PATCH] Don't report ext*? when variant RNA has stop codon

With the change introduced by #65 we lost track of whether the variant
RNA has an alternative downstream stop codon and therefore always
reported ext*? when the original stop codon was removed.

Fixes #145
 mutalyzer/            |  4 ++--
 tests/           | 37 ++++++++++++++++++++++++++++++++++++
 tests/ |  9 +++++++++
 3 files changed, 48 insertions(+), 2 deletions(-)
 create mode 100644 tests/

diff --git a/mutalyzer/ b/mutalyzer/
index a76ad361..c53d9130 100644
--- a/mutalyzer/
+++ b/mutalyzer/
@@ -424,7 +424,7 @@ def in_frame_description(s1, s2):
         >>> in_frame_description('MTAPQQMT*', 'MTAPQQMTMQ*')
         ('p.(*9Metext*2)', 8, 9, 11)
         >>> in_frame_description('MTAPQQMT*', 'MTAPQQMTMQ')
-        ('p.(*9Metext*?)', 8, 8, 10)
+        ('p.(*9Metext*?)', 8, 9, 10)
     @arg s1: The original protein.
     @type s1: unicode
@@ -441,6 +441,7 @@ def in_frame_description(s1, s2):
     @todo: More intelligently handle longest_common_prefix().
     @todo: Refactor this code (too many return statements).
+    s2_stop = '*' in s2
     s1 = s1.rstrip('*')
     s2 = s2.rstrip('*')
@@ -448,7 +449,6 @@ def in_frame_description(s1, s2):
         # Nothing happened.
         return ('p.(=)', 0, 0, 0)
-    s2_stop = '*' in s2
     lcp = len(longest_common_prefix(s1, s2))
     lcs = len(longest_common_suffix(s1[lcp:], s2[lcp:]))
     s1_end = len(s1) - lcs
diff --git a/tests/ b/tests/
new file mode 100644
index 00000000..77183098
--- /dev/null
+++ b/tests/
@@ -0,0 +1,37 @@
+"""Tests for the mutalyzer.util module."""
+from __future__ import unicode_literals
+import pytest
+from mutalyzer import util
+@pytest.mark.parametrize('ref,var,descr,first,last_ref,last_var', [
+    ('MTAPQQMT*', 'MTAQQMT*', 'p.(Pro4del)', 3, 4, 3),
+    ('MTAPQQMT*', 'MTAQMT*', 'p.(Pro4_Gln5del)', 3, 5, 3),
+    ('MTAPQQT*', 'MTAQQMT*', 'p.(Pro4_Gln6delinsGlnGlnMet)', 3, 6, 6),
+    ('MTAPQQMT*', 'MTAPQQMTMQ*', 'p.(*9Metext*2)', 8, 9, 11),  # stop in variant
+    ('MTAPQQMT*', 'MTAPQQMTMQ', 'p.(*9Metext*?)', 8, 9, 10)])  # no stop in variant
+def test_in_frame_description(ref, var, descr, first, last_ref, last_var):
+    """
+    `util.in_frame_description` returns (descr, first, last_ref, last_var).
+    """
+    assert util.in_frame_description(ref, var) == (
+        descr, first, last_ref, last_var)
+@pytest.mark.parametrize('ref,var,descr,first,last_ref,last_var', [
+    ('MTAPQQMT*', 'MTAQQMT*', 'p.(Pro4Glnfs*5)', 3, 9, 8),
+    ('MTAPQQMT*', 'MTAQMT*', 'p.(Pro4Glnfs*4)', 3, 9, 7),
+    ('MTAPQQT*', 'MTAQQMT*', 'p.(Pro4Glnfs*5)', 3, 8, 8),
+    ('MTAPQQT*', 'MTAQQMT', 'p.(Pro4Glnfs*?)', 3, 8, 7)])  # no stop in variant
+def test_out_of_frame_description(ref, var, descr, first, last_ref, last_var):
+    """
+    `util.out_of_frame_description` returns (descr, first, last_ref, last_var).
+    """
+    assert util.out_of_frame_description(ref, var) == (
+        descr, first, last_ref, last_var)
diff --git a/tests/ b/tests/
index 897a3e08..0018472a 100644
--- a/tests/
+++ b/tests/
@@ -1590,3 +1590,12 @@ def test_legend_mrna_by_construction(output, checker):
         ['SDHD_v001', None, None, None, 'construction'],
         ['SDHD_i001', 'BAA81889.1', None, 'small subunit of cytochrome b of succinate dehydrogenase', 'construction']
+def test_protein_ext_stop(output, checker):
+    """
+    A variant in the stop codon, with an alternative stop codon downstream in
+    the RNA, should yield `ext*P` (P being a position) rather than `ext*?`.
+    """
+    checker('NM_000143.3:c.1531T>G')
+    assert 'NM_000143.3(FH_i001):p.(*511Glyext*3)' in output.getOutput('protDescriptions')