From bb1611604de8b1ddbd2cc0ec7859a64fa819c630 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Fri, 25 Feb 2011 13:09:27 +0000
Subject: [PATCH] Don't include insertion directly outside CDS in CDS.

src/Modules/Mutator.py:
- The newSplice() method no longer includes insertions in exons for the
  first and last sites in the given list of splice sites. The result is
  that this inclusion in exons does not apply to translation start/end
  and CDS start/end. This fixes issue #39.



git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@190 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
---
 src/Modules/Mutator.py      | 19 ++++++++--
 src/tests/test_mutalyzer.py | 72 +++++++++++++++++++++++++++++++++++++
 src/tests/test_mutator.py   | 56 ++++++++++++++++++++++++-----
 3 files changed, 136 insertions(+), 11 deletions(-)
 create mode 100755 src/tests/test_mutalyzer.py

diff --git a/src/Modules/Mutator.py b/src/Modules/Mutator.py
index b4eb1baf..a913397a 100644
--- a/src/Modules/Mutator.py
+++ b/src/Modules/Mutator.py
@@ -430,17 +430,22 @@ class Mutator() :
 
         new_sites = []
 
-        prev_donor = -1
+        prev_donor = sites[0] - 1
         sites_iter = iter(sites)
         for acceptor, donor in izip_longest(sites_iter, sites_iter):
 
             # We don't want to do the -1+1 dance if
             # 1) there is a deletion directly before the exon, or
-            # 2) there is another exon directly before this exon.
+            # 2) there is another exon directly before this exon, or
+            # 3) this is the first site in the list.
             #
             # A consequence of check 2) is that insertions between two
             # directly adjacent exons are seen as insertions in the first
             # exon.
+            #
+            # Condition 3) makes sure we don't include insertions directly
+            # in front of CDS start in the CDS. It also affects translation
+            # start, but this should be no problem.
             if prev_donor == acceptor - 1 or self.shift_minus_at(acceptor):
                 new_sites.append(self.shiftpos(acceptor))
             else:
@@ -449,7 +454,15 @@ class Mutator() :
             # Should never happen since splice sites come in pairs.
             if not donor: continue
 
-            new_sites.append(self.shiftpos(donor + 1) - 1)
+            # We don't want to do the +1-1 dance if this is the last site
+            # in the list. This makes sure we don't include insertions
+            # directly at CDS end in the CDS. It also affects translation
+            # end, but this should be no problem.
+            if donor == sites[-1]:
+                new_sites.append(self.shiftpos(donor))
+            else:
+                new_sites.append(self.shiftpos(donor + 1) - 1)
+
             prev_donor = donor
 
         return new_sites
diff --git a/src/tests/test_mutalyzer.py b/src/tests/test_mutalyzer.py
new file mode 100755
index 00000000..b6169e52
--- /dev/null
+++ b/src/tests/test_mutalyzer.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+
+"""
+Tests for the Mutalyzer module.
+"""
+
+#import logging; logging.basicConfig()
+import re
+import os
+import random
+import unittest
+import site
+from Bio.Seq import Seq
+
+# Todo: Can this be done in a more elegant way?
+os.chdir('../..')
+site.addsitedir('src')
+
+from Modules import Config
+from Modules import Output
+import Mutalyzer
+
+
+class TestMutalyzer(unittest.TestCase):
+    """
+    Test the Mutalyzer module.
+    """
+
+    def setUp(self):
+        """
+        Initialize test Mutalyzer module.
+        """
+        self.config = Config.Config()
+        self.output = Output.Output(__file__, self.config.Output)
+
+    def test_roll(self):
+        """
+        Just a variant where we should roll.
+        """
+        Mutalyzer.process('NM_003002.2:c.273del', self.config, self.output)
+        wroll = self.output.getMessagesWithErrorCode('WROLL')
+        self.assertTrue(len(wroll) > 0)
+
+    def test_no_roll(self):
+        """
+        Just a variant where we should not roll.
+        """
+        Mutalyzer.process('NM_003002.2:c.274del', self.config, self.output)
+        wroll = self.output.getMessagesWithErrorCode('WROLL')
+        self.assertTrue(len(wroll) == 0)
+
+    def test_ins_cds_start(self):
+        """
+        Insertion on CDS start boundary should not be included in CDS.
+        """
+        Mutalyzer.process('NM_000143.3:c.-1_1insCAT', self.config, self.output)
+        self.assertEqual(self.output.getIndexedOutput("newprotein", 0), None)
+
+    def test_ins_cds_start_after(self):
+        """
+        Insertion after CDS start boundary should be included in CDS.
+        """
+        Mutalyzer.process('NM_000143.3:c.1_2insCAT', self.config, self.output)
+        self.assertEqual(self.output.getIndexedOutput("newprotein", 0), '?')
+
+
+if __name__ == '__main__':
+    # Usage:
+    #   ./test_mutalyzer.py -v
+    # Or, selecting a specific test:
+    #   ./test_mutalyzer.py -v TestMutalyzer.test_ins_cds_start
+    unittest.main()
diff --git a/src/tests/test_mutator.py b/src/tests/test_mutator.py
index e4cd0253..fd6af9fa 100755
--- a/src/tests/test_mutator.py
+++ b/src/tests/test_mutator.py
@@ -285,6 +285,16 @@ class TestMutator(unittest.TestCase):
         m.insM(13, 'A')   # g.13_14insA
         self.assertEqual(m.newSplice(sites), [4, 9, 14, 18, 26, 28])
 
+    def test_newSplice_first_acc_ins_on(self):
+        """
+        Insertion in first intron/exon boundary should not be included.
+        """
+        l = 30
+        sites = [4, 9, 14, 17, 25, 27]
+        m = self._mutator(_seq(l))
+        m.insM(3, 'A')   # g.3_4insA
+        self.assertEqual(m.newSplice(sites), [5, 10, 15, 18, 26, 28])
+
     def test_newSplice_acc_ins_after(self):
         """
         Insertion 1 position after intron/exon boundary.
@@ -315,6 +325,16 @@ class TestMutator(unittest.TestCase):
         m.insM(17, 'A')   # g.17_18insA
         self.assertEqual(m.newSplice(sites), [4, 9, 14, 18, 26, 28])
 
+    def test_newSplice_last_don_ins_on(self):
+        """
+        Insertion in last exon/intron boundary should not be included.
+        """
+        l = 30
+        sites = [4, 9, 14, 17, 25, 27]
+        m = self._mutator(_seq(l))
+        m.insM(27, 'A')   # g.27_28insA
+        self.assertEqual(m.newSplice(sites), [4, 9, 14, 17, 25, 27])
+
     def test_newSplice_don_ins_after(self):
         """
         Insertion 1 position after exon/intron boundary.
@@ -339,15 +359,15 @@ class TestMutator(unittest.TestCase):
         m.insM(12, 'AT')   # g.12_13insAT
         self.assertEqual(m.newSplice(sites), [4, 9, 16, 19, 27, 29])
 
-    def test_newSplice_acc_ins2_on(self):
+    def test_newSplice_first_acc_ins2_on(self):
         """
-        Insertion of 2 in intron/exon boundary.
+        Insertion of 2 in first intron/exon boundary should not be included.
         """
         l = 30
         sites = [4, 9, 14, 17, 25, 27]
         m = self._mutator(_seq(l))
-        m.insM(13, 'AT')   # g.13_14insAT
-        self.assertEqual(m.newSplice(sites), [4, 9, 14, 19, 27, 29])
+        m.insM(3, 'AT')   # g.3_4insAT
+        self.assertEqual(m.newSplice(sites), [6, 11, 16, 19, 27, 29])
 
     def test_newSplice_acc_ins2_after(self):
         """
@@ -369,15 +389,15 @@ class TestMutator(unittest.TestCase):
         m.insM(16, 'AT')   # g.16_17insAT
         self.assertEqual(m.newSplice(sites), [4, 9, 14, 19, 27, 29])
 
-    def test_newSplice_don_ins2_on(self):
+    def test_newSplice_last_don_ins2_on(self):
         """
-        Insertion of 2 in exon/intron boundary.
+        Insertion of 2 in last exon/intron boundary should not be included.
         """
         l = 30
         sites = [4, 9, 14, 17, 25, 27]
         m = self._mutator(_seq(l))
-        m.insM(17, 'AT')   # g.17_18insAT
-        self.assertEqual(m.newSplice(sites), [4, 9, 14, 19, 27, 29])
+        m.insM(27, 'AT')   # g.27_28insAT
+        self.assertEqual(m.newSplice(sites), [4, 9, 14, 17, 25, 27])
 
     def test_newSplice_don_ins2_after(self):
         """
@@ -413,6 +433,16 @@ class TestMutator(unittest.TestCase):
         m.insM(13, 'ATT')   # g.13_14insATT
         self.assertEqual(m.newSplice(sites), [4, 9, 14, 20, 28, 30])
 
+    def test_newSplice_first_acc_ins3_on(self):
+        """
+        Insertion of 3 in first intron/exon boundary should not be included.
+        """
+        l = 30
+        sites = [4, 9, 14, 17, 25, 27]
+        m = self._mutator(_seq(l))
+        m.insM(3, 'ATT')   # g.3_4insATT
+        self.assertEqual(m.newSplice(sites), [7, 12, 17, 20, 28, 30])
+
     def test_newSplice_acc_ins3_after(self):
         """
         Insertion of 3 1 position after intron/exon boundary.
@@ -443,6 +473,16 @@ class TestMutator(unittest.TestCase):
         m.insM(17, 'ATT')   # g.17_18insATT
         self.assertEqual(m.newSplice(sites), [4, 9, 14, 20, 28, 30])
 
+    def test_newSplice_last_don_ins3_on(self):
+        """
+        Insertion of 3 in last exon/intron boundary should not be included.
+        """
+        l = 30
+        sites = [4, 9, 14, 17, 25, 27]
+        m = self._mutator(_seq(l))
+        m.insM(27, 'ATT')   # g.27_28insATT
+        self.assertEqual(m.newSplice(sites), [4, 9, 14, 17, 25, 27])
+
     def test_newSplice_don_ins3_after(self):
         """
         Insertion of 3 1 position after exon/intron boundary.
-- 
GitLab