From 697c044e963e7a573bdd4828de7f797c6f174ccc Mon Sep 17 00:00:00 2001 From: Martijn Vermaat <martijn@vermaat.name> Date: Thu, 24 Feb 2011 16:21:42 +0000 Subject: [PATCH] Reworked the calculation of new splice site positions. src/Modules/Mutator.py: - For deletions, position shifts are now active from the first position following the deletion. Previous behaviour was from (but not including) the first position of the deletion itself. - Added method to check if the resulting shift gets smaller at some specific position: Mutator.shift_minus_at(position). - The positions in the shift list are now interpreted as a shift from (and including) the listed position. Previous behaviour was following (and not including) the listed position. - The Mutator.newSplice(sites) method has some additional logic. This fixes some bugs, including Trac bug #30. src/tests/test_mutator.py: - Written a lot of unit tests for the changes in Mutator.py. git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@188 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1 --- src/Modules/Mutator.py | 135 ++++++-- src/tests/test_mutator.py | 637 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 747 insertions(+), 25 deletions(-) create mode 100755 src/tests/test_mutator.py diff --git a/src/Modules/Mutator.py b/src/Modules/Mutator.py index 5b23af57..b4eb1baf 100644 --- a/src/Modules/Mutator.py +++ b/src/Modules/Mutator.py @@ -12,6 +12,7 @@ effects on restriction sites are also analysed. The original as well as the mutated string are stored here. +@requires: itertools.izip_longest @requires: Bio.Restriction @requires: Bio.Seq.Seq @requires: Bio.Alphabet.IUPAC.IUPACAmbiguousDNA @@ -21,6 +22,7 @@ The original as well as the mutated string are stored here. # - Mutator ; Mutate a string and register all shift points. +from itertools import izip_longest from Bio import Restriction from Bio.Seq import Seq from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA @@ -179,12 +181,12 @@ class Mutator() : """ Compare two lists, and count those elements which are only present in list1. - + @arg list1: some list @type list1: list @arg list2: some (other) list @type list2: list - + @return: the elements only present in list 1, together with the number of occurrences, if more than once present @rtype: list @@ -232,7 +234,7 @@ class Mutator() : @type pos2: integer @arg ins: The insertion @type ins: string - + @return: visualisation @rtype: string """ @@ -291,7 +293,7 @@ class Mutator() : self.mutated = self.mutated[:self.shiftpos(pos1)] + ins + \ self.mutated[self.shiftpos(pos2):] - self.__sortins([pos1 + 1, len(ins) + pos1 - pos2]) + self.__sortins([pos2 + 1, len(ins) + pos1 - pos2]) return visualisation #__mutate @@ -300,10 +302,10 @@ class Mutator() : """ If the length of a sequence is larger than a certain maxvissize, the string is clipped; otherwise the string is just returned. - + @arg string: DNA sequence @type string: string - + @return: either the original sequence, or an abbreviation of it @rtype: string """ @@ -315,6 +317,25 @@ class Mutator() : return string #visualiseIns + def shift_minus_at(self, position): + """ + Indicates if the position-shift gets smaller at exactly the given + position. + + @arg position: Position in the original string. + @type position: int + + @return: True if the position-shift gets smaller at exactly the + given position, False otherwise. + @rtype: bool + + @todo: Since the __shift list is sorted we could optimize this a + bit. + """ + return reduce(lambda b,s: b or (s[0] == position and s[1] < 0), + self.__shift, False) + #shift_minus_at + def shiftpos(self, position) : """ Calculate the position in the mutated string, given a position in @@ -333,7 +354,7 @@ class Mutator() : ret = position for i in range(len(self.__shift)) : - if self.__shift[i][0] >= position : + if self.__shift[i][0] > position : return ret ret += self.__shift[i][1] @@ -346,27 +367,92 @@ class Mutator() : """ Generate a list of new splice sites. - @arg sites: A list of old splice sites - @type sites: list + @arg sites: A list of old splice sites. + @type sites: list of int - @return: A list of new splice sites - @rtype: list - """ + @return: A list of new splice sites. + @rtype: list of int - ret = [] - j = 0 - for i in sites : - if (j % 2) : - ret.append(self.shiftpos(i + 1) - 1) - #ret.append(self.shiftpos(i)) - else : - ret.append(self.shiftpos(i - 1) + 1) - #ret.append(self.shiftpos(i)) - j += 1 - #for + Example 1 (DNA): NG_012772.1 - return ret + ...---------[=========]----------... + ^ ^ + 18964 19013 + + Variant Expected new location for splice site 18964 + g.18963del 18963 + g.18964del 18964 + g.18963_18964ins 18964 + g.18964_18965ins 18964 + + Variant Expected new location for splice site 19013 + g.19013del 19012 + g.19014del 19013 + g.19013_19014ins 19014 + + + Example 2 (RNA): NM_000088.3 + + ...============][==============... + /\ + 229 230 + + Variant Expected new location for splice sites 229,230 + n.228del 228,229 + n.229del 228,229 + n.230del 229,230 + n.231del 229,230 + n.228_229ins 230,231 + n.229_230ins 229,230 or 230,231 + n.230_231ins 229,230 + """ + + # We use shiftpos(i+1)-1 instead of shiftpos(i) (and its mirror) + # to make sure insertions directly before or after an exon are + # placed inside the exon. + # + # Example: + # + # -----SPLICE[======]SPLICE----------SPLICE[=======]SPLICE----- + # ^ ^ + # ins ins + # + # These two insertions should be mapped inside the exons because + # they are before and after (respectively) their exons and don't + # hit the (biological) splice sites. + # + # This also makes sure deletions of the last exon base are really + # removed from the exon. The problem is that positions following + # (but not including) the deletion get a shift, but the splice site + # is stored by the position of the last exon base. So the splice + # site position would not be decremented without the +1-1 dance. + + new_sites = [] + + prev_donor = -1 + sites_iter = iter(sites) + for acceptor, donor in izip_longest(sites_iter, sites_iter): + + # We don't want to do the -1+1 dance if + # 1) there is a deletion directly before the exon, or + # 2) there is another exon directly before this exon. + # + # A consequence of check 2) is that insertions between two + # directly adjacent exons are seen as insertions in the first + # exon. + if prev_donor == acceptor - 1 or self.shift_minus_at(acceptor): + new_sites.append(self.shiftpos(acceptor)) + else: + new_sites.append(self.shiftpos(acceptor - 1) + 1) + + # Should never happen since splice sites come in pairs. + if not donor: continue + + new_sites.append(self.shiftpos(donor + 1) - 1) + prev_donor = donor + + return new_sites #newSplice def delM(self, pos1, pos2) : @@ -403,7 +489,6 @@ class Mutator() : @arg ins: The insertion @type ins: string """ - visualisation = ["insertion between %i and %i" % (pos, pos + 1)] visualisation.extend(self.__mutate(pos, pos, ins)) self.__output.addOutput("visualisation", visualisation) diff --git a/src/tests/test_mutator.py b/src/tests/test_mutator.py new file mode 100755 index 00000000..e4cd0253 --- /dev/null +++ b/src/tests/test_mutator.py @@ -0,0 +1,637 @@ +#!/usr/bin/env python + +""" +Tests for the Mutator module. +""" + +#import logging; logging.basicConfig() +import re +import os +import random +import unittest +import site +from Bio.Seq import Seq + +# Todo: Can this be done in a more elegant way? +os.chdir('../..') +site.addsitedir('src') + +from Modules import Config +from Modules import Output +from Modules import Mutator + + +def _seq(length): + """ + Return random DNA sequence of given length. + """ + sequence = '' + for i in range(length): + sequence += random.choice('ACGT') + return Seq(sequence) + + +class TestMutator(unittest.TestCase): + """ + Test the Mutator module. + """ + + def setUp(self): + """ + Initialize test Mutator module. + """ + self.config = Config.Config() + self.output = Output.Output(__file__, self.config.Output) + + def _mutator(self, sequence): + """ + Create a Mutator object for a given sequence. + """ + return Mutator.Mutator(sequence, + self.config.Mutator, + self.output) + + def test_shiftpos_no_change(self): + """ + No change, no shifts. + """ + l = 10 + m = self._mutator(_seq(l)) + # Numbering is 1-based + for i in range(1, l + 1): + self.assertEqual(m.shiftpos(i), i) + + def test_shiftpos_del_example(self): + """ + Example of g.2del. + """ + m = self._mutator(Seq('ATCGATCG')) + m.delM(2, 2) + self.assertEqual(m.shiftpos(1), 1) + self.assertEqual(m.shiftpos(2), 2) + self.assertEqual(m.shiftpos(3), 2) + + def test_shiftpos_del(self): + """ + Starting from the deleted position (not included), shift -1. + """ + l = 10 + for d in range(1, l + 1): + m = self._mutator(_seq(l)) + m.delM(d, d) + for p in range(1, d + 1): + self.assertEqual(m.shiftpos(p), p) + for p in range(d + 1, l + 1): + self.assertEqual(m.shiftpos(p), p - 1) + + def test_shiftpos_del2(self): + """ + Starting from the deleted positions (not included), shift -2. + """ + l = 10 + for d in range(1, l): + m = self._mutator(_seq(l)) + m.delM(d, d + 1) + for p in range(1, d + 2): + self.assertEqual(m.shiftpos(p), p) + for p in range(d + 2, l + 1): + self.assertEqual(m.shiftpos(p), p - 2) + + def test_shiftpos_ins_example(self): + """ + Example of g.2_3insA. + """ + m = self._mutator(Seq('ATCGATCG')) + m.insM(2, 'A') + self.assertEqual(m.shiftpos(1), 1) + self.assertEqual(m.shiftpos(2), 2) + self.assertEqual(m.shiftpos(3), 4) + + def test_shiftpos_ins(self): + """ + Starting from the interbase insertion position, shift +1. + """ + l = 10 + for i in range(0, l + 1): + m = self._mutator(_seq(l)) + m.insM(i, 'T') + for p in range(1, i + 1): + self.assertEqual(m.shiftpos(p), p) + for p in range(i + 1, l + 1): + self.assertEqual(m.shiftpos(p), p + 1) + + def test_shiftpos_ins2(self): + """ + Starting from the interbase insertion position, shift +2. + """ + l = 10 + for i in range(0, l + 1): + m = self._mutator(_seq(l)) + m.insM(i, 'TT') + for p in range(1, i + 1): + self.assertEqual(m.shiftpos(p), p) + for p in range(i + 1, l + 1): + self.assertEqual(m.shiftpos(p), p + 2) + + def test_newSplice_no_change(self): + """ + No change, no shifts. + + @note: Splice sites come in pairs (acceptor and donor site) and the + numbers are the first, respectively last, position in the exon. + + So in this example we have: ---======----======-----===--- + | | | | | | + 4 9 14 19 25 27 + """ + l = 30 + sites = [4, 9, 14, 19, 25, 27] + m = self._mutator(_seq(l)) + self.assertEqual(m.newSplice(sites), sites) + + def test_newSplice_acc_del_before(self): + """ + Deletion in intron directly before exon. + + @note: This hits a splice site, so we don't really support it. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.delM(13, 13) # g.13del + self.assertEqual(m.newSplice(sites), [4, 9, 13, 16, 24, 26]) + + def test_newSplice_acc_del_after(self): + """ + Deletion at first exon position. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.delM(14, 14) # g.14del + self.assertEqual(m.newSplice(sites), [4, 9, 14, 16, 24, 26]) + + def test_newSplice_don_del_before(self): + """ + Deletion at last exon position. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.delM(17, 17) # g.17del + self.assertEqual(m.newSplice(sites), [4, 9, 14, 16, 24, 26]) + + def test_newSplice_don_del_after(self): + """ + Deletion in intron directly after exon. + + @note: This hits a splice site, so we don't really support it. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.delM(18, 18) # g.18del + self.assertEqual(m.newSplice(sites), [4, 9, 14, 17, 24, 26]) + + def test_newSplice_acc_del2_before(self): + """ + Deletion of 2 in intron directly before exon. + + @note: This hits a splice site, so we don't really support it. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.delM(12, 13) # g.12_13del + self.assertEqual(m.newSplice(sites), [4, 9, 12, 15, 23, 25]) + + def test_newSplice_acc_del2_on(self): + """ + Deletion of 2 in intron/exon. + + @note: This hits a splice site, so we don't really support it. + """ + return # Disabled (see docstring) + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.delM(13, 14) # g.13_14del + self.assertEqual(m.newSplice(sites), [4, 9, 13, 15, 23, 25]) + + def test_newSplice_acc_del2_after(self): + """ + Deletion of 2 at first exon position. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.delM(14, 15) # g.14_15del + self.assertEqual(m.newSplice(sites), [4, 9, 14, 15, 23, 25]) + + def test_newSplice_don_del2_before(self): + """ + Deletion of 2 at last exon positions. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.delM(16, 17) # g.16_17del + self.assertEqual(m.newSplice(sites), [4, 9, 14, 15, 23, 25]) + + def test_newSplice_don_del2_on(self): + """ + Deletion of 2 in exon/intron. + + @note: This hits a splice site, so we don't really support it. + """ + return # Disabled (see docstring) + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.delM(17, 18) # g.17_18del + self.assertEqual(m.newSplice(sites), [4, 9, 14, 16, 23, 25]) + + def test_newSplice_don_del2_after(self): + """ + Deletion of 2 in intron directly after exon. + + @note: This hits a splice site, so we don't really support it. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.delM(18, 19) # g.18_19del + self.assertEqual(m.newSplice(sites), [4, 9, 14, 17, 23, 25]) + + def test_newSplice_acc_ins_before(self): + """ + Insertion 1 position before intron/exon boundary. + + @note: This hits a splice site, so we don't really support it. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(12, 'A') # g.12_13insA + self.assertEqual(m.newSplice(sites), [4, 9, 15, 18, 26, 28]) + + def test_newSplice_acc_ins_on(self): + """ + Insertion in intron/exon boundary. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(13, 'A') # g.13_14insA + self.assertEqual(m.newSplice(sites), [4, 9, 14, 18, 26, 28]) + + def test_newSplice_acc_ins_after(self): + """ + Insertion 1 position after intron/exon boundary. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(14, 'A') # g.14_15insA + self.assertEqual(m.newSplice(sites), [4, 9, 14, 18, 26, 28]) + + def test_newSplice_don_ins_before(self): + """ + Insertion 1 position before exon/intron boundary. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(16, 'A') # g.16_17insA + self.assertEqual(m.newSplice(sites), [4, 9, 14, 18, 26, 28]) + + def test_newSplice_don_ins_on(self): + """ + Insertion in exon/intron boundary. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(17, 'A') # g.17_18insA + self.assertEqual(m.newSplice(sites), [4, 9, 14, 18, 26, 28]) + + def test_newSplice_don_ins_after(self): + """ + Insertion 1 position after exon/intron boundary. + + @note: This hits a splice site, so we don't really support it. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(18, 'A') # g.18_19insA + self.assertEqual(m.newSplice(sites), [4, 9, 14, 17, 26, 28]) + + def test_newSplice_acc_ins2_before(self): + """ + Insertion of 2 1 position before intron/exon boundary. + + @note: This hits a splice site, so we don't really support it. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(12, 'AT') # g.12_13insAT + self.assertEqual(m.newSplice(sites), [4, 9, 16, 19, 27, 29]) + + def test_newSplice_acc_ins2_on(self): + """ + Insertion of 2 in intron/exon boundary. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(13, 'AT') # g.13_14insAT + self.assertEqual(m.newSplice(sites), [4, 9, 14, 19, 27, 29]) + + def test_newSplice_acc_ins2_after(self): + """ + Insertion of 2 1 position after intron/exon boundary. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(14, 'AT') # g.14_15insAT + self.assertEqual(m.newSplice(sites), [4, 9, 14, 19, 27, 29]) + + def test_newSplice_don_ins2_before(self): + """ + Insertion of 2 1 position before exon/intron boundary. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(16, 'AT') # g.16_17insAT + self.assertEqual(m.newSplice(sites), [4, 9, 14, 19, 27, 29]) + + def test_newSplice_don_ins2_on(self): + """ + Insertion of 2 in exon/intron boundary. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(17, 'AT') # g.17_18insAT + self.assertEqual(m.newSplice(sites), [4, 9, 14, 19, 27, 29]) + + def test_newSplice_don_ins2_after(self): + """ + Insertion of 2 1 position after exon/intron boundary. + + @note: This hits a splice site, so we don't really support it. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(18, 'AT') # g.18_19insAT + self.assertEqual(m.newSplice(sites), [4, 9, 14, 17, 27, 29]) + + def test_newSplice_acc_ins3_before(self): + """ + Insertion of 3 1 position before intron/exon boundary. + + @note: This hits a splice site, so we don't really support it. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(12, 'ATT') # g.12_13insATT + self.assertEqual(m.newSplice(sites), [4, 9, 17, 20, 28, 30]) + + def test_newSplice_acc_ins3_on(self): + """ + Insertion of 3 in intron/exon boundary. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(13, 'ATT') # g.13_14insATT + self.assertEqual(m.newSplice(sites), [4, 9, 14, 20, 28, 30]) + + def test_newSplice_acc_ins3_after(self): + """ + Insertion of 3 1 position after intron/exon boundary. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(14, 'ATT') # g.14_15insATT + self.assertEqual(m.newSplice(sites), [4, 9, 14, 20, 28, 30]) + + def test_newSplice_don_ins3_before(self): + """ + Insertion of 3 1 position before exon/intron boundary. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(16, 'ATT') # g.16_17insATT + self.assertEqual(m.newSplice(sites), [4, 9, 14, 20, 28, 30]) + + def test_newSplice_don_ins3_on(self): + """ + Insertion of 3 in exon/intron boundary. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(17, 'ATT') # g.17_18insATT + self.assertEqual(m.newSplice(sites), [4, 9, 14, 20, 28, 30]) + + def test_newSplice_don_ins3_after(self): + """ + Insertion of 3 1 position after exon/intron boundary. + + @note: This hits a splice site, so we don't really support it. + """ + l = 30 + sites = [4, 9, 14, 17, 25, 27] + m = self._mutator(_seq(l)) + m.insM(18, 'ATT') # g.18_19insATT + self.assertEqual(m.newSplice(sites), [4, 9, 14, 17, 28, 30]) + + def test_newSplice_adj_del_before1(self): + """ + Adjacent exons: deletion at second-last position of first exon. + + @note: In this example we have adjacent exons (like e.g. in RNA), + which looks like this (the square brackets [ and ] are part of the + exons): + ---[====][======][========]--- + | / \ / \ | + 4 9 10 17 18 27 + """ + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.delM(16, 16) # g.16del + self.assertEqual(m.newSplice(sites), [4, 9, 10, 16, 17, 26]) + + def test_newSplice_adj_del_before(self): + """ + Adjacent exons: deletion at last position of first exon. + """ + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.delM(17, 17) # g.17del + self.assertEqual(m.newSplice(sites), [4, 9, 10, 16, 17, 26]) + + def test_newSplice_adj_del_after(self): + """ + Adjacent exons: deletion at first position of second exon. + """ + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.delM(18, 18) # g.18del + self.assertEqual(m.newSplice(sites), [4, 9, 10, 17, 18, 26]) + + def test_newSplice_adj_del_after1(self): + """ + Adjacent exons: deletion at second position of second exon. + """ + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.delM(19, 19) # g.19del + self.assertEqual(m.newSplice(sites), [4, 9, 10, 17, 18, 26]) + + def test_newSplice_adj_ins_before(self): + """ + Adjacent exons: insertion 1 position before exon/exon boundary. + """ + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.insM(16, 'A') # g.16_17insA + self.assertEqual(m.newSplice(sites), [4, 9, 10, 18, 19, 28]) + + def test_newSplice_adj_ins_on(self): + """ + Adjacent exons: insertion at exon/exon boundary. + + @note: This insertion could be seen as being + 1) at the end of the first exon, or + 2) at the start of the second exon. + Both would probably be 'correct', but we would like consistent + results. Therefore, we stick to the first option. + """ + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.insM(17, 'A') # g.17_18insA + self.assertEqual(m.newSplice(sites), [4, 9, 10, 18, 19, 28]) + + def test_newSplice_adj_ins_after(self): + """ + Adjacent exons: insertion 1 position after exon/exon boundary. + """ + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.insM(18, 'A') # g.18_19insA + self.assertEqual(m.newSplice(sites), [4, 9, 10, 17, 18, 28]) + + def test_newSplice_adj_del2_before1(self): + """ + Adjacent exons: deletion of 2 at second-last position of first exon. + """ + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.delM(15, 16) # g.15_16del + self.assertEqual(m.newSplice(sites), [4, 9, 10, 15, 16, 25]) + + def test_newSplice_adj_del2_before(self): + """ + Adjacent exons: deletion of 2 at last position of first exon. + """ + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.delM(16, 17) # g.16_17del + self.assertEqual(m.newSplice(sites), [4, 9, 10, 15, 16, 25]) + + def test_newSplice_adj_del2_on(self): + """ + Adjacent exons: deletion of 2 at exon/exon boundary. + + @todo: This is a special case of bug #????. Once fixed, the two + exons will be joined to one new exon. + """ + return # Disabled (see docstring) + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.delM(17, 18) # g.17_18del + self.assertEqual(m.newSplice(sites), [4, 9, 10, 16, 17, 25]) + + def test_newSplice_adj_del2_after(self): + """ + Adjacent exons: deletion of 2 at first position of second exon. + """ + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.delM(18, 19) # g.18_19del + self.assertEqual(m.newSplice(sites), [4, 9, 10, 17, 18, 25]) + + def test_newSplice_adj_del2_after1(self): + """ + Adjacent exons: deletion of 2 at second position of second exon. + """ + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.delM(19, 20) # g.19_20del + self.assertEqual(m.newSplice(sites), [4, 9, 10, 17, 18, 25]) + + def test_newSplice_adj_ins2_before(self): + """ + Adjacent exons: insertion of 2 1 position before exon/exon boundary. + """ + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.insM(16, 'AT') # g.16_17insAT + self.assertEqual(m.newSplice(sites), [4, 9, 10, 19, 20, 29]) + + def test_newSplice_adj_ins2_on(self): + """ + Adjacent exons: insertion of 2 at exon/exon boundary. + + @note: This insertion could be seen as being + 1) at the end of the first exon, or + 2) at the start of the second exon. + Both would probably be 'correct', but we would like consistent + results. Therefore, we stick to the first option. + """ + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.insM(17, 'AT') # g.17_18insAT + self.assertEqual(m.newSplice(sites), [4, 9, 10, 19, 20, 29]) + + def test_newSplice_adj_ins2_after(self): + """ + Adjacent exons: insertion of 2 1 position after exon/exon boundary. + """ + l = 30 + sites = [4, 9, 10, 17, 18, 27] + m = self._mutator(_seq(l)) + m.insM(18, 'AT') # g.18_19insAT + self.assertEqual(m.newSplice(sites), [4, 9, 10, 17, 18, 29]) + + +if __name__ == '__main__': + # Usage: + # ./test_mutator.py -v + # Or, selecting a specific test: + # ./test_mutator.py -v TestMutator.test_mutated + unittest.main() -- GitLab