diff --git a/src/Modules/Mutator.py b/src/Modules/Mutator.py index f9af2303c2659bca45c07f3862565de9eb7955e6..a913397ae094269556202566304553c9121018a7 100644 --- a/src/Modules/Mutator.py +++ b/src/Modules/Mutator.py @@ -22,7 +22,7 @@ The original as well as the mutated string are stored here. # - Mutator ; Mutate a string and register all shift points. -from itertools import ifilter, izip_longest +from itertools import izip_longest from Bio import Restriction from Bio.Seq import Seq from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA @@ -43,8 +43,6 @@ class Mutator() : where the modifications in length are stored. Each first element of the tuples in this list is unique, each second element is non-zero. - - __removed_sites ; Set of splice sites to ignore in mutated - string. - __restrictionBatch ; Public variables: @@ -108,7 +106,6 @@ class Mutator() : self.__config = config self.__output = output self.__shift = [] - self.__removed_sites = set() self.__restrictionBatch = Restriction.RestrictionBatch([], ['N']) self.orig = orig @@ -366,19 +363,6 @@ class Mutator() : return ret #shiftpos - def add_removed_sites(self, sites): - """ - Add sites to the set of splice sites to ignore in the mutated string. - - @arg sites: A list of splice sites to ignore. - @type sites: list of int - - @todo: Resulting list of ignored sites should always be even. - """ - for site in sites: - self.__removed_sites.add(site) - #add_ignore_sites - def newSplice(self, sites) : """ Generate a list of new splice sites. @@ -390,7 +374,7 @@ class Mutator() : @rtype: list of int - Example 1 (DNA): NG_012772.1(BRCA2_v001) + Example 1 (DNA): NG_012772.1 ...---------[=========]----------... 
^ ^ @@ -446,9 +430,8 @@ class Mutator() : new_sites = [] - prev_donor = None - sites_iter = ifilter(lambda s: s not in self.__removed_sites, sites) - + prev_donor = sites[0] - 1 + sites_iter = iter(sites) for acceptor, donor in izip_longest(sites_iter, sites_iter): # We don't want to do the -1+1 dance if @@ -463,8 +446,7 @@ class Mutator() : # Condition 3) makes sure we don't include insertions directly # in front of CDS start in the CDS. It also affects translation # start, but this should be no problem. - if not prev_donor or prev_donor == acceptor - 1 or \ - self.shift_minus_at(acceptor): + if prev_donor == acceptor - 1 or self.shift_minus_at(acceptor): new_sites.append(self.shiftpos(acceptor)) else: new_sites.append(self.shiftpos(acceptor - 1) + 1) diff --git a/src/Mutalyzer.py b/src/Mutalyzer.py index 6a7a24d5485828e01285e00e7f0880cf7100315c..3a3edeaa22db37995152afd7c1797fb1fef888f0 100644 --- a/src/Mutalyzer.py +++ b/src/Mutalyzer.py @@ -536,7 +536,7 @@ def __overSplice(pos1, pos2, sites) : @arg pos1: The first coordinate of the range in g. notation. @type pos1: integer - @arg pos2: The second coordinate of the range in g. notation. + @arg pos2: The second coordinate of the range in g. notation. @type pos2: integer @arg sites: A list of splice sites in g. notation. 
@type sites: list(integer) @@ -1043,7 +1043,6 @@ def checkInsertion(start_g, end_g, Arg1, MUU, GenRecordInstance, O) : Arg1, start_g, start_g + 1, MUU.mutated[newStart + shift:newStop + shift], newStart + shift, newStart + shift + 1)) - #if if shift != roll[1]: O.addMessage(__file__, 1, "IROLLBACK", "Insertion of %s at position %i_%i was not corrected to an " \ @@ -1056,7 +1055,6 @@ def checkInsertion(start_g, end_g, Arg1, MUU, GenRecordInstance, O) : GenRecordInstance.name(start_g, start_g + 1, "ins", MUU.mutated[newStart + shift:newStop + shift] , "", (roll[0], shift)) - #else #checkInsertion def __ivs2g(location, transcript) : @@ -1151,10 +1149,7 @@ def __normal2g(RawVar, transcript) : def __rv(MUU, RawVar, GenRecordInstance, parts, O, transcript) : """ - Process one raw variant. - @todo: documentation - @todo: parts argument is not used """ # FIXME check this @@ -1234,54 +1229,10 @@ def __rv(MUU, RawVar, GenRecordInstance, parts, O, transcript) : Arg1 = Bio.Seq.reverse_complement(RawVar.Arg1) Arg2 = Bio.Seq.reverse_complement(RawVar.Arg2) - splice_abort = False - - # If we hit a splice site, issue a warning. Later on we decide if we - # can still process this variant in any way (e.g. if it deletes an - # entire exon). if transcript and __overSplice(start_g, end_g, transcript.CM.RNA) : - splice_abort = True O.addMessage(__file__, 2, "WOVERSPLICE", "Variant hits one or more splice sites.") - # If we have a deletion, and it covers exactly an even number of splice - # sites, remove these splice sites. - # Todo: Special cases for first/last exon? Upstream/downstream exons? - # Note, this is not the same as __overSplice(). Here we collect - # sites where the delection borders the exon/intron boundary. 
- if transcript and RawVar.MutationType == 'del': - removed_sites = [] - sites = iter(transcript.CM.RNA) - for acceptor, donor in izip_longest(sites, sites): - if start_g <= acceptor <= end_g + 1: - removed_sites.append(acceptor) - if start_g - 1 <= donor <= end_g: - removed_sites.append(donor) - - if len(removed_sites) and not len(removed_sites) % 2: - # An even number of splice sites was removed. We can deal with - # this, but issue a warning. - splice_abort = False - MUU.add_removed_sites(removed_sites) - O.addMessage(__file__, 1, "IDELSPLICE", "Removed %i splice " \ - "sites from transcript." % len(removed_sites)) - - # If splice_abort is set, this basically means WOVERSPLICE was called and - # IDELSPLICE was not called. - # I guess in that case we do want to generate the visualisation, the - # genomic description, and affected transcripts. But NOT the predicted - # protein. - # The following solution is a bit of a hack. By setting the .translate - # field of the transcript to False, we force that no protein is predicted. - if splice_abort: - #return - transcript.translate = False - # The affected protein description for this transcript will now be - # a question mark, e.g. "NG_012772.1(BRCA2_i001):?". But protein - # descriptions for other transcripts (where splice sites are also - # crippled) are still shown. I think we ideally would not want this. - # However, some transcripts might be unaffected and should be shown. 
- if RawVar.MutationType in ["del", "dup", "subst", "delins"] : __checkOptArg(MUU.orig, start_g, end_g, Arg1, O) diff --git a/src/tests/test_mutalyzer.py b/src/tests/test_mutalyzer.py index a1996b39e519e9da89fdf67a673351faa374163e..d6420ec8fbf05f1ca31d820d0855df401055a330 100755 --- a/src/tests/test_mutalyzer.py +++ b/src/tests/test_mutalyzer.py @@ -84,7 +84,6 @@ class TestMutalyzer(unittest.TestCase): """ Mutalyzer.process('NM_000143.3:c.-1_1insCAT', self.config, self.output) self.assertEqual(self.output.getIndexedOutput("newprotein", 0), None) - # Todo: is this a good test? def test_ins_cds_start_after(self): """ @@ -92,100 +91,6 @@ class TestMutalyzer(unittest.TestCase): """ Mutalyzer.process('NM_000143.3:c.1_2insCAT', self.config, self.output) self.assertEqual(self.output.getIndexedOutput("newprotein", 0), '?') - # Todo: is this a good test? - - def test_del_splice_site(self): - """ - Deletion hitting one splice site should not be possible. - """ - Mutalyzer.process('NG_012772.1(BRCA2_v001):c.632-5_670del', - self.config, self.output) - woversplice = self.output.getMessagesWithErrorCode('WOVERSPLICE') - self.assertTrue(len(woversplice) > 0) - idelsplice = self.output.getMessagesWithErrorCode('IDELSPLICE') - self.assertTrue(len(idelsplice) == 0) - # Todo: For now, the following is how to check if no proteins - # prediction is done. - self.assertFalse(self.output.getOutput('newprotein')) - - def test_del_exon(self): - """ - Deletion of an entire exon should be possible. - """ - Mutalyzer.process('NG_012772.1(BRCA2_v001):c.632-5_681+7del', - self.config, self.output) - woversplice = self.output.getMessagesWithErrorCode('WOVERSPLICE') - self.assertTrue(len(woversplice) > 0) - idelsplice = self.output.getMessagesWithErrorCode('IDELSPLICE') - self.assertTrue(len(idelsplice) > 0) - # Todo: For now, the following is how to check if no proteins - # prediction is done. 
- self.assertTrue(self.output.getOutput('newprotein')) - - def test_del_exon_in_frame(self): - """ - Deletion of an entire exon with length a triplicate should give a - proteine product with just this deletion (and possibly substitutions - directly before and after). - - NG_012772.1(BRCA2_v001):c.68-7_316+7del is such a variant, since - positions 68 through 316 are exactly one exon and (316-68+1)/3 = 83. - """ - Mutalyzer.process('NG_012772.1(BRCA2_v001):c.68-7_316+7del', - self.config, self.output) - woversplice = self.output.getMessagesWithErrorCode('WOVERSPLICE') - self.assertTrue(len(woversplice) > 0) - idelsplice = self.output.getMessagesWithErrorCode('IDELSPLICE') - self.assertTrue(len(idelsplice) > 0) - # Todo: For now, the following is how to check if no proteins - # prediction is done. - self.assertTrue(self.output.getOutput('newprotein')) - # Todo: assert that protein products indeed have only this difference. - - def test_del_exons(self): - """ - Deletion of two entire exons should be possible. - """ - Mutalyzer.process('NG_012772.1(BRCA2_v001):c.632-5_793+7del', - self.config, self.output) - woversplice = self.output.getMessagesWithErrorCode('WOVERSPLICE') - self.assertTrue(len(woversplice) > 0) - idelsplice = self.output.getMessagesWithErrorCode('IDELSPLICE') - self.assertTrue(len(idelsplice) > 0) - # Todo: For now, the following is how to check if no proteins - # prediction is done. - self.assertTrue(self.output.getOutput('newprotein')) - - def test_del_intron(self): - """ - Deletion of an entire intron should be possible (fusion of remaining - exonic parts). - """ - Mutalyzer.process('NG_012772.1(BRCA2_v001):c.622_674del', - self.config, self.output) - woversplice = self.output.getMessagesWithErrorCode('WOVERSPLICE') - self.assertTrue(len(woversplice) > 0) - idelsplice = self.output.getMessagesWithErrorCode('IDELSPLICE') - self.assertTrue(len(idelsplice) > 0) - # Todo: For now, the following is how to check if no proteins - # prediction is done. 
- self.assertTrue(self.output.getOutput('newprotein')) - - def test_del_intron_in_frame(self): - """ - Deletion of an entire intron should be possible (fusion of remaining - exonic parts). - """ - Mutalyzer.process('NG_012772.1(BRCA2_v001):c.622_672del', - self.config, self.output) - woversplice = self.output.getMessagesWithErrorCode('WOVERSPLICE') - self.assertTrue(len(woversplice) > 0) - idelsplice = self.output.getMessagesWithErrorCode('IDELSPLICE') - self.assertTrue(len(idelsplice) > 0) - # Todo: For now, the following is how to check if no proteins - # prediction is done. - self.assertTrue(self.output.getOutput('newprotein')) - # Todo: assert that protein products indeed have only this difference. if __name__ == '__main__': diff --git a/src/tests/test_mutator.py b/src/tests/test_mutator.py index 7f942ccf93ff93f9e8c61f29d2a9c1744a6a4805..fd6af9fa2430998e98b310142d36f8d7aed2634e 100755 --- a/src/tests/test_mutator.py +++ b/src/tests/test_mutator.py @@ -668,30 +668,6 @@ class TestMutator(unittest.TestCase): m.insM(18, 'AT') # g.18_19insAT self.assertEqual(m.newSplice(sites), [4, 9, 10, 17, 18, 29]) - def test_newSplice_removed_sites(self): - """ - After removing splice sites, newSplice() should filter them. - """ - l = 40 - sites = [4, 9, 14, 19, 25, 27, 32, 38] - m = self._mutator(_seq(l)) - m.add_removed_sites([19, 25]) - self.assertEqual(m.newSplice(sites), [4, 9, 14, 27, 32, 38]) - m.add_removed_sites([27, 32]) - self.assertEqual(m.newSplice(sites), [4, 9, 14, 38]) - m.insM(13, 'A') # g.13_14insA - self.assertEqual(m.newSplice(sites), [4, 9, 14, 39]) - - def test_sites_even_invariant(self): - """ - The number of splice sites should always be even. Modifying the list - of splice sites must always prevent the result of an odd number of - splice sites. - - Todo: this test. - """ - pass - if __name__ == '__main__': # Usage: