From a6434a924bd52a69a6d99e507e0bb01bc7747de1 Mon Sep 17 00:00:00 2001 From: Martijn Vermaat <martijn@vermaat.name> Date: Tue, 16 Aug 2011 10:56:39 +0000 Subject: [PATCH] Fix: don't remove flanking splice sites on deletion of exons on a transcript reference. git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/branches/refactor-mutalyzer-branch@322 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1 --- README | 2 +- mutalyzer/variantchecker.py | 29 ++++++- ...st_mutalyzer.py => test_variantchecker.py} | 79 ++++++++++++++----- tests/test_website.py | 4 +- 4 files changed, 90 insertions(+), 24 deletions(-) rename tests/{test_mutalyzer.py => test_variantchecker.py} (78%) diff --git a/README b/README index 616fbac1..0774f980 100644 --- a/README +++ b/README @@ -73,7 +73,7 @@ Todo list: - Check for os.path.join vulnerabilities. - Use web.config.debug=False on production server and perhaps put this in the configuration file. -- Add database indices to extras/post-install.sh script. +- Solution for database schema migration on version updates. Code style guide: - Follow PEP 8 (code) and PEP 257 (docstrings). diff --git a/mutalyzer/variantchecker.py b/mutalyzer/variantchecker.py index 99896625..f717c3ca 100644 --- a/mutalyzer/variantchecker.py +++ b/mutalyzer/variantchecker.py @@ -978,9 +978,32 @@ def process_raw_variant(mutator, variant, record, transcript, output): if transcript and variant.MutationType == 'del': removed_sites = [] for acceptor, donor in util.grouper(transcript.CM.RNA): - if first <= acceptor <= last + 1: + + # If we have introns, we match splice sites in a fuzzy way. This + # means that in the case of + # + # a b + # ===========------------============= + # + # with splice sites a and b, a deletion a+1_b-1 of the entire + # intron gets treated as a deletion of both splice sites. + # + # We don't want this behaviour on e.g. RNA, where we only have + # exons. 
In the case of + # + # a b c d + # ========== ============= =========== + # + # with splice sites a b c d, a deletion b_c of the middle exon + # should only remove splice sites b and c, not a and d. + if record.record.molType == 'g': + fuzzy = 1 + else: + fuzzy = 0 + + if first <= acceptor <= last + fuzzy: removed_sites.append(acceptor) - if first - 1 <= donor <= last: + if first - fuzzy <= donor <= last: removed_sites.append(donor) if len(removed_sites) and not len(removed_sites) % 2: @@ -1004,6 +1027,8 @@ def process_raw_variant(mutator, variant, record, transcript, output): output.addMessage(__file__, 1, 'IDELSPLICE', 'Removed %i splice sites from selected ' \ 'transcript.' % len(removed_sites)) + # This is primarily for use in unittests. + output.addOutput('removedSpliceSites', len(removed_sites)) # If splice_abort is set, this basically means WOVERSPLICE was called and # IDELSPLICE was not called. diff --git a/tests/test_mutalyzer.py b/tests/test_variantchecker.py similarity index 78% rename from tests/test_mutalyzer.py rename to tests/test_variantchecker.py index ddbaf0c3..1fba607f 100644 --- a/tests/test_mutalyzer.py +++ b/tests/test_variantchecker.py @@ -1,5 +1,5 @@ """ -Tests for the Mutalyzer module. +Tests for the variantchecker module. """ @@ -16,14 +16,13 @@ from mutalyzer.output import Output from mutalyzer.variantchecker import check_variant -class TestMutalyzer(): +class TestVariantchecker(): """ - Test the Mutalyzer module. + Test the variantchecker module. """ - def setUp(self): """ - Initialize test Mutalyzer module. + Initialize test variantchecker module. 
""" self.config = Config() self.output = Output(__file__, self.config.Output) @@ -42,7 +41,7 @@ class TestMutalyzer(): """ check_variant('NM_003002.2:c.274del', self.config, self.output) wroll = self.output.getMessagesWithErrorCode('WROLLFORWARD') - assert len(wroll) == 0 + assert_equal(len(wroll), 0) def test_no_roll_splice(self): """ @@ -52,7 +51,7 @@ class TestMutalyzer(): wrollback = self.output.getMessagesWithErrorCode('IROLLBACK') assert len(wrollback) > 0 wroll = self.output.getMessagesWithErrorCode('WROLLFORWARD') - assert len(wroll) == 0 + assert_equal(len(wroll), 0) def test_partial_roll_splice(self): """ @@ -104,7 +103,7 @@ class TestMutalyzer(): check_variant('AL449423.14:g.65470_65471insTAC', self.config, self.output) assert 'AL449423.14(CDKN2A_v001):c.99_100insTAG' in self.output.getOutput('descriptions') assert_equal ('AL449423.14:g.65471_65472insACT', self.output.getIndexedOutput('genomicDescription', 0, '')) - assert len(self.output.getMessagesWithErrorCode('WROLLFORWARD')) == 1 + assert_equal(len(self.output.getMessagesWithErrorCode('WROLLFORWARD')), 1) def test_roll_reverse_ins(self): """ @@ -114,7 +113,7 @@ class TestMutalyzer(): check_variant('AL449423.14:g.65471_65472insACT', self.config, self.output) assert 'AL449423.14(CDKN2A_v001):c.99_100insTAG' in self.output.getOutput('descriptions') assert_equal ('AL449423.14:g.65471_65472insACT', self.output.getIndexedOutput('genomicDescription', 0, '')) - assert len(self.output.getMessagesWithErrorCode('WROLLFORWARD')) == 0 + assert_equal(len(self.output.getMessagesWithErrorCode('WROLLFORWARD')), 0) def test_roll_message_forward(self): """ @@ -122,8 +121,8 @@ class TestMutalyzer(): strand (forward). 
""" check_variant('AL449423.14:g.65470_65471insTAC', self.config, self.output) - assert len(self.output.getMessagesWithErrorCode('WROLLFORWARD')) == 1 - assert len(self.output.getMessagesWithErrorCode('WROLLREVERSE')) == 0 + assert_equal(len(self.output.getMessagesWithErrorCode('WROLLFORWARD')), 1) + assert_equal(len(self.output.getMessagesWithErrorCode('WROLLREVERSE')), 0) def test_roll_message_reverse(self): """ @@ -131,8 +130,8 @@ class TestMutalyzer(): strand (reverse). """ check_variant('AL449423.14(CDKN2A_v001):c.98_99insGTA', self.config, self.output) - assert len(self.output.getMessagesWithErrorCode('WROLLFORWARD')) == 0 - assert len(self.output.getMessagesWithErrorCode('WROLLREVERSE')) == 1 + assert_equal(len(self.output.getMessagesWithErrorCode('WROLLFORWARD')), 0) + assert_equal(len(self.output.getMessagesWithErrorCode('WROLLREVERSE')), 1) def test_ins_cds_start(self): """ @@ -157,7 +156,7 @@ class TestMutalyzer(): check_variant('NG_012772.1(BRCA2_v001):c.632-5_670del', self.config, self.output) assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) == 0 + assert_equal(self.output.getOutput('removedSpliceSites'), []) # Todo: For now, the following is how to check if no protein # prediction is done. assert not self.output.getOutput('newprotein') @@ -169,7 +168,19 @@ class TestMutalyzer(): check_variant('NG_012772.1(BRCA2_v001):c.632-5_681+7del', self.config, self.output) assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) > 0 + assert_equal(self.output.getOutput('removedSpliceSites'), [2]) + # Todo: For now, the following is how to check if protein + # prediction is done. + assert self.output.getOutput('newprotein') + + def test_del_exon_exact(self): + """ + Deletion of exactly an exon should be possible. 
+ """ + check_variant('NG_012772.1(BRCA2_v001):c.632_681del', + self.config, self.output) + assert_equal(len(self.output.getMessagesWithErrorCode('WOVERSPLICE')), 0) + assert_equal(self.output.getOutput('removedSpliceSites'), [2]) # Todo: For now, the following is how to check if protein # prediction is done. assert self.output.getOutput('newprotein') @@ -186,7 +197,7 @@ class TestMutalyzer(): check_variant('NG_012772.1(BRCA2_v001):c.68-7_316+7del', self.config, self.output) assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) > 0 + assert_equal(self.output.getOutput('removedSpliceSites'), [2]) # Todo: For now, the following is how to check if protein # prediction is done. assert self.output.getOutput('newprotein') @@ -199,7 +210,7 @@ class TestMutalyzer(): check_variant('NG_012772.1(BRCA2_v001):c.632-5_793+7del', self.config, self.output) assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) > 0 + assert_equal(self.output.getOutput('removedSpliceSites'), [4]) # Todo: For now, the following is how to check if protein # prediction is done. assert self.output.getOutput('newprotein') @@ -212,11 +223,27 @@ class TestMutalyzer(): check_variant('NG_012772.1(BRCA2_v001):c.622_674del', self.config, self.output) assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) > 0 + assert_equal(self.output.getOutput('removedSpliceSites'), [2]) # Todo: For now, the following is how to check if protein # prediction is done. assert self.output.getOutput('newprotein') + def test_del_intron_exact(self): + """ + Deletion of exactly an intron should be possible (fusion of flanking + exons). 
+ """ + check_variant('NG_012772.1(BRCA2_v001):c.681+1_682-1del', + self.config, self.output) + assert_equal(self.output.getMessagesWithErrorCode('WOVERSPLICE'), []) + assert_equal(self.output.getOutput('removedSpliceSites'), [2]) + # Note: The protein prediction is done, but 'newprotein' is not set + # because we have no change. So to check if the prediction is done, we + # check if 'oldprotein' is set and to check if the prediction is + # correct, we check if 'newprotein' is not set. + assert self.output.getOutput('oldprotein') + assert not self.output.getOutput('newprotein') + def test_del_intron_in_frame(self): """ Deletion of an entire intron should be possible (fusion of remaining @@ -225,7 +252,7 @@ class TestMutalyzer(): check_variant('NG_012772.1(BRCA2_v001):c.622_672del', self.config, self.output) assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) > 0 + assert_equal(self.output.getOutput('removedSpliceSites'), [2]) # Todo: For now, the following is how to check if protein # prediction is done. assert self.output.getOutput('newprotein') @@ -296,3 +323,17 @@ class TestMutalyzer(): in self.output.getOutput('descriptions') # Todo: .c notation should still be c.632-?_681+?del, but what about # other transcripts? + + def test_del_exon_transcript_reference(self): + """ + Deletion of entire exon on a transcript reference should remove the + expected splice sites (only those of the deleted exon), and not those + of the flanking exons (as would happen using the mechanism for genomic + references). + """ + check_variant('NM_018723.3:c.758_890del', self.config, self.output) + assert_equal(len(self.output.getMessagesWithErrorCode('WOVERSPLICE')), 0) + assert_equal(self.output.getOutput('removedSpliceSites'), [2]) + # Todo: For now, the following is how to check if protein + # prediction is done. 
+ assert self.output.getOutput('newprotein') diff --git a/tests/test_website.py b/tests/test_website.py index e740edd4..de31f2c7 100644 --- a/tests/test_website.py +++ b/tests/test_website.py @@ -520,8 +520,8 @@ facilisi.""" r.mustcontain('0 Errors', '0 Warnings', 'Raw variant 1: substitution at 7055') - assert r.body.find('go to bottom') == -1 - assert r.body.find('<input') == -1 + assert_equal(r.body.find('go to bottom'), -1) + assert_equal(r.body.find('<input'), -1) def test_variantinfo_g2c(self): """ -- GitLab