""" Tests for the mapping module. """ from __future__ import unicode_literals #import logging; logging.basicConfig() from sqlalchemy import or_ from mutalyzer.db.models import Assembly from mutalyzer.output import Output from mutalyzer.mapping import Converter from fixtures import database, hg19, hg19_transcript_mappings from utils import MutalyzerTest class TestConverter(MutalyzerTest): """ Test the Converter class. """ fixtures = (database, hg19, hg19_transcript_mappings) def setup(self): super(TestConverter, self).setup() self.output = Output(__file__) def _converter(self, assembly_name_or_alias): """ Create a Converter instance for a given genome assembly. """ assembly = Assembly.query \ .filter(or_(Assembly.name == assembly_name_or_alias, Assembly.alias == assembly_name_or_alias)) \ .one() return Converter(assembly, self.output) def test_converter(self): """ Simple test. """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_003002.2:c.274G>T') assert genomic == 'NC_000011.9:g.111959695G>T' coding = converter.chrom2c(genomic, 'list') assert 'NM_003002.2:c.274G>T' in coding # Fix for r536: disable the -u and +d convention. #assert 'NR_028383.1:c.1-u2173C>A' in coding assert 'NR_028383.1:n.-2173C>A' in coding def test_converter_non_coding(self): """ Test with variant on non-coding transcript. """ converter = self._converter('hg19') genomic = converter.c2chrom('NR_028383.1:n.-2173C>A') assert genomic == 'NC_000011.9:g.111959695G>T' coding = converter.chrom2c(genomic, 'list') assert 'NM_003002.2:c.274G>T' in coding # Fix for r536: disable the -u and +d convention. #assert 'NR_028383.1:c.1-u2173C>A' in coding assert 'NR_028383.1:n.-2173C>A' in coding def test_converter_compound(self): """ Test with compound variant. """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_003002.2:c.[274G>T;278A>G]') assert genomic == 'NC_000011.9:g.[111959695G>T;111959699A>G]' coding = converter.chrom2c(genomic, 'list') assert 'NM_003002.2:c.[274G>T;278A>G]' in coding assert 'NR_028383.1:n.[-2173C>A;-2177T>C]' in coding def test_hla_cluster(self): """ Convert to primary assembly. Transcript NM_000500.5 is mapped to different chromosome locations, but we like to just see the primary assembly mapping to chromosome 6. See also bug #58. """ # Todo: This test is bogus now that we use a fixture that has just the # mapping to chromosome 6. However, I think we only get this mapping # from our current source (NCBI seq_gene.md) anyway, so I'm not sure # where we got the other mappings from in the past (but haven't # investigated really). converter = self._converter('hg19') genomic = converter.c2chrom('NM_000500.5:c.92C>T') assert genomic == 'NC_000006.11:g.32006291C>T' coding = converter.chrom2c(genomic, 'list') assert 'NM_000500.5:c.92C>T' in coding def test_converter_del_length_reverse(self): """ Position converter on deletion (denoted by length) on transcripts located on the reverse strand. """ converter = self._converter('hg19') coding = converter.chrom2c('NC_000022.10:g.51016285_51017117del123456789', 'list') # Fix for r536: disable the -u and +d convention. #assert 'NM_001145134.1:c.-138-u21_60del123456789' in coding #assert 'NR_021492.1:c.1-u5170_1-u4338del123456789' in coding assert 'NM_001145134.1:c.-159_60del123456789' in coding assert 'NR_021492.1:n.-5170_-4338del123456789' in coding def test_S_Venkata_Suresh_Kumar(self): """ Test for correct mapping information on genes where CDS start or stop is exactly on the border of an exon. Bug reported February 24, 2012 by S Venkata Suresh Kumar. """ converter = self._converter('hg19') coding = converter.chrom2c('NC_000001.10:g.115259837_115259837delT', 'list') assert 'NM_001007553.1:c.3863delA' not in coding assert 'NM_001007553.1:c.*953delA' in coding assert 'NM_001130523.1:c.*953delA' in coding def test_S_Venkata_Suresh_Kumar_more(self): """ Another test for correct mapping information on genes where CDS start or stop is exactly on the border of an exon. Bug reported March 21, 2012 by S Venkata Suresh Kumar. """ converter = self._converter('hg19') coding = converter.chrom2c('NC_000001.10:g.160012314_160012329del16', 'list') assert 'NM_002241.4:c.-27250-7_-27242del16' not in coding assert 'NM_002241.4:c.1-7_9del16' in coding def test_range_order_forward_correct(self): """ Just a normal position converter call, both directions. See Trac #95. """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_003002.2:c.-1_274del') assert genomic == 'NC_000011.9:g.111957631_111959695del' coding = converter.chrom2c(genomic, 'list') assert 'NM_003002.2:c.-1_274del' in coding def test_range_order_forward_incorrect_c2chrom(self): """ Incorrect order of a range on the forward strand. See Trac #95. """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_003002.2:c.274_-1del') assert genomic == None erange = self.output.getMessagesWithErrorCode('ERANGE') assert len(erange) == 1 def test_range_order_reverse_correct(self): """ Just a normal position converter call on the reverse strand, both directions. See Trac #95. """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_001162505.1:c.-1_40del') assert genomic == 'NC_000020.10:g.48770135_48770175del' coding = converter.chrom2c(genomic, 'list') assert 'NM_001162505.1:c.-1_40del' in coding def test_range_order_reverse_incorrect_c2chrom(self): """ Incorrect order of a range on the reverse strand. See Trac #95. """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_001162505.1:c.40_-1del') assert genomic == None erange = self.output.getMessagesWithErrorCode('ERANGE') assert len(erange) == 1 def test_range_order_incorrect_chrom2c(self): """ Incorrect order of a chromosomal range. See Trac #95. """ converter = self._converter('hg19') coding = converter.chrom2c('NC_000011.9:g.111959695_111957631del', 'list') assert coding == None erange = self.output.getMessagesWithErrorCode('ERANGE') assert len(erange) == 1 def test_delins_large_ins_c2chrom(self): """ Delins with multi-base insertion c. to chrom. """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_003002.2:c.274delinsTAAA') assert genomic == 'NC_000011.9:g.111959695delinsTAAA' coding = converter.chrom2c(genomic, 'list') assert 'NM_003002.2:c.274delinsTAAA' in coding def test_delins_large_ins_explicit_c2chrom(self): """ Delins with multi-base insertion and explicit deleted sequence c. to chrom. """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_003002.2:c.274delGinsTAAA') assert genomic == 'NC_000011.9:g.111959695delinsTAAA' coding = converter.chrom2c(genomic, 'list') assert 'NM_003002.2:c.274delinsTAAA' in coding def test_delins_large_ins_chrom2c(self): """ Delins with multi-base insertion chrom to c. """ converter = self._converter('hg19') coding = converter.chrom2c('NC_000011.9:g.111959695delinsTAAA', 'list') assert 'NM_003002.2:c.274delinsTAAA' in coding def test_delins_large_ins_explicit_chrom2c(self): """ Delins with multi-base insertion and explicit deleted sequence chrom to c. """ converter = self._converter('hg19') coding = converter.chrom2c('NC_000011.9:g.111959695delGinsTAAA', 'list') assert 'NM_003002.2:c.274delinsTAAA' in coding def test_chrm_chrom2c(self): """ Mitochondrial m. to c. """ converter = self._converter('hg19') coding = converter.chrom2c('NC_012920.1:m.12030del', 'list') assert 'NC_012920.1(ND4_v001):c.1271del' in coding def test_chrm_name_chrom2c(self): """ Mitochondrial m. (by chromosome name) to c. """ converter = self._converter('hg19') variant = converter.correctChrVariant('chrM:m.12030del') coding = converter.chrom2c(variant, 'list') assert 'NC_012920.1(ND4_v001):c.1271del' in coding def test_chrm_c2chrom(self): """ Mitochondrial c. to m. """ converter = self._converter('hg19') genomic = converter.c2chrom('NC_012920.1(ND4_v001):c.1271del') assert genomic == 'NC_012920.1:m.12030del' def test_nm_without_selector_chrom2c(self): """ NM reference without transcript selection c. to g. """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_017780.2:c.109A>T') assert genomic == 'NC_000008.10:g.61654100A>T' def test_nm_with_selector_chrom2c(self): """ NM reference with transcript selection c. to g. """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_017780.2(CHD7_v001):c.109A>T') assert genomic == 'NC_000008.10:g.61654100A>T' def test_nm_c2chrom_no_selector(self): """ To NM reference should never result in transcript selection. """ converter = self._converter('hg19') variant = converter.correctChrVariant('NC_000008.10:g.61654100A>T') coding = converter.chrom2c(variant, 'list') assert 'NM_017780.2:c.109A>T' in coding def test_incorrect_selector_c2chrom(self): """ Incorrect selector. """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_017780.2(CHD8):c.109A>T') erange = self.output.getMessagesWithErrorCode('EACCNOTINDB') assert len(erange) == 1 def test_incorrect_selector_version_c2chrom(self): """ Incorrect selector version. """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_017780.2(CHD7_v002):c.109A>T') erange = self.output.getMessagesWithErrorCode('EACCNOTINDB') assert len(erange) == 1 def test_no_selector_version_c2chrom(self): """ Selector but no selector version. """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_017780.2(CHD7):c.109A>T') assert genomic == 'NC_000008.10:g.61654100A>T' def test_incorrect_selector_no_selector_version_c2chrom(self): """ Incorrect selector, no selector version. """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_017780.2(CHD8):c.109A>T') erange = self.output.getMessagesWithErrorCode('EACCNOTINDB') assert len(erange) == 1 def test_ins_seq_chrom2c(self): """ Insertion of a sequence (chrom2c). """ converter = self._converter('hg19') coding = converter.chrom2c('NC_000011.9:g.111957482_111957483insGAT', 'list') assert 'NM_003002.2:c.-150_-149insGAT' in coding assert 'NM_012459.2:c.10_11insATC' in coding def test_ins_seq_seq(self): """ Insertion of two sequences (chrom2c). """ converter = self._converter('hg19') coding = converter.chrom2c('NC_000011.9:g.111957482_111957483ins[GAT;AAA]', 'list') assert 'NM_003002.2:c.-150_-149ins[GAT;AAA]' in coding assert 'NM_012459.2:c.10_11ins[TTT;ATC]' in coding def test_ins_seq_c2chrom_reverse(self): """ Insertion of a sequence on reverse strand (c2chrom). """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_012459.2:c.10_11insATC') assert genomic == 'NC_000011.9:g.111957482_111957483insGAT' def test_ins_seq_seq_c2chrom_reverse(self): """ Insertion of two sequences on reverse strand (c2chrom). """ converter = self._converter('hg19') genomic = converter.c2chrom('NM_012459.2:c.10_11ins[TTT;ATC]') assert genomic == 'NC_000011.9:g.111957482_111957483ins[GAT;AAA]'