diff --git a/migrations/versions/56ddeb75114e_add_ebi_to_mappings_source_enum.py b/migrations/versions/56ddeb75114e_add_ebi_to_mappings_source_enum.py new file mode 100644 index 0000000000000000000000000000000000000000..7556cedd329cb0517ebfad71ce6a09501b978477 --- /dev/null +++ b/migrations/versions/56ddeb75114e_add_ebi_to_mappings_source_enum.py @@ -0,0 +1,64 @@ +"""Add EBI to mappings source enum + +Revision ID: 56ddeb75114e +Revises: 1ed411f9fdfa +Create Date: 2016-02-11 04:21:40.658981 + +""" + +from __future__ import unicode_literals + +# revision identifiers, used by Alembic. +revision = '56ddeb75114e' +down_revision = u'1ed411f9fdfa' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + context = op.get_context() + + if context.bind.dialect.name == 'postgresql': + # In PostgreSQL < 9.1 there was no ALTER TYPE for enums, so it would + # have been something like: + # + # ALTER TABLE foo ALTER COLUMN bar TYPE new_type USING bar::text::new_type; + # + # However, all my installations are PostgreSQL >= 9.1 and I think the + # USING syntax is PostgreSQL-specific, so let's ignore that. It would + # also come with all the hassle of moving old column values into the + # new column. + if context.bind.dialect.server_version_info >= (9, 3): + op.execute('COMMIT') + op.execute("ALTER TYPE source ADD VALUE IF NOT EXISTS 'ebi'") + return + if context.bind.dialect.server_version_info >= (9, 1): + op.execute('COMMIT') + op.execute("ALTER TYPE source ADD VALUE 'ebi'") + return + + elif context.bind.dialect.name == 'sqlite': + # SQLite doesn't support altering columns, so we have to wrap this in + # a batch operation. + with op.batch_alter_table('transcript_mappings') as batch_op: + batch_op.alter_column( + 'source', nullable=False, + type_=sa.Enum('ucsc', 'ncbi', 'ebi', 'reference', name='source') + ) + return + + elif context.bind.dialect.name == 'mysql': + # In MySQL we can simply alter the column. + op.alter_column( + 'transcript_mappings', 'source', nullable=False, + type_=sa.Enum('ucsc', 'ncbi', 'ebi', 'reference', name='source'), + existing_type=sa.Enum('ucsc', 'ncbi', 'reference', name='source') + ) + return + + raise Exception('Sorry, only PostgreSQL >= 9.1, SQLite, and MySQL are supported by this migration') + + +def downgrade(): + raise Exception('Downgrade not supported by this migration') diff --git a/mutalyzer/db/models.py b/mutalyzer/db/models.py index b6e397e34bd543e02f729243edfa17a493a79b07..73a99c48994c3240b13875057bef2f113b823236 100644 --- a/mutalyzer/db/models.py +++ b/mutalyzer/db/models.py @@ -375,11 +375,11 @@ class TranscriptMapping(db.Base): #: If `False`, variant descriptions can use just the accession number #: without gene and transcript selector (e.g., ``NM_000020:c.1del``, #: ``LRG_1:c.1del``). If `True`, gene and transcript selection is - #: necessary (e.g., ``NC_012920(TRNI_v001):c.1del``, ``LRG_1_t1:c.1del``). + #: necessary (e.g., ``NC_012920(TRNI_v001):c.1del``, ``LRG_1t1:c.1del``). select_transcript = Column(Boolean, nullable=False) #: Source of the mapping. - source = Column(Enum('ucsc', 'ncbi', 'reference', name='source'), + source = Column(Enum('ucsc', 'ncbi', 'ebi', 'reference', name='source'), nullable=False) #: The :class:`Assembly` this chromosome is in. diff --git a/mutalyzer/mapping.py b/mutalyzer/mapping.py index b9a55afefdec70ecddbb1dd98eede58420ec95f1..31e67e4d210c543b76831ec67344a70c2db49184 100644 --- a/mutalyzer/mapping.py +++ b/mutalyzer/mapping.py @@ -151,9 +151,9 @@ class Converter(object) : "Could not parse the given variant") return None #if - if not parseTree.RefSeqAcc: #In case of LRG for example + if not (parseTree.RefSeqAcc or parseTree.LrgAcc): self.__output.addMessage(__file__, 4, "EONLYGB", - "Currently we only support GenBank Records") + "Currently we only support GenBank and LRG records") return None #if self.parseTree = parseTree @@ -175,7 +175,6 @@ class Converter(object) : """ versions = [m.version for m in TranscriptMapping.query.filter( TranscriptMapping.accession == acc, - TranscriptMapping.version != None, TranscriptMapping.chromosome.has(assembly=self.assembly))] if not versions: @@ -207,10 +206,10 @@ class Converter(object) : if not self.mapping: self.__output.addMessage(__file__, 4, "EACCNOTINDB", - "The accession number %s version %s " + "The accession number %s %s" "with transcript %s version %s could not be found " "in our database." % - (acc, version, selector, selector_version)) + (acc, 'version %s ' % version if version else '', selector, selector_version)) return if not version: @@ -417,7 +416,7 @@ class Converter(object) : """ variant = "%s:%s" % (accNo, mutation) if self._parseInput(variant) : - acc = self.parseTree.RefSeqAcc + acc = self.parseTree.LrgAcc or self.parseTree.RefSeqAcc try: version = int(self.parseTree.Version) except ValueError: @@ -503,7 +502,7 @@ class Converter(object) : @rtype: unicode """ if self._parseInput(variant): - acc = self.parseTree.RefSeqAcc + acc = self.parseTree.LrgAcc or self.parseTree.RefSeqAcc try: version = int(self.parseTree.Version) except ValueError: @@ -511,6 +510,10 @@ class Converter(object) : if self.parseTree.Gene: selector = self.parseTree.Gene.GeneSymbol selector_version = int(self.parseTree.Gene.TransVar or 1) + elif self.parseTree.LRGTranscriptID: + selector = None + selector_version = int(self.parseTree.LRGTranscriptID) + pass else: selector = selector_version = None self._get_mapping(acc, version, selector, selector_version) @@ -669,7 +672,7 @@ class Converter(object) : if not self._parseInput(variant) : return None - acc = self.parseTree.RefSeqAcc + acc = self.parseTree.LrgAcc or self.parseTree.RefSeqAcc version = self.parseTree.Version chromosome = Chromosome.query \ @@ -733,9 +736,14 @@ class Converter(object) : #balen continue # construct the variant description - accNo = "%s.%s" % (self.mapping.accession, self.mapping.version) + if self.mapping.reference_type == 'lrg': + accNo = self.mapping.accession + else: + accNo = "%s.%s" % (self.mapping.accession, self.mapping.version) if self.mapping.select_transcript: - if self.mapping.transcript: + if self.mapping.reference_type == 'lrg': + selector = 't%d' % self.mapping.transcript + elif self.mapping.transcript: selector = '(%s_v%.3i)' % (self.mapping.gene, self.mapping.transcript) else: selector = '(%s)' % self.mapping.gene diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py index 9cf89b9c979e558241f0f90640a7e8f9cbe5e3a4..e0137d9330aef341b422aa154309af15c5e8cb3b 100644 --- a/mutalyzer/services/rpc.py +++ b/mutalyzer/services/rpc.py @@ -558,7 +558,7 @@ class MutalyzerService(ServiceBase): @type LOVD_ver: string @arg build: The genome build (hg19, hg18, mm10). @type build: string - @arg accNo: The NM accession number and version. + @arg accNo: The NM accession number and version or LRG. @type accNo: string @arg variant: The variant. @type variant: string @@ -614,7 +614,7 @@ class MutalyzerService(ServiceBase): @type LOVD_ver: string @arg build: The genome build (hg19, hg18, mm10). @type build: string - @arg accNo: The NM accession number and version. + @arg accNo: The NM accession number and version or LRG. @type accNo: string @return: Complex object: @@ -729,7 +729,7 @@ class MutalyzerService(ServiceBase): @arg build: The genome build (hg19, hg18, mm10). @type build: string - @arg acc: The NM accession number (version NOT included). + @arg acc: The NM accession number (version NOT included) or LRG. @type acc: string @return: The name of a chromosome. @@ -768,7 +768,7 @@ class MutalyzerService(ServiceBase): @arg build: The genome build (hg19, hg18, mm10). @type build: string @arg variant: The variant in either I{c.} or I{g.} notation, full HGVS - notation, including NM_ or NC_ accession number. + notation, including NM_, NC_, or LRG_ accession number. @type variant: string @kwarg gene: Optional gene name. If given, return variant descriptions on all transcripts for this gene. diff --git a/mutalyzer/website/templates/position-converter.html b/mutalyzer/website/templates/position-converter.html index 0db57523fd018b27136e7e3cf98ec9217e1cbcc5..db9c6c1ae4beae009b26796ace56e21e6211b6dc 100644 --- a/mutalyzer/website/templates/position-converter.html +++ b/mutalyzer/website/templates/position-converter.html @@ -29,8 +29,16 @@ normalize it to HGVS. Use the <a href="{{ url_for('.name_checker') }}">Name Chec <label for="description">Variant description</label> <input type="text" name="description" id="description" value="{{ description }}" class="form-control form-pre" placeholder="Variant description using HGVS format"> - <p>Examples: <code class="example-input" - data-for="description">NM_003002.3:c.274G>T</code>, <code class="example-input" data-for="description">chr11:g.111959693G>T</code> and <code class="example-input" data-for="description">NC_000011.9:g.111959693G>T</code></p> + <p>Examples: + <code class="example-input" + data-for="description">NM_003002.3:c.274G>T</code>, + <code class="example-input" + data-for="description">LRG_9t1:c.274G>T</code>, + <code class="example-input" + data-for="description">chr11:g.111959693G>T</code> and + <code class="example-input" + data-for="description">NC_000011.9:g.111959693G>T</code> + </p> </div> <div class="form-group button-group"> <input type="submit" class="btn btn-primary" value="Convert variant description"> diff --git a/tests/fixtures.py b/tests/fixtures.py index f96a9c3a82c62d83e166d77265107ad79681f604..28e168f12e12d62473b5984597b9d2eab76fbdea 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -593,6 +593,54 @@ def hg19_transcript_mappings(db, hg19): cds=(37035039, 37092144), select_transcript=False, version=3)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr17').one(), + 'lrg', + 'LRG_1', + 'COL1A1', + 'reverse', + 48261457, + 48279000, + [48261457, 48263139, 48263678, 48264001, 48264376, 48264845, 48265237, + 48265457, 48265891, 48266103, 48266264, 48266529, 48266738, 48267040, + 48267220, 48267362, 48267688, 48267904, 48268178, 48268744, 48269149, + 48269341, 48269836, 48270001, 48270158, 48270355, 48271304, 48271491, + 48271710, 48271934, 48272082, 48272408, 48272593, 48272795, 48272928, + 48273284, 48273516, 48273675, 48273845, 48273978, 48274371, 48274541, + 48275093, 48275310, 48275522, 48275794, 48276587, 48276779, 48276917, + 48277114, 48278772], + [48263009, 48263381, 48263868, 48264283, 48264483, 48264898, 48265344, + 48265510, 48265998, 48266156, 48266371, 48266636, 48266899, 48267093, + 48267273, 48267469, 48267741, 48267957, 48268285, 48268851, 48269247, + 48269385, 48269889, 48270054, 48270211, 48270408, 48271402, 48271544, + 48271808, 48271987, 48272189, 48272461, 48272691, 48272839, 48273026, + 48273337, 48273560, 48273728, 48273889, 48274031, 48274424, 48274594, + 48275146, 48275363, 48275566, 48275865, 48276688, 48276814, 48276951, + 48277308, 48279000], + 'ebi', + transcript=1, + cds=(48262863, 48278874), + select_transcript=True)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr1').one(), + 'lrg', + 'LRG_348', + 'CR2', + 'forward', + 207627645, + 207663240, + [207627645, 207639871, 207641872, 207642145, 207642495, 207643040, + 207644085, 207644342, 207644768, 207646117, 207646890, 207647146, + 207647586, 207648169, 207649579, 207651230, 207652602, 207653323, + 207658809, 207662487], + [207627821, 207640257, 207642060, 207642244, 207642577, 207643447, + 207644261, 207644432, 207644844, 207646524, 207647066, 207647230, + 207647668, 207648561, 207649764, 207651415, 207652625, 207653398, + 207658917, 207663240], + 'ebi', + transcript=1, + cds=(207627764, 207658899), + select_transcript=True)) db.session.commit() diff --git a/tests/test_mapping.py b/tests/test_mapping.py index 30b4c7efd8bebd9f0fa650a73b6ac88c763efed6..8ce9d64418db2a5566fbcacc87f9c32d792d989c 100644 --- a/tests/test_mapping.py +++ b/tests/test_mapping.py @@ -14,6 +14,39 @@ from mutalyzer.db.models import TranscriptMapping from mutalyzer import mapping +# Some example positional coding/chromosomal mappings we use in the tests. +LRG_1_T1_POSITIONS = [ + ('-150', 48279024), + ('-126', 48279000), + ('-1', 48278875), + ('1', 48278874), + ('103', 48278772), + ('103+5', 48278767), + ('104-5', 48277313), + ('104', 48277308), + ('870', 48273878), + ('4248', 48263139), + ('4249', 48263009), + ('4395', 48262863), + ('*1', 48262862), + ('*1406', 48261457), + ('*1407', 48261456), + ('*1417', 48261446)] +LRG_348_T1_POSITIONS = [ + ('-150', 207627614), + ('-119', 207627645), + ('-1', 207627763), + ('1', 207627764), + ('58', 207627821), + ('58+5', 207627826), + ('59-5', 207639866), + ('59', 207639871), + ('3279', 207658899), + ('*1', 207658900), + ('*772', 207663240), + ('*780', 207663248)] + + pytestmark = pytest.mark.usefixtures('hg19_transcript_mappings') @@ -328,6 +361,44 @@ def test_ins_seq_seq_c2chrom_reverse(converter): assert genomic == 'NC_000011.9:g.111957482_111957483ins[GAT;AAA]' +@pytest.mark.parametrize('coding,chromosomal', LRG_1_T1_POSITIONS) +def test_lrg_1t1_c2chrom(converter, coding, chromosomal): + """ + Conversion from LRG reference on reverse strand. + """ + chromosomal_descr = converter.c2chrom('LRG_1t1:c.%sdel' % coding) + assert chromosomal_descr == 'NC_000017.10:g.%ddel' % chromosomal + + +@pytest.mark.parametrize('coding,chromosomal', LRG_1_T1_POSITIONS) +def test_lrg_1t1_chrom2c(converter, coding, chromosomal): + """ + Conversion to LRG reference on reverse strand. + """ + coding_descr = converter.chrom2c( + 'NC_000017.10:g.%ddel' % chromosomal, 'list') + assert 'LRG_1t1:c.%sdel' % coding in coding_descr + + +@pytest.mark.parametrize('coding,chromosomal', LRG_348_T1_POSITIONS) +def test_lrg_348t1_c2chrom(converter, coding, chromosomal): + """ + Conversion from LRG reference on forward strand. + """ + chromosomal_descr = converter.c2chrom('LRG_348t1:c.%sdel' % coding) + assert chromosomal_descr == 'NC_000001.10:g.%ddel' % chromosomal + + +@pytest.mark.parametrize('coding,chromosomal', LRG_348_T1_POSITIONS) +def test_lrg_348t1_chrom2c(converter, coding, chromosomal): + """ + Conversion to LRG reference on forward strand. + """ + coding_descr = converter.chrom2c( + 'NC_000001.10:g.%ddel' % chromosomal, 'list') + assert 'LRG_348t1:c.%sdel' % coding in coding_descr + + def test_import_mapview(hg19): original_count = TranscriptMapping.query.count()