From d9335656fa3e956fdf2702fc3c8f9346b1f28057 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Thu, 11 Feb 2016 19:37:45 +0100
Subject: [PATCH] Support LRG transcripts in the position converter

Note that we explicitely only support LRG references as transcripts,
so using c. positioning to convert to/from chromosomal positioning.

Supporting LRG references as genomic referenes, so using g. positioning
can be future work but converting them to/from LRG transcripts is of
course already done by the name checker.

Converting between genomic LRG positioning and chromosomal positioning
directly is not something that can be easily supported in the current
setup of the position converter.
---
 ...b75114e_add_ebi_to_mappings_source_enum.py | 64 +++++++++++++++++
 mutalyzer/db/models.py                        |  4 +-
 mutalyzer/mapping.py                          | 28 +++++---
 mutalyzer/services/rpc.py                     |  8 +--
 .../website/templates/position-converter.html | 12 +++-
 tests/fixtures.py                             | 48 +++++++++++++
 tests/test_mapping.py                         | 71 +++++++++++++++++++
 7 files changed, 217 insertions(+), 18 deletions(-)
 create mode 100644 migrations/versions/56ddeb75114e_add_ebi_to_mappings_source_enum.py

diff --git a/migrations/versions/56ddeb75114e_add_ebi_to_mappings_source_enum.py b/migrations/versions/56ddeb75114e_add_ebi_to_mappings_source_enum.py
new file mode 100644
index 00000000..7556cedd
--- /dev/null
+++ b/migrations/versions/56ddeb75114e_add_ebi_to_mappings_source_enum.py
@@ -0,0 +1,64 @@
+"""Add EBI to mappings source enum
+
+Revision ID: 56ddeb75114e
+Revises: 1ed411f9fdfa
+Create Date: 2016-02-11 04:21:40.658981
+
+"""
+
+from __future__ import unicode_literals
+
+# revision identifiers, used by Alembic.
+revision = '56ddeb75114e'
+down_revision = u'1ed411f9fdfa'
+
+from alembic import op
+import sqlalchemy as sa
+
+
+def upgrade():
+    context = op.get_context()
+
+    if context.bind.dialect.name == 'postgresql':
+        # In PostgreSQL < 9.1 there was no ALTER TYPE for enums, so it would
+        # have been something like:
+        #
+        #     ALTER TABLE foo ALTER COLUMN bar TYPE new_type USING bar::text::new_type;
+        #
+        # However, all my installations are PostgreSQL >= 9.1 and I think the
+        # USING syntax is PostgreSQL-specific, so let's ignore that. It would
+        # also come with all the hassle of moving old column values into the
+        # new column.
+        if context.bind.dialect.server_version_info >= (9, 3):
+            op.execute('COMMIT')
+            op.execute("ALTER TYPE source ADD VALUE IF NOT EXISTS 'ebi'")
+            return
+        if context.bind.dialect.server_version_info >= (9, 1):
+            op.execute('COMMIT')
+            op.execute("ALTER TYPE source ADD VALUE 'ebi'")
+            return
+
+    elif context.bind.dialect.name == 'sqlite':
+        # SQLite doesn't support altering columns, so we have to wrap this in
+        # a batch operation.
+        with op.batch_alter_table('transcript_mappings') as batch_op:
+            batch_op.alter_column(
+                'source', nullable=False,
+                type_=sa.Enum('ucsc', 'ncbi', 'ebi', 'reference', name='source')
+            )
+        return
+
+    elif context.bind.dialect.name == 'mysql':
+        # In MySQL we can simply alter the column.
+        op.alter_column(
+            'transcript_mappings', 'source', nullable=False,
+            type_=sa.Enum('ucsc', 'ncbi', 'ebi', 'reference', name='source'),
+            existing_type=sa.Enum('ucsc', 'ncbi', 'reference', name='source')
+        )
+        return
+
+    raise Exception('Sorry, only PostgreSQL >= 9.1, SQLite, and MySQL are supported by this migration')
+
+
+def downgrade():
+    raise Exception('Downgrade not supported by this migration')
diff --git a/mutalyzer/db/models.py b/mutalyzer/db/models.py
index b6e397e3..73a99c48 100644
--- a/mutalyzer/db/models.py
+++ b/mutalyzer/db/models.py
@@ -375,11 +375,11 @@ class TranscriptMapping(db.Base):
     #: If `False`, variant descriptions can use just the accession number
     #: without gene and transcript selector (e.g., ``NM_000020:c.1del``,
     #: ``LRG_1:c.1del``). If `True`, gene and transcript selection is
-    #: necessary (e.g., ``NC_012920(TRNI_v001):c.1del``, ``LRG_1_t1:c.1del``).
+    #: necessary (e.g., ``NC_012920(TRNI_v001):c.1del``, ``LRG_1t1:c.1del``).
     select_transcript = Column(Boolean, nullable=False)
 
     #: Source of the mapping.
-    source = Column(Enum('ucsc', 'ncbi', 'reference', name='source'),
+    source = Column(Enum('ucsc', 'ncbi', 'ebi', 'reference', name='source'),
                     nullable=False)
 
     #: The :class:`Assembly` this chromosome is in.
diff --git a/mutalyzer/mapping.py b/mutalyzer/mapping.py
index b9a55afe..31e67e4d 100644
--- a/mutalyzer/mapping.py
+++ b/mutalyzer/mapping.py
@@ -151,9 +151,9 @@ class Converter(object) :
                     "Could not parse the given variant")
             return None
         #if
-        if not parseTree.RefSeqAcc: #In case of LRG for example
+        if not (parseTree.RefSeqAcc or parseTree.LrgAcc):
             self.__output.addMessage(__file__, 4, "EONLYGB",
-                "Currently we only support GenBank Records")
+                "Currently we only support GenBank and LRG records")
             return None
         #if
         self.parseTree = parseTree
@@ -175,7 +175,6 @@ class Converter(object) :
         """
         versions = [m.version for m in TranscriptMapping.query.filter(
                       TranscriptMapping.accession == acc,
-                      TranscriptMapping.version != None,
                       TranscriptMapping.chromosome.has(assembly=self.assembly))]
 
         if not versions:
@@ -207,10 +206,10 @@ class Converter(object) :
 
             if not self.mapping:
                 self.__output.addMessage(__file__, 4, "EACCNOTINDB",
-                                         "The accession number %s version %s "
+                                         "The accession number %s %s"
                                          "with transcript %s version %s could not be found "
                                          "in our database." %
-                                         (acc, version, selector, selector_version))
+                                         (acc, 'version %s ' % version if version else '', selector, selector_version))
             return
 
         if not version:
@@ -417,7 +416,7 @@ class Converter(object) :
         """
         variant = "%s:%s" % (accNo, mutation)
         if self._parseInput(variant) :
-            acc = self.parseTree.RefSeqAcc
+            acc = self.parseTree.LrgAcc or self.parseTree.RefSeqAcc
             try:
                 version = int(self.parseTree.Version)
             except ValueError:
@@ -503,7 +502,7 @@ class Converter(object) :
         @rtype: unicode
         """
         if self._parseInput(variant):
-            acc = self.parseTree.RefSeqAcc
+            acc = self.parseTree.LrgAcc or self.parseTree.RefSeqAcc
             try:
                 version = int(self.parseTree.Version)
             except ValueError:
@@ -511,6 +510,10 @@ class Converter(object) :
             if self.parseTree.Gene:
                 selector = self.parseTree.Gene.GeneSymbol
                 selector_version = int(self.parseTree.Gene.TransVar or 1)
+            elif self.parseTree.LRGTranscriptID:
+                selector = None
+                selector_version = int(self.parseTree.LRGTranscriptID)
+                pass
             else:
                 selector = selector_version = None
             self._get_mapping(acc, version, selector, selector_version)
@@ -669,7 +672,7 @@ class Converter(object) :
         if not self._parseInput(variant) :
             return None
 
-        acc = self.parseTree.RefSeqAcc
+        acc = self.parseTree.LrgAcc or self.parseTree.RefSeqAcc
         version = self.parseTree.Version
 
         chromosome = Chromosome.query \
@@ -733,9 +736,14 @@ class Converter(object) :
                 #balen
                 continue
             # construct the variant description
-            accNo = "%s.%s" % (self.mapping.accession, self.mapping.version)
+            if self.mapping.reference_type == 'lrg':
+                accNo = self.mapping.accession
+            else:
+                accNo = "%s.%s" % (self.mapping.accession, self.mapping.version)
             if self.mapping.select_transcript:
-                if self.mapping.transcript:
+                if self.mapping.reference_type == 'lrg':
+                    selector = 't%d' % self.mapping.transcript
+                elif self.mapping.transcript:
                     selector = '(%s_v%.3i)' % (self.mapping.gene, self.mapping.transcript)
                 else:
                     selector = '(%s)' % self.mapping.gene
diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py
index 9cf89b9c..e0137d93 100644
--- a/mutalyzer/services/rpc.py
+++ b/mutalyzer/services/rpc.py
@@ -558,7 +558,7 @@ class MutalyzerService(ServiceBase):
         @type LOVD_ver: string
         @arg build: The genome build (hg19, hg18, mm10).
         @type build: string
-        @arg accNo: The NM accession number and version.
+        @arg accNo: The NM accession number and version or LRG.
         @type accNo: string
         @arg variant: The variant.
         @type variant: string
@@ -614,7 +614,7 @@ class MutalyzerService(ServiceBase):
         @type LOVD_ver: string
         @arg build: The genome build (hg19, hg18, mm10).
         @type build: string
-        @arg accNo: The NM accession number and version.
+        @arg accNo: The NM accession number and version or LRG.
         @type accNo: string
 
         @return: Complex object:
@@ -729,7 +729,7 @@ class MutalyzerService(ServiceBase):
 
         @arg build: The genome build (hg19, hg18, mm10).
         @type build: string
-        @arg acc: The NM accession number (version NOT included).
+        @arg acc: The NM accession number (version NOT included) or LRG.
         @type acc: string
 
         @return: The name of a chromosome.
@@ -768,7 +768,7 @@ class MutalyzerService(ServiceBase):
         @arg build: The genome build (hg19, hg18, mm10).
         @type build: string
         @arg variant: The variant in either I{c.} or I{g.} notation, full HGVS
-            notation, including NM_ or NC_ accession number.
+            notation, including NM_, NC_, or LRG_ accession number.
         @type variant: string
         @kwarg gene: Optional gene name. If given, return variant descriptions
             on all transcripts for this gene.
diff --git a/mutalyzer/website/templates/position-converter.html b/mutalyzer/website/templates/position-converter.html
index 0db57523..db9c6c1a 100644
--- a/mutalyzer/website/templates/position-converter.html
+++ b/mutalyzer/website/templates/position-converter.html
@@ -29,8 +29,16 @@ normalize it to HGVS. Use the <a href="{{ url_for('.name_checker') }}">Name Chec
     <label for="description">Variant description</label>
     <input type="text" name="description" id="description" value="{{ description }}"
            class="form-control form-pre" placeholder="Variant description using HGVS format">
-    <p>Examples: <code class="example-input"
-    data-for="description">NM_003002.3:c.274G&gt;T</code>, <code class="example-input" data-for="description">chr11:g.111959693G&gt;T</code> and <code class="example-input" data-for="description">NC_000011.9:g.111959693G&gt;T</code></p>
+    <p>Examples:
+      <code class="example-input"
+            data-for="description">NM_003002.3:c.274G&gt;T</code>,
+      <code class="example-input"
+            data-for="description">LRG_9t1:c.274G&gt;T</code>,
+      <code class="example-input"
+            data-for="description">chr11:g.111959693G&gt;T</code> and
+      <code class="example-input"
+            data-for="description">NC_000011.9:g.111959693G&gt;T</code>
+    </p>
   </div>
   <div class="form-group button-group">
     <input type="submit" class="btn btn-primary" value="Convert variant description">
diff --git a/tests/fixtures.py b/tests/fixtures.py
index f96a9c3a..28e168f1 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -593,6 +593,54 @@ def hg19_transcript_mappings(db, hg19):
         cds=(37035039, 37092144),
         select_transcript=False,
         version=3))
+    db.session.add(TranscriptMapping(
+        hg19.chromosomes.filter_by(name='chr17').one(),
+        'lrg',
+        'LRG_1',
+        'COL1A1',
+        'reverse',
+        48261457,
+        48279000,
+        [48261457, 48263139, 48263678, 48264001, 48264376, 48264845, 48265237,
+         48265457, 48265891, 48266103, 48266264, 48266529, 48266738, 48267040,
+         48267220, 48267362, 48267688, 48267904, 48268178, 48268744, 48269149,
+         48269341, 48269836, 48270001, 48270158, 48270355, 48271304, 48271491,
+         48271710, 48271934, 48272082, 48272408, 48272593, 48272795, 48272928,
+         48273284, 48273516, 48273675, 48273845, 48273978, 48274371, 48274541,
+         48275093, 48275310, 48275522, 48275794, 48276587, 48276779, 48276917,
+         48277114, 48278772],
+        [48263009, 48263381, 48263868, 48264283, 48264483, 48264898, 48265344,
+         48265510, 48265998, 48266156, 48266371, 48266636, 48266899, 48267093,
+         48267273, 48267469, 48267741, 48267957, 48268285, 48268851, 48269247,
+         48269385, 48269889, 48270054, 48270211, 48270408, 48271402, 48271544,
+         48271808, 48271987, 48272189, 48272461, 48272691, 48272839, 48273026,
+         48273337, 48273560, 48273728, 48273889, 48274031, 48274424, 48274594,
+         48275146, 48275363, 48275566, 48275865, 48276688, 48276814, 48276951,
+         48277308, 48279000],
+        'ebi',
+        transcript=1,
+        cds=(48262863, 48278874),
+        select_transcript=True))
+    db.session.add(TranscriptMapping(
+        hg19.chromosomes.filter_by(name='chr1').one(),
+        'lrg',
+        'LRG_348',
+        'CR2',
+        'forward',
+        207627645,
+        207663240,
+        [207627645, 207639871, 207641872, 207642145, 207642495, 207643040,
+         207644085, 207644342, 207644768, 207646117, 207646890, 207647146,
+         207647586, 207648169, 207649579, 207651230, 207652602, 207653323,
+         207658809, 207662487],
+        [207627821, 207640257, 207642060, 207642244, 207642577, 207643447,
+         207644261, 207644432, 207644844, 207646524, 207647066, 207647230,
+         207647668, 207648561, 207649764, 207651415, 207652625, 207653398,
+         207658917, 207663240],
+        'ebi',
+        transcript=1,
+        cds=(207627764, 207658899),
+        select_transcript=True))
 
     db.session.commit()
 
diff --git a/tests/test_mapping.py b/tests/test_mapping.py
index 30b4c7ef..8ce9d644 100644
--- a/tests/test_mapping.py
+++ b/tests/test_mapping.py
@@ -14,6 +14,39 @@ from mutalyzer.db.models import TranscriptMapping
 from mutalyzer import mapping
 
 
+# Some example positional coding/chromosomal mappings we use in the tests.
+LRG_1_T1_POSITIONS = [
+    ('-150', 48279024),
+    ('-126', 48279000),
+    ('-1', 48278875),
+    ('1', 48278874),
+    ('103', 48278772),
+    ('103+5', 48278767),
+    ('104-5', 48277313),
+    ('104', 48277308),
+    ('870', 48273878),
+    ('4248', 48263139),
+    ('4249', 48263009),
+    ('4395', 48262863),
+    ('*1', 48262862),
+    ('*1406', 48261457),
+    ('*1407', 48261456),
+    ('*1417', 48261446)]
+LRG_348_T1_POSITIONS = [
+    ('-150', 207627614),
+    ('-119', 207627645),
+    ('-1', 207627763),
+    ('1', 207627764),
+    ('58', 207627821),
+    ('58+5', 207627826),
+    ('59-5', 207639866),
+    ('59', 207639871),
+    ('3279', 207658899),
+    ('*1', 207658900),
+    ('*772', 207663240),
+    ('*780', 207663248)]
+
+
 pytestmark = pytest.mark.usefixtures('hg19_transcript_mappings')
 
 
@@ -328,6 +361,44 @@ def test_ins_seq_seq_c2chrom_reverse(converter):
     assert genomic == 'NC_000011.9:g.111957482_111957483ins[GAT;AAA]'
 
 
+@pytest.mark.parametrize('coding,chromosomal', LRG_1_T1_POSITIONS)
+def test_lrg_1t1_c2chrom(converter, coding, chromosomal):
+    """
+    Conversion from LRG reference on reverse strand.
+    """
+    chromosomal_descr = converter.c2chrom('LRG_1t1:c.%sdel' % coding)
+    assert chromosomal_descr == 'NC_000017.10:g.%ddel' % chromosomal
+
+
+@pytest.mark.parametrize('coding,chromosomal', LRG_1_T1_POSITIONS)
+def test_lrg_1t1_chrom2c(converter, coding, chromosomal):
+    """
+    Conversion to LRG reference on reverse strand.
+    """
+    coding_descr = converter.chrom2c(
+        'NC_000017.10:g.%ddel' % chromosomal, 'list')
+    assert 'LRG_1t1:c.%sdel' % coding in coding_descr
+
+
+@pytest.mark.parametrize('coding,chromosomal', LRG_348_T1_POSITIONS)
+def test_lrg_348t1_c2chrom(converter, coding, chromosomal):
+    """
+    Conversion from LRG reference on forward strand.
+    """
+    chromosomal_descr = converter.c2chrom('LRG_348t1:c.%sdel' % coding)
+    assert chromosomal_descr == 'NC_000001.10:g.%ddel' % chromosomal
+
+
+@pytest.mark.parametrize('coding,chromosomal', LRG_348_T1_POSITIONS)
+def test_lrg_348t1_chrom2c(converter, coding, chromosomal):
+    """
+    Conversion to LRG reference on forward strand.
+    """
+    coding_descr = converter.chrom2c(
+        'NC_000001.10:g.%ddel' % chromosomal, 'list')
+    assert 'LRG_348t1:c.%sdel' % coding in coding_descr
+
+
 def test_import_mapview(hg19):
     original_count = TranscriptMapping.query.count()
 
-- 
GitLab