Support LRG transcripts in the position converter

Note that we explicitely only support LRG references as transcripts, so using c. positioning to convert to/from chromosomal positioning. Supporting LRG references as genomic referenes, so using g. positioning can be future work but converting them to/from LRG transcripts is of course already done by the name checker. Converting between genomic LRG positioning and chromosomal positioning directly is not something that can be easily supported in the current setup of the position converter.

Support LRG transcripts in the position converter
d9335656 · Vermaat · f92aba16 · d9335656 · d9335656 · d9335656
Commit d9335656 authored 9 years ago by Vermaat
--- a/migrations/versions/56ddeb75114e_add_ebi_to_mappings_source_enum.py
+++ b/migrations/versions/56ddeb75114e_add_ebi_to_mappings_source_enum.py
+"""Add EBI to mappings source enum
+
+Revision ID: 56ddeb75114e
+Revises: 1ed411f9fdfa
+Create Date: 2016-02-11 04:21:40.658981
+
+"""
+
+from __future__ import unicode_literals
+
+# revision identifiers, used by Alembic.
+revision = '56ddeb75114e'
+down_revision = u'1ed411f9fdfa'
+
+from alembic import op
+import sqlalchemy as sa
+
+
+def upgrade():
+    context = op.get_context()
+
+    if context.bind.dialect.name == 'postgresql':
+        # In PostgreSQL < 9.1 there was no ALTER TYPE for enums, so it would
+        # have been something like:
+        #
+        #     ALTER TABLE foo ALTER COLUMN bar TYPE new_type USING bar::text::new_type;
+        #
+        # However, all my installations are PostgreSQL >= 9.1 and I think the
+        # USING syntax is PostgreSQL-specific, so let's ignore that. It would
+        # also come with all the hassle of moving old column values into the
+        # new column.
+        if context.bind.dialect.server_version_info >= (9, 3):
+            op.execute('COMMIT')
+            op.execute("ALTER TYPE source ADD VALUE IF NOT EXISTS 'ebi'")
+            return
+        if context.bind.dialect.server_version_info >= (9, 1):
+            op.execute('COMMIT')
+            op.execute("ALTER TYPE source ADD VALUE 'ebi'")
+            return
+
+    elif context.bind.dialect.name == 'sqlite':
+        # SQLite doesn't support altering columns, so we have to wrap this in
+        # a batch operation.
+        with op.batch_alter_table('transcript_mappings') as batch_op:
+            batch_op.alter_column(
+                'source', nullable=False,
+                type_=sa.Enum('ucsc', 'ncbi', 'ebi', 'reference', name='source')
+            )
+        return
+
+    elif context.bind.dialect.name == 'mysql':
+        # In MySQL we can simply alter the column.
+        op.alter_column(
+            'transcript_mappings', 'source', nullable=False,
+            type_=sa.Enum('ucsc', 'ncbi', 'ebi', 'reference', name='source'),
+            existing_type=sa.Enum('ucsc', 'ncbi', 'reference', name='source')
+        )
+        return
+
+    raise Exception('Sorry, only PostgreSQL >= 9.1, SQLite, and MySQL are supported by this migration')
+
+
+def downgrade():
+    raise Exception('Downgrade not supported by this migration')
--- a/mutalyzer/db/models.py
+++ b/mutalyzer/db/models.py
@@ -375,11 +375,11 @@ class TranscriptMapping(db.Base):
    #: If `False`, variant descriptions can use just the accession number
    #: without gene and transcript selector (e.g., ``NM_000020:c.1del``,
    #: ``LRG_1:c.1del``). If `True`, gene and transcript selection is
-    #: necessary (e.g., ``NC_012920(TRNI_v001):c.1del``, ``LRG_1_t1:c.1del``).
+    #: necessary (e.g., ``NC_012920(TRNI_v001):c.1del``, ``LRG_1t1:c.1del``).
    select_transcript = Column(Boolean, nullable=False)

    #: Source of the mapping.
-    source = Column(Enum('ucsc', 'ncbi', 'reference', name='source'),
+    source = Column(Enum('ucsc', 'ncbi', 'ebi', 'reference', name='source'),
                    nullable=False)

    #: The :class:`Assembly` this chromosome is in.

--- a/mutalyzer/mapping.py
+++ b/mutalyzer/mapping.py
@@ -151,9 +151,9 @@ class Converter(object) :
                    "Could not parse the given variant")
            return None
        #if
-        if not parseTree.RefSeqAcc: #In case of LRG for example
+        if not (parseTree.RefSeqAcc or parseTree.LrgAcc):
            self.__output.addMessage(__file__, 4, "EONLYGB",
-                "Currently we only support GenBank Records")
+                "Currently we only support GenBank and LRG records")
            return None
        #if
        self.parseTree = parseTree
@@ -175,7 +175,6 @@ class Converter(object) :
        """
        versions = [m.version for m in TranscriptMapping.query.filter(
                      TranscriptMapping.accession == acc,
-                      TranscriptMapping.version != None,
                      TranscriptMapping.chromosome.has(assembly=self.assembly))]

        if not versions:
@@ -207,10 +206,10 @@ class Converter(object) :

            if not self.mapping:
                self.__output.addMessage(__file__, 4, "EACCNOTINDB",
-                                         "The accession number %s version %s "
+                                         "The accession number %s %s"
                                         "with transcript %s version %s could not be found "
                                         "in our database." %
-                                         (acc, version, selector, selector_version))
+                                         (acc, 'version %s ' % version if version else '', selector, selector_version))
            return

        if not version:
@@ -417,7 +416,7 @@ class Converter(object) :
        """
        variant = "%s:%s" % (accNo, mutation)
        if self._parseInput(variant) :
-            acc = self.parseTree.RefSeqAcc
+            acc = self.parseTree.LrgAcc or self.parseTree.RefSeqAcc
            try:
                version = int(self.parseTree.Version)
            except ValueError:
@@ -503,7 +502,7 @@ class Converter(object) :
        @rtype: unicode
        """
        if self._parseInput(variant):
-            acc = self.parseTree.RefSeqAcc
+            acc = self.parseTree.LrgAcc or self.parseTree.RefSeqAcc
            try:
                version = int(self.parseTree.Version)
            except ValueError:
@@ -511,6 +510,10 @@ class Converter(object) :
            if self.parseTree.Gene:
                selector = self.parseTree.Gene.GeneSymbol
                selector_version = int(self.parseTree.Gene.TransVar or 1)
+            elif self.parseTree.LRGTranscriptID:
+                selector = None
+                selector_version = int(self.parseTree.LRGTranscriptID)
+                pass
            else:
                selector = selector_version = None
            self._get_mapping(acc, version, selector, selector_version)
@@ -669,7 +672,7 @@ class Converter(object) :
        if not self._parseInput(variant) :
            return None

-        acc = self.parseTree.RefSeqAcc
+        acc = self.parseTree.LrgAcc or self.parseTree.RefSeqAcc
        version = self.parseTree.Version

        chromosome = Chromosome.query \
@@ -733,9 +736,14 @@ class Converter(object) :
                #balen
                continue
            # construct the variant description
-            accNo = "%s.%s" % (self.mapping.accession, self.mapping.version)
+            if self.mapping.reference_type == 'lrg':
+                accNo = self.mapping.accession
+            else:
+                accNo = "%s.%s" % (self.mapping.accession, self.mapping.version)
            if self.mapping.select_transcript:
-                if self.mapping.transcript:
+                if self.mapping.reference_type == 'lrg':
+                    selector = 't%d' % self.mapping.transcript
+                elif self.mapping.transcript:
                    selector = '(%s_v%.3i)' % (self.mapping.gene, self.mapping.transcript)
                else:
                    selector = '(%s)' % self.mapping.gene

--- a/mutalyzer/services/rpc.py
+++ b/mutalyzer/services/rpc.py
@@ -558,7 +558,7 @@ class MutalyzerService(ServiceBase):
        @type LOVD_ver: string
        @arg build: The genome build (hg19, hg18, mm10).
        @type build: string
-        @arg accNo: The NM accession number and version.
+        @arg accNo: The NM accession number and version or LRG.
        @type accNo: string
        @arg variant: The variant.
        @type variant: string
@@ -614,7 +614,7 @@ class MutalyzerService(ServiceBase):
        @type LOVD_ver: string
        @arg build: The genome build (hg19, hg18, mm10).
        @type build: string
-        @arg accNo: The NM accession number and version.
+        @arg accNo: The NM accession number and version or LRG.
        @type accNo: string

        @return: Complex object:
@@ -729,7 +729,7 @@ class MutalyzerService(ServiceBase):

        @arg build: The genome build (hg19, hg18, mm10).
        @type build: string
-        @arg acc: The NM accession number (version NOT included).
+        @arg acc: The NM accession number (version NOT included) or LRG.
        @type acc: string

        @return: The name of a chromosome.
@@ -768,7 +768,7 @@ class MutalyzerService(ServiceBase):
        @arg build: The genome build (hg19, hg18, mm10).
        @type build: string
        @arg variant: The variant in either I{c.} or I{g.} notation, full HGVS
-            notation, including NM_ or NC_ accession number.
+            notation, including NM_, NC_, or LRG_ accession number.
        @type variant: string
        @kwarg gene: Optional gene name. If given, return variant descriptions
            on all transcripts for this gene.

--- a/mutalyzer/website/templates/position-converter.html
+++ b/mutalyzer/website/templates/position-converter.html
@@ -29,8 +29,16 @@ normalize it to HGVS. Use the <a href="{{ url_for('.name_checker') }}">Name Chec
    <label for="description">Variant description</label>
    <input type="text" name="description" id="description" value="{{ description }}"
           class="form-control form-pre" placeholder="Variant description using HGVS format">
-    <p>Examples: <code class="example-input"
-    data-for="description">NM_003002.3:c.274G&gt;T</code>, <code class="example-input" data-for="description">chr11:g.111959693G&gt;T</code> and <code class="example-input" data-for="description">NC_000011.9:g.111959693G&gt;T</code></p>
+    <p>Examples:
+      <code class="example-input"
+            data-for="description">NM_003002.3:c.274G&gt;T</code>,
+      <code class="example-input"
+            data-for="description">LRG_9t1:c.274G&gt;T</code>,
+      <code class="example-input"
+            data-for="description">chr11:g.111959693G&gt;T</code> and
+      <code class="example-input"
+            data-for="description">NC_000011.9:g.111959693G&gt;T</code>
+    </p>
  </div>
  <div class="form-group button-group">
    <input type="submit" class="btn btn-primary" value="Convert variant description">

--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -593,6 +593,54 @@ def hg19_transcript_mappings(db, hg19):
        cds=(37035039, 37092144),
        select_transcript=False,
        version=3))
+    db.session.add(TranscriptMapping(
+        hg19.chromosomes.filter_by(name='chr17').one(),
+        'lrg',
+        'LRG_1',
+        'COL1A1',
+        'reverse',
+        48261457,
+        48279000,
+        [48261457, 48263139, 48263678, 48264001, 48264376, 48264845, 48265237,
+         48265457, 48265891, 48266103, 48266264, 48266529, 48266738, 48267040,
+         48267220, 48267362, 48267688, 48267904, 48268178, 48268744, 48269149,
+         48269341, 48269836, 48270001, 48270158, 48270355, 48271304, 48271491,
+         48271710, 48271934, 48272082, 48272408, 48272593, 48272795, 48272928,
+         48273284, 48273516, 48273675, 48273845, 48273978, 48274371, 48274541,
+         48275093, 48275310, 48275522, 48275794, 48276587, 48276779, 48276917,
+         48277114, 48278772],
+        [48263009, 48263381, 48263868, 48264283, 48264483, 48264898, 48265344,
+         48265510, 48265998, 48266156, 48266371, 48266636, 48266899, 48267093,
+         48267273, 48267469, 48267741, 48267957, 48268285, 48268851, 48269247,
+         48269385, 48269889, 48270054, 48270211, 48270408, 48271402, 48271544,
+         48271808, 48271987, 48272189, 48272461, 48272691, 48272839, 48273026,
+         48273337, 48273560, 48273728, 48273889, 48274031, 48274424, 48274594,
+         48275146, 48275363, 48275566, 48275865, 48276688, 48276814, 48276951,
+         48277308, 48279000],
+        'ebi',
+        transcript=1,
+        cds=(48262863, 48278874),
+        select_transcript=True))
+    db.session.add(TranscriptMapping(
+        hg19.chromosomes.filter_by(name='chr1').one(),
+        'lrg',
+        'LRG_348',
+        'CR2',
+        'forward',
+        207627645,
+        207663240,
+        [207627645, 207639871, 207641872, 207642145, 207642495, 207643040,
+         207644085, 207644342, 207644768, 207646117, 207646890, 207647146,
+         207647586, 207648169, 207649579, 207651230, 207652602, 207653323,
+         207658809, 207662487],
+        [207627821, 207640257, 207642060, 207642244, 207642577, 207643447,
+         207644261, 207644432, 207644844, 207646524, 207647066, 207647230,
+         207647668, 207648561, 207649764, 207651415, 207652625, 207653398,
+         207658917, 207663240],
+        'ebi',
+        transcript=1,
+        cds=(207627764, 207658899),
+        select_transcript=True))

    db.session.commit()


--- a/tests/test_mapping.py
+++ b/tests/test_mapping.py
@@ -14,6 +14,39 @@ from mutalyzer.db.models import TranscriptMapping
 from mutalyzer import mapping


+# Some example positional coding/chromosomal mappings we use in the tests.
+LRG_1_T1_POSITIONS = [
+    ('-150', 48279024),
+    ('-126', 48279000),
+    ('-1', 48278875),
+    ('1', 48278874),
+    ('103', 48278772),
+    ('103+5', 48278767),
+    ('104-5', 48277313),
+    ('104', 48277308),
+    ('870', 48273878),
+    ('4248', 48263139),
+    ('4249', 48263009),
+    ('4395', 48262863),
+    ('*1', 48262862),
+    ('*1406', 48261457),
+    ('*1407', 48261456),
+    ('*1417', 48261446)]
+LRG_348_T1_POSITIONS = [
+    ('-150', 207627614),
+    ('-119', 207627645),
+    ('-1', 207627763),
+    ('1', 207627764),
+    ('58', 207627821),
+    ('58+5', 207627826),
+    ('59-5', 207639866),
+    ('59', 207639871),
+    ('3279', 207658899),
+    ('*1', 207658900),
+    ('*772', 207663240),
+    ('*780', 207663248)]
+
+
 pytestmark = pytest.mark.usefixtures('hg19_transcript_mappings')


@@ -328,6 +361,44 @@ def test_ins_seq_seq_c2chrom_reverse(converter):
    assert genomic == 'NC_000011.9:g.111957482_111957483ins[GAT;AAA]'


+@pytest.mark.parametrize('coding,chromosomal', LRG_1_T1_POSITIONS)
+def test_lrg_1t1_c2chrom(converter, coding, chromosomal):
+    """
+    Conversion from LRG reference on reverse strand.
+    """
+    chromosomal_descr = converter.c2chrom('LRG_1t1:c.%sdel' % coding)
+    assert chromosomal_descr == 'NC_000017.10:g.%ddel' % chromosomal
+
+
+@pytest.mark.parametrize('coding,chromosomal', LRG_1_T1_POSITIONS)
+def test_lrg_1t1_chrom2c(converter, coding, chromosomal):
+    """
+    Conversion to LRG reference on reverse strand.
+    """
+    coding_descr = converter.chrom2c(
+        'NC_000017.10:g.%ddel' % chromosomal, 'list')
+    assert 'LRG_1t1:c.%sdel' % coding in coding_descr
+
+
+@pytest.mark.parametrize('coding,chromosomal', LRG_348_T1_POSITIONS)
+def test_lrg_348t1_c2chrom(converter, coding, chromosomal):
+    """
+    Conversion from LRG reference on forward strand.
+    """
+    chromosomal_descr = converter.c2chrom('LRG_348t1:c.%sdel' % coding)
+    assert chromosomal_descr == 'NC_000001.10:g.%ddel' % chromosomal
+
+
+@pytest.mark.parametrize('coding,chromosomal', LRG_348_T1_POSITIONS)
+def test_lrg_348t1_chrom2c(converter, coding, chromosomal):
+    """
+    Conversion to LRG reference on forward strand.
+    """
+    coding_descr = converter.chrom2c(
+        'NC_000001.10:g.%ddel' % chromosomal, 'list')
+    assert 'LRG_348t1:c.%sdel' % coding in coding_descr
+
+
 def test_import_mapview(hg19):
    original_count = TranscriptMapping.query.count()