Skip to content
Snippets Groups Projects
Commit d9335656 authored by Vermaat's avatar Vermaat
Browse files

Support LRG transcripts in the position converter

Note that we explicitly only support LRG references as transcripts,
i.e., using c. positioning to convert to/from chromosomal positioning.

Supporting LRG references as genomic references, i.e., using g. positioning,
can be future work, but converting them to/from LRG transcripts is of
course already done by the name checker.

Converting between genomic LRG positioning and chromosomal positioning
directly is not something that can be easily supported in the current
setup of the position converter.
parent f92aba16
No related branches found
No related tags found
No related merge requests found
"""Add EBI to mappings source enum
Revision ID: 56ddeb75114e
Revises: 1ed411f9fdfa
Create Date: 2016-02-11 04:21:40.658981
"""
from __future__ import unicode_literals
# revision identifiers, used by Alembic.
revision = '56ddeb75114e'
down_revision = u'1ed411f9fdfa'
from alembic import op
import sqlalchemy as sa
def upgrade():
    """
    Add the value ``'ebi'`` to the ``source`` enum used by the
    ``transcript_mappings.source`` column.

    How the enum is extended depends on the dialect of the bound database:

    - PostgreSQL >= 9.3: ``ALTER TYPE ... ADD VALUE IF NOT EXISTS``.
    - PostgreSQL >= 9.1: ``ALTER TYPE ... ADD VALUE``.
    - SQLite: the column is altered inside a batch operation.
    - MySQL: the column is altered directly.

    Any other situation (including PostgreSQL < 9.1) raises an exception.
    """
    dialect = op.get_context().bind.dialect
    backend = dialect.name

    if backend == 'postgresql':
        # PostgreSQL < 9.1 had no ALTER TYPE for enums, so supporting it
        # would have required something like:
        #
        #     ALTER TABLE foo ALTER COLUMN bar TYPE new_type USING bar::text::new_type;
        #
        # The USING syntax is thought to be PostgreSQL-specific and old
        # column values would have to be moved into the new column. Since
        # all known installations run PostgreSQL >= 9.1, that route is
        # deliberately not implemented.
        server_version = dialect.server_version_info
        if server_version >= (9, 3):
            statement = "ALTER TYPE source ADD VALUE IF NOT EXISTS 'ebi'"
        elif server_version >= (9, 1):
            statement = "ALTER TYPE source ADD VALUE 'ebi'"
        else:
            # Too old: fall through to the "unsupported" error below.
            statement = None

        if statement is not None:
            # NOTE(review): the explicit COMMIT presumably closes the
            # transaction opened for the migration, because ADD VALUE may
            # not be allowed inside a transaction block -- confirm against
            # the PostgreSQL documentation for the targeted versions.
            op.execute('COMMIT')
            op.execute(statement)
            return

    elif backend == 'sqlite':
        # SQLite cannot alter columns in place, hence the batch operation.
        with op.batch_alter_table('transcript_mappings') as batch_op:
            extended_enum = sa.Enum(
                'ucsc', 'ncbi', 'ebi', 'reference', name='source')
            batch_op.alter_column(
                'source', nullable=False, type_=extended_enum)
        return

    elif backend == 'mysql':
        # MySQL allows altering the column type directly.
        extended_enum = sa.Enum(
            'ucsc', 'ncbi', 'ebi', 'reference', name='source')
        previous_enum = sa.Enum('ucsc', 'ncbi', 'reference', name='source')
        op.alter_column(
            'transcript_mappings', 'source', nullable=False,
            type_=extended_enum, existing_type=previous_enum)
        return

    raise Exception('Sorry, only PostgreSQL >= 9.1, SQLite, and MySQL are supported by this migration')
def downgrade():
    """
    Downgrading is not supported by this migration.

    :raises NotImplementedError: Always. This is a subclass of
        :class:`Exception`, so callers catching the generic exception keep
        working, while the specific type makes the "not supported" case
        distinguishable from an actual failure.
    """
    raise NotImplementedError('Downgrade not supported by this migration')
......@@ -375,11 +375,11 @@ class TranscriptMapping(db.Base):
#: If `False`, variant descriptions can use just the accession number
#: without gene and transcript selector (e.g., ``NM_000020:c.1del``,
#: ``LRG_1:c.1del``). If `True`, gene and transcript selection is
#: necessary (e.g., ``NC_012920(TRNI_v001):c.1del``, ``LRG_1_t1:c.1del``).
#: necessary (e.g., ``NC_012920(TRNI_v001):c.1del``, ``LRG_1t1:c.1del``).
select_transcript = Column(Boolean, nullable=False)
#: Source of the mapping.
source = Column(Enum('ucsc', 'ncbi', 'reference', name='source'),
source = Column(Enum('ucsc', 'ncbi', 'ebi', 'reference', name='source'),
nullable=False)
#: The :class:`Assembly` this chromosome is in.
......
......@@ -151,9 +151,9 @@ class Converter(object) :
"Could not parse the given variant")
return None
#if
if not parseTree.RefSeqAcc: #In case of LRG for example
if not (parseTree.RefSeqAcc or parseTree.LrgAcc):
self.__output.addMessage(__file__, 4, "EONLYGB",
"Currently we only support GenBank Records")
"Currently we only support GenBank and LRG records")
return None
#if
self.parseTree = parseTree
......@@ -175,7 +175,6 @@ class Converter(object) :
"""
versions = [m.version for m in TranscriptMapping.query.filter(
TranscriptMapping.accession == acc,
TranscriptMapping.version != None,
TranscriptMapping.chromosome.has(assembly=self.assembly))]
if not versions:
......@@ -207,10 +206,10 @@ class Converter(object) :
if not self.mapping:
self.__output.addMessage(__file__, 4, "EACCNOTINDB",
"The accession number %s version %s "
"The accession number %s %s"
"with transcript %s version %s could not be found "
"in our database." %
(acc, version, selector, selector_version))
(acc, 'version %s ' % version if version else '', selector, selector_version))
return
if not version:
......@@ -417,7 +416,7 @@ class Converter(object) :
"""
variant = "%s:%s" % (accNo, mutation)
if self._parseInput(variant) :
acc = self.parseTree.RefSeqAcc
acc = self.parseTree.LrgAcc or self.parseTree.RefSeqAcc
try:
version = int(self.parseTree.Version)
except ValueError:
......@@ -503,7 +502,7 @@ class Converter(object) :
@rtype: unicode
"""
if self._parseInput(variant):
acc = self.parseTree.RefSeqAcc
acc = self.parseTree.LrgAcc or self.parseTree.RefSeqAcc
try:
version = int(self.parseTree.Version)
except ValueError:
......@@ -511,6 +510,10 @@ class Converter(object) :
if self.parseTree.Gene:
selector = self.parseTree.Gene.GeneSymbol
selector_version = int(self.parseTree.Gene.TransVar or 1)
elif self.parseTree.LRGTranscriptID:
selector = None
selector_version = int(self.parseTree.LRGTranscriptID)
pass
else:
selector = selector_version = None
self._get_mapping(acc, version, selector, selector_version)
......@@ -669,7 +672,7 @@ class Converter(object) :
if not self._parseInput(variant) :
return None
acc = self.parseTree.RefSeqAcc
acc = self.parseTree.LrgAcc or self.parseTree.RefSeqAcc
version = self.parseTree.Version
chromosome = Chromosome.query \
......@@ -733,9 +736,14 @@ class Converter(object) :
#balen
continue
# construct the variant description
accNo = "%s.%s" % (self.mapping.accession, self.mapping.version)
if self.mapping.reference_type == 'lrg':
accNo = self.mapping.accession
else:
accNo = "%s.%s" % (self.mapping.accession, self.mapping.version)
if self.mapping.select_transcript:
if self.mapping.transcript:
if self.mapping.reference_type == 'lrg':
selector = 't%d' % self.mapping.transcript
elif self.mapping.transcript:
selector = '(%s_v%.3i)' % (self.mapping.gene, self.mapping.transcript)
else:
selector = '(%s)' % self.mapping.gene
......
......@@ -558,7 +558,7 @@ class MutalyzerService(ServiceBase):
@type LOVD_ver: string
@arg build: The genome build (hg19, hg18, mm10).
@type build: string
@arg accNo: The NM accession number and version.
@arg accNo: The NM accession number and version or LRG.
@type accNo: string
@arg variant: The variant.
@type variant: string
......@@ -614,7 +614,7 @@ class MutalyzerService(ServiceBase):
@type LOVD_ver: string
@arg build: The genome build (hg19, hg18, mm10).
@type build: string
@arg accNo: The NM accession number and version.
@arg accNo: The NM accession number and version or LRG.
@type accNo: string
@return: Complex object:
......@@ -729,7 +729,7 @@ class MutalyzerService(ServiceBase):
@arg build: The genome build (hg19, hg18, mm10).
@type build: string
@arg acc: The NM accession number (version NOT included).
@arg acc: The NM accession number (version NOT included) or LRG.
@type acc: string
@return: The name of a chromosome.
......@@ -768,7 +768,7 @@ class MutalyzerService(ServiceBase):
@arg build: The genome build (hg19, hg18, mm10).
@type build: string
@arg variant: The variant in either I{c.} or I{g.} notation, full HGVS
notation, including NM_ or NC_ accession number.
notation, including NM_, NC_, or LRG_ accession number.
@type variant: string
@kwarg gene: Optional gene name. If given, return variant descriptions
on all transcripts for this gene.
......
......@@ -29,8 +29,16 @@ normalize it to HGVS. Use the <a href="{{ url_for('.name_checker') }}">Name Chec
<label for="description">Variant description</label>
<input type="text" name="description" id="description" value="{{ description }}"
class="form-control form-pre" placeholder="Variant description using HGVS format">
<p>Examples: <code class="example-input"
data-for="description">NM_003002.3:c.274G&gt;T</code>, <code class="example-input" data-for="description">chr11:g.111959693G&gt;T</code> and <code class="example-input" data-for="description">NC_000011.9:g.111959693G&gt;T</code></p>
<p>Examples:
<code class="example-input"
data-for="description">NM_003002.3:c.274G&gt;T</code>,
<code class="example-input"
data-for="description">LRG_9t1:c.274G&gt;T</code>,
<code class="example-input"
data-for="description">chr11:g.111959693G&gt;T</code> and
<code class="example-input"
data-for="description">NC_000011.9:g.111959693G&gt;T</code>
</p>
</div>
<div class="form-group button-group">
<input type="submit" class="btn btn-primary" value="Convert variant description">
......
......@@ -593,6 +593,54 @@ def hg19_transcript_mappings(db, hg19):
cds=(37035039, 37092144),
select_transcript=False,
version=3))
db.session.add(TranscriptMapping(
hg19.chromosomes.filter_by(name='chr17').one(),
'lrg',
'LRG_1',
'COL1A1',
'reverse',
48261457,
48279000,
[48261457, 48263139, 48263678, 48264001, 48264376, 48264845, 48265237,
48265457, 48265891, 48266103, 48266264, 48266529, 48266738, 48267040,
48267220, 48267362, 48267688, 48267904, 48268178, 48268744, 48269149,
48269341, 48269836, 48270001, 48270158, 48270355, 48271304, 48271491,
48271710, 48271934, 48272082, 48272408, 48272593, 48272795, 48272928,
48273284, 48273516, 48273675, 48273845, 48273978, 48274371, 48274541,
48275093, 48275310, 48275522, 48275794, 48276587, 48276779, 48276917,
48277114, 48278772],
[48263009, 48263381, 48263868, 48264283, 48264483, 48264898, 48265344,
48265510, 48265998, 48266156, 48266371, 48266636, 48266899, 48267093,
48267273, 48267469, 48267741, 48267957, 48268285, 48268851, 48269247,
48269385, 48269889, 48270054, 48270211, 48270408, 48271402, 48271544,
48271808, 48271987, 48272189, 48272461, 48272691, 48272839, 48273026,
48273337, 48273560, 48273728, 48273889, 48274031, 48274424, 48274594,
48275146, 48275363, 48275566, 48275865, 48276688, 48276814, 48276951,
48277308, 48279000],
'ebi',
transcript=1,
cds=(48262863, 48278874),
select_transcript=True))
db.session.add(TranscriptMapping(
hg19.chromosomes.filter_by(name='chr1').one(),
'lrg',
'LRG_348',
'CR2',
'forward',
207627645,
207663240,
[207627645, 207639871, 207641872, 207642145, 207642495, 207643040,
207644085, 207644342, 207644768, 207646117, 207646890, 207647146,
207647586, 207648169, 207649579, 207651230, 207652602, 207653323,
207658809, 207662487],
[207627821, 207640257, 207642060, 207642244, 207642577, 207643447,
207644261, 207644432, 207644844, 207646524, 207647066, 207647230,
207647668, 207648561, 207649764, 207651415, 207652625, 207653398,
207658917, 207663240],
'ebi',
transcript=1,
cds=(207627764, 207658899),
select_transcript=True))
db.session.commit()
......
......@@ -14,6 +14,39 @@ from mutalyzer.db.models import TranscriptMapping
from mutalyzer import mapping
# Some example positional coding/chromosomal mappings we use in the tests.
# Expected position pairs for transcript 1 of LRG_1 (reverse strand
# according to the tests using this table). Each entry is
# (coding position as written after ``LRG_1t1:c.``,
#  chromosomal position as written after ``NC_000017.10:g.``).
# The LRG_1t1 tests are parametrized over this list in both directions.
LRG_1_T1_POSITIONS = [
('-150', 48279024),
('-126', 48279000),
('-1', 48278875),
('1', 48278874),
('103', 48278772),
('103+5', 48278767),
('104-5', 48277313),
('104', 48277308),
('870', 48273878),
('4248', 48263139),
('4249', 48263009),
('4395', 48262863),
('*1', 48262862),
('*1406', 48261457),
('*1407', 48261456),
('*1417', 48261446)]
# Expected position pairs for transcript 1 of LRG_348 (forward strand
# according to the tests using this table). Each entry is
# (coding position as written after ``LRG_348t1:c.``,
#  chromosomal position as written after ``NC_000001.10:g.``).
# The LRG_348t1 tests are parametrized over this list in both directions.
LRG_348_T1_POSITIONS = [
('-150', 207627614),
('-119', 207627645),
('-1', 207627763),
('1', 207627764),
('58', 207627821),
('58+5', 207627826),
('59-5', 207639866),
('59', 207639871),
('3279', 207658899),
('*1', 207658900),
('*772', 207663240),
('*780', 207663248)]
pytestmark = pytest.mark.usefixtures('hg19_transcript_mappings')
......@@ -328,6 +361,44 @@ def test_ins_seq_seq_c2chrom_reverse(converter):
assert genomic == 'NC_000011.9:g.111957482_111957483ins[GAT;AAA]'
@pytest.mark.parametrize('coding,chromosomal', LRG_1_T1_POSITIONS)
def test_lrg_1t1_c2chrom(converter, coding, chromosomal):
    """
    A deletion described on LRG_1t1 (reverse strand) is converted to the
    expected chromosomal description.
    """
    observed = converter.c2chrom('LRG_1t1:c.%sdel' % coding)
    expected = 'NC_000017.10:g.%ddel' % chromosomal
    assert observed == expected
@pytest.mark.parametrize('coding,chromosomal', LRG_1_T1_POSITIONS)
def test_lrg_1t1_chrom2c(converter, coding, chromosomal):
    """
    A chromosomal deletion is converted to a list of coding descriptions
    that includes the expected one on LRG_1t1 (reverse strand).
    """
    chromosomal_descr = 'NC_000017.10:g.%ddel' % chromosomal
    candidates = converter.chrom2c(chromosomal_descr, 'list')
    expected = 'LRG_1t1:c.%sdel' % coding
    assert expected in candidates
@pytest.mark.parametrize('coding,chromosomal', LRG_348_T1_POSITIONS)
def test_lrg_348t1_c2chrom(converter, coding, chromosomal):
    """
    A deletion described on LRG_348t1 (forward strand) is converted to the
    expected chromosomal description.
    """
    observed = converter.c2chrom('LRG_348t1:c.%sdel' % coding)
    expected = 'NC_000001.10:g.%ddel' % chromosomal
    assert observed == expected
@pytest.mark.parametrize('coding,chromosomal', LRG_348_T1_POSITIONS)
def test_lrg_348t1_chrom2c(converter, coding, chromosomal):
    """
    A chromosomal deletion is converted to a list of coding descriptions
    that includes the expected one on LRG_348t1 (forward strand).
    """
    chromosomal_descr = 'NC_000001.10:g.%ddel' % chromosomal
    candidates = converter.chrom2c(chromosomal_descr, 'list')
    expected = 'LRG_348t1:c.%sdel' % coding
    assert expected in candidates
def test_import_mapview(hg19):
original_count = TranscriptMapping.query.count()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment