Skip to content
Snippets Groups Projects
Commit 03bc99a3 authored by Vermaat's avatar Vermaat
Browse files

Merge pull request #96 from mutalyzer/link-versions

Optionally include versions in transcript-protein links
parents b80ac8ec 2d1771a5
No related branches found
No related tags found
No related merge requests found
......@@ -3,15 +3,14 @@ Communication with the NCBI.
"""
import functools
from Bio import Entrez
from .config import settings
from .redisclient import client as redis
def _get_link(source_accession, source_db, target_db, match_link_name):
def _get_link(source_accession, source_db, target_db, match_link_name,
source_version=None, match_version=True):
"""
Retrieve a linked accession number from the NCBI.
......@@ -22,88 +21,142 @@ def _get_link(source_accession, source_db, target_db, match_link_name):
:arg function match_link_name: For each link found, this function is
called with the link name (`str`) and it should return `True` iff the
link is to be used.
:returns: Linked accession number (without version number) or `None` if no
link can be found.
:rtype: str
:arg int source_version: Optional version number for `source_accession`.
:arg bool match_version: If `False`, the link does not have to match
`source_version`.
:returns: Tuple of `(target_accession, target_version)` representing the
link target, or `None` if no link can be found. If `source_version` is
not specified or `match_version` is `False`, `target_version` can be
`None`.
:rtype: tuple(str, int)
"""
Entrez.email = settings.EMAIL
handle = Entrez.esearch(db=source_db, term=source_accession)
# If we are currently strictly matching on version, we can try again if
# no result is found. Otherwise, we just report failure.
def fail_or_retry():
if source_version is None or match_version:
return None
return _get_link(source_accession, source_db, target_db,
match_link_name, source_version=None,
match_version=False)
if source_version is None:
source = source_accession
else:
source = '%s.%d' % (source_accession, source_version)
# Find source record.
handle = Entrez.esearch(db=source_db, term=source)
try:
result = Entrez.read(handle)
except Entrez.Parser.ValidationError:
return None
return fail_or_retry()
finally:
handle.close()
try:
source_gi = unicode(result['IdList'][0])
except IndexError:
return None
return fail_or_retry()
# Find link from source record to target record.
handle = Entrez.elink(dbfrom=source_db, db=target_db, id=source_gi)
try:
result = Entrez.read(handle)
except Entrez.Parser.ValidationError:
return None
return fail_or_retry()
finally:
handle.close()
if not result[0]['LinkSetDb']:
return None
return fail_or_retry()
for link in result[0]['LinkSetDb']:
if match_link_name(unicode(link['LinkName'])):
target_gi = unicode(link['Link'][0]['Id'])
break
else:
return None
return fail_or_retry()
# Get target record.
handle = Entrez.efetch(
db=target_db, id=target_gi, rettype='acc', retmode='text')
target_accession = unicode(handle.read()).split('.')[0]
target = unicode(handle.read()).strip().split('.')
handle.close()
return target_accession
target_accession = target[0]
target_version = int(target[1]) if source_version is not None else None
return target_accession, target_version
def cache_link(source, target):
"""
Decorator to add caching to link retrieval.
:arg str source: Source database (used to construct cache key).
:arg str target: Target database (used to construct cache key).
def _get_link_cached(forward_key, reverse_key, source_accession, source_db,
target_db, match_link_name, source_version=None,
match_version=True):
"""
forward_key = 'ncbi:%s-to-%s:%%s' % (source, target)
reverse_key = 'ncbi:%s-to-%s:%%s' % (target, source)
Version of :func:`_get_link` with caching.
def cache_source_to_target(f):
@functools.wraps(f)
def cached_f(accession):
result = redis.get(forward_key % accession)
if result is not None:
# The empty string is a cached negative result, which we return as
# `None`.
return result or None
:arg str forward_key: Cache key format string for the forward direction.
The source term will be substituted in this template.
:arg str reverse_key: Cache key format string for the reverse direction.
The target term will be substituted in this template.
result = f(accession)
if result is None:
redis.setex(forward_key % accession,
settings.NEGATIVE_LINK_CACHE_EXPIRATION, '')
return None
The cache value for a negative result (no link found) is the empty string
and expires in `NEGATIVE_LINK_CACHE_EXPIRATION` seconds.
"""
if source_version is not None:
# Query cache for link with version.
target = redis.get(forward_key %
('%s.%d' % (source_accession, source_version)))
if target == '':
return None
if target:
target_accession, target_version = target.split('.')
return target_accession, int(target_version)
if source_version is None or not match_version:
# Query cache for link without version.
target = redis.get(forward_key % source_accession)
if target == '':
return None
if target is not None:
return target, None
# Query NCBI service.
try:
target_accession, target_version = _get_link(
source_accession, source_db, target_db, match_link_name,
source_version=source_version, match_version=match_version)
except TypeError:
# No link was found.
if source_version is not None:
# Store a negative forward link with version.
redis.setex(forward_key %
('%s.%d' % (source_accession, source_version)),
settings.NEGATIVE_LINK_CACHE_EXPIRATION, '')
if source_version is None or not match_version:
# Store a negative forward link without version.
redis.setex(forward_key % source_accession,
settings.NEGATIVE_LINK_CACHE_EXPIRATION, '')
return None
# We store the resulting link in both directions.
redis.set(forward_key % accession, result)
redis.set(reverse_key % result, accession)
return result
# Store the link without version in both directions.
redis.set(forward_key % source_accession, target_accession)
redis.set(reverse_key % target_accession, source_accession)
return cached_f
if source_version is not None and target_version is not None:
# Store the link with version in both directions.
redis.set(forward_key % ('%s.%d' % (source_accession, source_version)),
'%s.%d' % (target_accession, target_version))
redis.set(reverse_key % ('%s.%d' % (target_accession, target_version)),
'%s.%d' % (source_accession, source_version))
return cache_source_to_target
return target_accession, target_version
@cache_link('transcript', 'protein')
def transcript_to_protein(transcript_accession):
def transcript_to_protein(transcript_accession, transcript_version=None,
match_version=True):
"""
Try to find the protein linked to a transcript.
......@@ -113,18 +166,26 @@ def transcript_to_protein(transcript_accession):
:arg str transcript_accession: Accession number of the transcript for
which we want to find the protein (without version number).
:returns: Accession number of a protein (without version number) or `None`
if no link can be found.
:rtype: str
:arg int transcript_version: Transcript version number. Please provide
this if available, also if it does not need to match. This will enrich
the cache.
:arg bool match_version: If `False`, the link does not have to match
`transcript_version`.
:returns: Tuple of `(protein_accession, protein_version)` representing the
linked protein, or `None` if no link can be found. If
`transcript_version` is not specified or `match_version` is `False`,
`protein_version` can be `None`.
:rtype: tuple(str, int)
"""
return _get_link(
return _get_link_cached(
'ncbi:transcript-to-protein:%s', 'ncbi:protein-to-transcript:%s',
transcript_accession, 'nucleotide', 'protein',
lambda link: link in ('nuccore_protein', 'nuccore_protein_cds'))
lambda link: link in ('nuccore_protein', 'nuccore_protein_cds'),
source_version=transcript_version, match_version=match_version)
@cache_link('protein', 'transcript')
def protein_to_transcript(protein_accession):
def protein_to_transcript(protein_accession, protein_version=None,
match_version=True):
"""
Try to find the transcript linked to a protein.
......@@ -134,11 +195,14 @@ def protein_to_transcript(protein_accession):
:arg str protein_accession: Accession number of the protein for which we
want to find the transcript (without version number).
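:arg int protein_version: Protein version number. Please provide this if
available, also if it does not need to match. This will enrich the
cache.
:arg bool match_version: If `False`, the link does not have to match
`protein_version`.
:returns: Tuple of `(transcript_accession, transcript_version)`
representing the linked transcript, or `None` if no link can be found. If
`protein_version` is not specified or `match_version` is `False`,
`transcript_version` can be `None`.
:rtype: tuple(str, int)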
"""
return _get_link(
return _get_link_cached(
'ncbi:protein-to-transcript:%s', 'ncbi:transcript-to-protein:%s',
protein_accession, 'protein', 'nucleotide',
lambda link: link == 'protein_nuccore_mrna')
lambda link: link == 'protein_nuccore_mrna',
source_version=protein_version, match_version=match_version)
......@@ -220,8 +220,14 @@ class GBparser():
i.proteinLink = i.protein_id.split('.')[0]
#if
else : # Tag an mRNA with the protein id too.
i.proteinLink = \
ncbi.transcript_to_protein(i.transcript_id.split('.')[0])
accession, version = i.transcript_id.split('.')
protein = ncbi.transcript_to_protein(
accession, int(version), match_version=False)
if protein is None:
i.proteinLink = None
else:
# We ignore the version.
i.proteinLink = protein[0]
i.positionList = self.__locationList2posList(i)
i.location = self.__location2pos(i.location) #FIXME
#if not i.positionList : # FIXME ???
......
......@@ -73,6 +73,35 @@ def available_references():
return yaml.safe_load(f)
def _add_links(settings, links):
    """
    Populate the cache with transcript-protein links, in both directions.

    :arg settings: Object providing `NEGATIVE_LINK_CACHE_EXPIRATION`, used as
      the expiration time for negative links.
    :arg links: Iterable of `(transcript, protein)` pairs. Either item may be
      `None`, in which case a negative link (the empty string) is stored for
      the other item.
    """
    def store(template, source, target):
        # Without a source there is no key to write in this direction.
        if source is None:
            return

        key = template % source

        if target is None:
            # Negative link: expiring empty string.
            redis.setex(key, settings.NEGATIVE_LINK_CACHE_EXPIRATION, '')
            return

        redis.set(key, target)
        if '.' in source:
            # The source carries a version suffix; additionally store the
            # link with everything after the last dot removed on both sides.
            redis.set(key.rsplit('.', 1)[0], target.rsplit('.', 1)[0])

    for transcript, protein in links:
        store('ncbi:transcript-to-protein:%s', transcript, protein)
        store('ncbi:protein-to-transcript:%s', protein, transcript)
@pytest.fixture
def references(request, settings, db, available_references):
try:
......@@ -99,23 +128,7 @@ def references(request, settings, db, available_references):
references.append(Reference(
accession, entry['checksum'], geninfo_identifier=geninfo_id))
for transcript_accession, protein_accession in entry.get('links', []):
if transcript_accession is not None:
key = 'ncbi:transcript-to-protein:%s' % transcript_accession
if protein_accession is not None:
redis.set(key, protein_accession)
else:
redis.setex(key,
settings.NEGATIVE_LINK_CACHE_EXPIRATION,
'')
if protein_accession is not None:
key = 'ncbi:protein-to-transcript:%s' % protein_accession
if transcript_accession is not None:
redis.set(key, transcript_accession)
else:
redis.setex(key,
settings.NEGATIVE_LINK_CACHE_EXPIRATION,
'')
_add_links(settings, entry.get('links', []))
db.session.add_all(references)
db.session.commit()
......@@ -123,6 +136,17 @@ def references(request, settings, db, available_references):
return references
@pytest.fixture
def links(request, settings, db, available_references):
    """
    Fixture adding the transcript-protein links given as fixture parameter to
    the cache.

    Without a fixture parameter nothing is stored and the empty list is
    returned; otherwise the parameter itself is returned.
    """
    try:
        requested = request.param
    except AttributeError:
        # Not parametrized, so there is nothing to add.
        return []
    else:
        _add_links(settings, requested)
        return requested
@pytest.fixture
def hg19(db):
"""
......@@ -573,3 +597,63 @@ def hg19_transcript_mappings(db, hg19):
version=3))
db.session.commit()
def with_references(*references):
    """
    Build a decorator that parametrizes a test with the `references` fixture.

    With this decorator, a test can be written as::

        @with_references('NM_004006.1', 'NM_004006.2')
        def test_references():
            pass

    which is shorthand for::

        @pytest.mark.usefixtures('references')
        @pytest.mark.parametrize('references',
                                 [['NM_004006.1', 'NM_004006.2']],
                                 ids=['NM_004006.1,NM_004006.2'],
                                 indirect=True)
        def test_references():
            pass
    """
    def test_with_references(test):
        use_fixture = pytest.mark.usefixtures('references')
        # All references form a single parameter set, labelled with their
        # comma-separated names.
        parametrize = pytest.mark.parametrize(
            'references', [references], indirect=True,
            ids=[','.join(references)])
        return use_fixture(parametrize(test))

    return test_with_references
def with_links(*links):
    """
    Build a decorator that parametrizes a test with the `links` fixture
    (transcript-protein links).

    With this decorator, a test can be written as::

        @with_links(('NM_018650', 'NP_061120'), ('NM_027221', None))
        def test_links():
            pass

    which is shorthand for::

        @pytest.mark.usefixtures('links')
        @pytest.mark.parametrize('links',
                                 [(('NM_018650', 'NP_061120'),
                                   ('NM_027221', None))],
                                 ids=['NM_018650/NP_061120,NM_027221/*'],
                                 indirect=True)
        def test_links():
            pass
    """
    def test_with_links(test):
        use_fixture = pytest.mark.usefixtures('links')
        # One id for the whole set: each link is rendered as
        # "transcript/protein" with "*" standing in for a missing side.
        label = ','.join('/'.join(item or '*' for item in pair)
                         for pair in links)
        parametrize = pytest.mark.parametrize(
            'links', [links], indirect=True, ids=[label])
        return use_fixture(parametrize(test))

    return test_with_links
......@@ -5,33 +5,394 @@ Tests for the mutalyzer.ncbi module.
from __future__ import unicode_literals
import Bio.Entrez
import pytest
from mutalyzer import ncbi
from mutalyzer.redisclient import client as redis
from fixtures import with_links
pytestmark = [
pytest.mark.usefixtures('references'),
pytest.mark.parametrize('references', [['MARK1']], indirect=True)
]
@pytest.fixture
def entrez(request, monkeypatch):
"""
Fixture monkey-patching the NCBI Entrez API to return transcript-protein
links defined in the fixture parameter.
def test_transcript_to_protein():
The fixture is similar to the :func:`fixtures.links` fixture, but instead
of storing the links in the cache, the API is monkey-patched.
"""
Get protein for transcript.
try:
links = request.param
except AttributeError:
return []
# We need two-way lookup.
transcript_to_protein = dict(links)
protein_to_transcript = dict((p, t) for t, p in links)
# Store original methods which should be called as a fallback.
esearch = Bio.Entrez.esearch
elink = Bio.Entrez.elink
efetch = Bio.Entrez.efetch
# Intermediate Entrez result object which can be parsed with Entrez.read.
class EntrezResult(object):
def __init__(self, result):
self.result = result
def read(self):
return self.result
def close(self):
pass
def mock_esearch(db=None, term=None):
if ((db == 'nucleotide' and term in transcript_to_protein)
or (db == 'protein' and term in protein_to_transcript)):
return EntrezResult({
'Count': '1',
'RetMax': '1',
'IdList': [term],
'TranslationSet': [],
'RetStart': '0',
'QueryTranslation': ''
})
return esearch(db=db, term=term)
def mock_elink(dbfrom=None, db=None, id=None):
if dbfrom == 'nucleotide' and id in transcript_to_protein:
if transcript_to_protein[id] is None:
linkset = []
else:
linkset = [{'DbTo': 'protein',
'Link': [{'Id': transcript_to_protein[id]}],
'LinkName': 'nuccore_protein'}]
return EntrezResult([{
'LinkSetDb': linkset,
'DbFrom': 'nuccore',
'IdList': [id],
'LinkSetDbHistory': [],
'ERROR': []
}])
if dbfrom == 'protein' and id in protein_to_transcript:
if protein_to_transcript[id] is None:
linkset = []
else:
linkset = [{'DbTo': 'nuccore',
'Link': [{'Id': '568815587'},
{'Id': '528476600'},
{'Id': '568815270'},
{'Id': '528474155'},
{'Id': '452415518'},
{'Id': '452405284'},
{'Id': '383209650'}],
'LinkName': 'protein_nuccore'},
{'DbTo': 'nuccore',
'Link': [{'Id': '4506864'}],
'LinkName': 'protein_nuccore_cds'},
{'DbTo': 'nuccore',
'Link': [{'Id': '48735311'},
{'Id': '48734961'},
{'Id': '47682402'},
{'Id': '18490203'},
{'Id': '16359050'},
{'Id': '16306997'},
{'Id': '15929518'},
{'Id': '15214938'},
{'Id': '13528941'}],
'LinkName': 'protein_nuccore_mgc_refseq'},
{'DbTo': 'nuccore',
'Link': [{'Id': protein_to_transcript[id]}],
'LinkName': 'protein_nuccore_mrna'}]
return EntrezResult([{
'LinkSetDb': linkset,
'DbFrom': 'protein',
'IdList': [id],
'LinkSetDbHistory': [],
'ERROR': []
}])
return elink(dbfrom=dbfrom, db=db, id=id)
def mock_efetch(db=None, id=None, rettype=None, retmode=None):
if ((db == 'nucleotide' and id in transcript_to_protein)
or (db == 'protein' and id in protein_to_transcript)):
if '.' not in id:
id += '.9999'
return EntrezResult(id + '\n')
return efetch(db=db, id=id, rettype=rettype, retmode=retmode)
def mock_read(result):
return result.read()
monkeypatch.setattr(Bio.Entrez, 'esearch', mock_esearch)
monkeypatch.setattr(Bio.Entrez, 'elink', mock_elink)
monkeypatch.setattr(Bio.Entrez, 'efetch', mock_efetch)
monkeypatch.setattr(Bio.Entrez, 'read', mock_read)
return links
def with_entrez(*links):
"""
Convenience decorator for parameterizing tests with transcript-protein
link fixtures in the Entrez API.
Similar to :func:`fixtures.with_links`.
"""
assert ncbi.transcript_to_protein('NM_018650') == 'NP_061120'
def test_with_entrez(test):
return pytest.mark.usefixtures('entrez')(
pytest.mark.parametrize(
'entrez', [links], indirect=True,
ids=[','.join('/'.join(a or '*' for a in l)
for l in links)])(test))
return test_with_entrez
def test_transcript_to_protein_negative():
@with_entrez(('NM_11111.1', None),
('NM_11111.2', 'NP_11111.2'),
('NM_22222.2', None),
('NM_22222.3', 'NP_22222.3'),
('NM_33333.4', None),
('NM_33333.5', 'NP_33333.5'),
('NM_44444', None),
('NM_44444.5', None),
('NM_44444.6', None),
('NM_55555', 'NP_55555'),
('NM_55555.6', None),
('NM_66666', 'NP_66666'),
('NM_66666.6', 'NP_66666.6'),
('NM_66666.7', 'NP_66666.7'),
('NM_66666.8', None),
('NM_77777', 'NP_77777'),
('NM_77777.7', 'NP_77777.7'),
('NM_77777.8', None),
('NM_88888', None),
('NM_88888.8', None),
('NM_88888.9', 'NP_88888.9'))
@with_links(('NM_11111', 'NP_11111'),
('NM_22222', None),
('NM_33333.3', 'NP_33333.3'),
('NM_44444.4', None),
('NM_55555.5', None),
('NM_66666.6', None))
@pytest.mark.parametrize('accession,version,match_version,expected', [
('NM_11111', None, False, ('NP_11111', None)),
('NM_11111', 1, False, ('NP_11111', None)),
('NM_11111', 1, True, None),
('NM_11111', 2, False, ('NP_11111', None)),
('NM_11111', 2, True, ('NP_11111', 2)),
('NM_22222', None, False, None),
('NM_22222', 2, False, None),
('NM_22222', 2, True, None),
('NM_22222', 3, False, None),
('NM_22222', 3, True, ('NP_22222', 3)),
('NM_33333', None, False, ('NP_33333', None)),
('NM_33333', 3, True, ('NP_33333', 3)),
('NM_33333', 3, False, ('NP_33333', 3)),
('NM_33333', 4, True, None),
('NM_33333', 4, False, ('NP_33333', None)),
('NM_33333', 5, True, ('NP_33333', 5)),
('NM_33333', 5, False, ('NP_33333', None)),
('NM_44444', None, False, None),
('NM_44444', 4, True, None),
('NM_44444', 4, False, None),
('NM_44444', 5, True, None),
('NM_44444', 5, False, None),
('NM_44444', 6, True, None),
('NM_44444', 6, False, None),
('NM_55555', None, False, ('NP_55555', None)),
('NM_55555', 5, True, None),
('NM_55555', 5, False, None),
('NM_55555', 6, True, None),
('NM_55555', 6, False, ('NP_55555', None)),
('NM_66666', None, False, ('NP_66666', None)),
('NM_66666', 6, True, None),
('NM_66666', 6, False, None),
('NM_66666', 7, True, ('NP_66666', 7)),
('NM_66666', 7, False, ('NP_66666', 7)),
('NM_66666', 8, True, None),
('NM_66666', 8, False, ('NP_66666', None)),
('NM_77777', None, False, ('NP_77777', None)),
('NM_77777', 7, False, ('NP_77777', 7)),
('NM_77777', 7, True, ('NP_77777', 7)),
('NM_77777', 8, False, ('NP_77777', None)),
('NM_77777', 8, True, None),
('NM_88888', None, False, None),
('NM_88888', 8, False, None),
('NM_88888', 8, True, None),
('NM_88888', 9, False, ('NP_88888', 9)),
('NM_88888', 9, True, ('NP_88888', 9))])
def test_transcript_to_protein(accession, version, match_version, expected):
"""
Get no protein for transcript.
Get protein for transcript.
Both the Entrez API and our cache are fixed with a set of
transcript-protein links. This test is parametrized with a list of
arguments for the :func:`ncbi.transcript_to_protein` function and the
corresponding expected result.
"""
assert ncbi.transcript_to_protein('XM_005273133') is None
assert ncbi.transcript_to_protein(
accession, version, match_version) == expected
def test_protein_to_transcript():
@with_entrez((None, 'NP_11111.1'),
('NM_11111.2', 'NP_11111.2'),
(None, 'NP_22222.2'),
('NM_22222.3', 'NP_22222.3'),
(None, 'NP_33333.4'),
('NM_33333.5', 'NP_33333.5'),
(None, 'NP_44444'),
(None, 'NP_44444.5'),
(None, 'NP_44444.6'),
('NM_55555', 'NP_55555'),
(None, 'NP_55555.6'),
('NM_66666', 'NP_66666'),
('NM_66666.6', 'NP_66666.6'),
('NM_66666.7', 'NP_66666.7'),
(None, 'NP_66666.8'),
('NM_77777', 'NP_77777'),
('NM_77777.7', 'NP_77777.7'),
(None, 'NP_77777.8'),
(None, 'NP_88888'),
(None, 'NP_88888.8'),
('NM_88888.9', 'NP_88888.9'))
@with_links(('NM_11111', 'NP_11111'),
(None, 'NP_22222'),
('NM_33333.3', 'NP_33333.3'),
(None, 'NP_44444.4'),
(None, 'NP_55555.5'),
(None, 'NP_66666.6'))
@pytest.mark.parametrize('accession,version,match_version,expected', [
('NP_11111', None, False, ('NM_11111', None)),
('NP_11111', 1, False, ('NM_11111', None)),
('NP_11111', 1, True, None),
('NP_11111', 2, False, ('NM_11111', None)),
('NP_11111', 2, True, ('NM_11111', 2)),
('NP_22222', None, False, None),
('NP_22222', 2, False, None),
('NP_22222', 2, True, None),
('NP_22222', 3, False, None),
('NP_22222', 3, True, ('NM_22222', 3)),
('NP_33333', None, False, ('NM_33333', None)),
('NP_33333', 3, True, ('NM_33333', 3)),
('NP_33333', 3, False, ('NM_33333', 3)),
('NP_33333', 4, True, None),
('NP_33333', 4, False, ('NM_33333', None)),
('NP_33333', 5, True, ('NM_33333', 5)),
('NP_33333', 5, False, ('NM_33333', None)),
('NP_44444', None, False, None),
('NP_44444', 4, True, None),
('NP_44444', 4, False, None),
('NP_44444', 5, True, None),
('NP_44444', 5, False, None),
('NP_44444', 6, True, None),
('NP_44444', 6, False, None),
('NP_55555', None, False, ('NM_55555', None)),
('NP_55555', 5, True, None),
('NP_55555', 5, False, None),
('NP_55555', 6, True, None),
('NP_55555', 6, False, ('NM_55555', None)),
('NP_66666', None, False, ('NM_66666', None)),
('NP_66666', 6, True, None),
('NP_66666', 6, False, None),
('NP_66666', 7, True, ('NM_66666', 7)),
('NP_66666', 7, False, ('NM_66666', 7)),
('NP_66666', 8, True, None),
('NP_66666', 8, False, ('NM_66666', None)),
('NP_77777', None, False, ('NM_77777', None)),
('NP_77777', 7, False, ('NM_77777', 7)),
('NP_77777', 7, True, ('NM_77777', 7)),
('NP_77777', 8, False, ('NM_77777', None)),
('NP_77777', 8, True, None),
('NP_88888', None, False, None),
('NP_88888', 8, False, None),
('NP_88888', 8, True, None),
('NP_88888', 9, False, ('NM_88888', 9)),
('NP_88888', 9, True, ('NM_88888', 9))])
def test_protein_to_transcript(accession, version, match_version, expected):
"""
Get transcript for protein.
Both the Entrez API and our cache are fixed with a set of
transcript-protein links. This test is parametrized with a list of
arguments for the :func:`ncbi.protein_to_transcript` function and the
corresponding expected result.
Fixtures and parameters of this test mirror those of the
`test_transcript_to_protein` test.
"""
assert ncbi.protein_to_transcript(
accession, version, match_version) == expected
@with_entrez(('NM_11111', None),
('NM_22222', 'NP_22222'),
('NM_33333', None),
('NM_33333.3', None),
('NM_44444', None),
('NM_44444.4', 'NP_44444.4'))
@pytest.mark.parametrize('accession,version,match_version,expected_forward,expected_reverse', [
('NM_11111', None, False, [('NM_11111', None)], []),
('NM_22222', None, False,
[('NM_22222', 'NP_22222')], [('NM_22222', 'NP_22222')]),
('NM_33333', None, False, [('NM_33333', None)], []),
('NM_33333', 3, False, [('NM_33333', None), ('NM_33333.3', None)], []),
('NM_33333', 3, True, [('NM_33333.3', None)], []),
('NM_44444', None, False, [('NM_44444', None)], []),
('NM_44444', 4, False,
[('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')],
[('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')]),
('NM_44444', 4, True,
[('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')],
[('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')])])
def test_transcript_to_protein_cache(accession, version, match_version,
expected_forward, expected_reverse):
"""
Get protein for transcript and check the resulting cache state.
"""
assert ncbi.protein_to_transcript('NP_061120') == 'NM_018650'
ncbi.transcript_to_protein(accession, version, match_version)
forward = [(key.split(':')[-1], redis.get(key) or None)
for key in redis.keys('ncbi:transcript-to-protein:*')]
assert sorted(forward) == sorted(expected_forward)
reverse = [(redis.get(key) or None, key.split(':')[-1])
for key in redis.keys('ncbi:protein-to-transcript:*')]
assert sorted(reverse) == sorted(expected_reverse)
@with_entrez((None, 'NP_11111'),
             ('NM_22222', 'NP_22222'),
             (None, 'NP_33333'),
             (None, 'NP_33333.3'),
             (None, 'NP_44444'),
             ('NM_44444.4', 'NP_44444.4'))
@pytest.mark.parametrize('accession,version,match_version,expected_forward,expected_reverse', [
    ('NP_11111', None, False, [], [(None, 'NP_11111')]),
    ('NP_22222', None, False,
     [('NM_22222', 'NP_22222')], [('NM_22222', 'NP_22222')]),
    ('NP_33333', None, False, [], [(None, 'NP_33333')]),
    ('NP_33333', 3, False, [], [(None, 'NP_33333'), (None, 'NP_33333.3')]),
    ('NP_33333', 3, True, [], [(None, 'NP_33333.3')]),
    ('NP_44444', None, False, [], [(None, 'NP_44444')]),
    ('NP_44444', 4, False,
     [('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')],
     [('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')]),
    ('NP_44444', 4, True,
     [('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')],
     [('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')])])
def test_protein_to_transcript_cache(accession, version, match_version,
                                     expected_forward, expected_reverse):
    """
    Look up the transcript for a protein, then verify which links ended up
    in the cache for both directions.

    Negative links are stored as the empty string and are compared here as
    `None`.
    """
    ncbi.protein_to_transcript(accession, version, match_version)

    # Transcript-to-protein entries as (transcript, protein) pairs.
    forward = []
    for key in redis.keys('ncbi:transcript-to-protein:*'):
        transcript = key.split(':')[-1]
        forward.append((transcript, redis.get(key) or None))
    assert sorted(forward) == sorted(expected_forward)

    # Protein-to-transcript entries, also as (transcript, protein) pairs.
    reverse = []
    for key in redis.keys('ncbi:protein-to-transcript:*'):
        protein = key.split(':')[-1]
        reverse.append((redis.get(key) or None, protein))
    assert sorted(reverse) == sorted(expected_reverse)
......@@ -11,6 +11,8 @@ import pytest
from mutalyzer.parsers.genbank import GBparser
from fixtures import with_references
@pytest.fixture
def parser():
......@@ -35,7 +37,7 @@ def test_product_lists_mismatch(parser, products, expected):
assert parser._find_mismatch(products) == expected
@pytest.mark.parametrize('references', [['A1BG']], indirect=True)
@with_references('A1BG')
def test_only_complete_genes_included(settings, references, parser):
"""
Incomplete genes from the reference file should be ignored.
......
......@@ -19,6 +19,8 @@ from mutalyzer import File
from mutalyzer import output
from mutalyzer import Scheduler
from fixtures import with_references
pytestmark = pytest.mark.usefixtures('db')
......@@ -85,9 +87,7 @@ def test_large_input():
_batch_job_plain_text(variants, expected, 'syntax-checker')
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['AB026906.1', 'NM_000059.3']],
indirect=True)
@with_references('AB026906.1', 'NM_000059.3')
def test_name_checker():
"""
Simple name checker batch job.
......@@ -212,8 +212,7 @@ def test_name_checker_altered():
_batch_job_plain_text(variants, expected, 'name-checker')
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NM_000059.3']], indirect=True)
@with_references('NM_000059.3')
def test_name_checker_skipped():
"""
Name checker job with skipped entries.
......
......@@ -20,6 +20,8 @@ import mutalyzer
from mutalyzer.services.soap import application
from mutalyzer import Scheduler
from fixtures import with_references
@pytest.fixture
def server():
......@@ -188,8 +190,7 @@ def test_gettranscriptsbygenename_invalid(api):
assert not r
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['AF230870.1']], indirect=True)
@with_references('AF230870.1')
def test_gettranscriptsandinfo_valid(api):
"""
Running getTranscriptsAndInfo with a valid genomic reference should
......@@ -203,8 +204,7 @@ def test_gettranscriptsandinfo_valid(api):
assert t in names
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True)
@with_references('AL449423.14')
def test_gettranscriptsandinfo_restricted_valid(api):
"""
Running getTranscriptsAndInfo with a valid genomic reference and a
......@@ -332,9 +332,7 @@ def test_info(api):
assert r.version == mutalyzer.__version__
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize(
'references', [['AB026906.1', 'AL449423.14', 'NM_003002.2']], indirect=True)
@with_references('AB026906.1', 'AL449423.14', 'NM_003002.2')
def test_getcache(output, api):
"""
Running the getCache method should give us the expected number of
......@@ -393,8 +391,7 @@ def test_gettranscripts_with_versions(api):
assert t in r.string
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NM_003002.2']], indirect=True)
@with_references('NM_003002.2')
def test_runmutalyzer(api):
"""
Just a runMutalyzer test.
......@@ -432,8 +429,7 @@ def test_runmutalyzer_reference_info_nm(api):
assert r.molecule == 'n'
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NM_003002.2']], indirect=True)
@with_references('NM_003002.2')
def test_runmutalyzer_reference_info_nm_version(api):
"""
Get reference info for an NM variant with version.
......@@ -448,8 +444,7 @@ def test_runmutalyzer_reference_info_nm_version(api):
assert r.molecule == 'n'
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['LRG_1']], indirect=True)
@with_references('LRG_1')
def test_runmutalyzer_reference_info_lrg(api):
"""
Get reference info for an LRG variant.
......@@ -461,8 +456,7 @@ def test_runmutalyzer_reference_info_lrg(api):
assert r.molecule == 'g'
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True)
@with_references('NG_012772.1')
def test_runmutalyzer_reference_info_ng(api):
"""
Get reference info for an NG variant without version.
......@@ -489,8 +483,7 @@ def test_runmutalyzer_reference_info_ng(api):
assert r.molecule == 'g'
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NG_009105.1']], indirect=True)
@with_references('NG_009105.1')
def test_runmutalyzer_reference_info_ng_version(api):
"""
Get reference info for an NG variant with version.
......@@ -505,8 +498,7 @@ def test_runmutalyzer_reference_info_ng_version(api):
assert r.molecule == 'g'
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True)
@with_references('NG_012772.1')
def test_runmutalyzer_reference_info_gi(api):
"""
Get reference info for a GI variant.
......@@ -521,8 +513,7 @@ def test_runmutalyzer_reference_info_gi(api):
assert r.molecule == 'g'
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NM_000143.3']], indirect=True)
@with_references('NM_000143.3')
def test_runmutalyzer_exons(api):
"""
Exon table in runMutalyzer output.
......@@ -544,10 +535,7 @@ def test_runmutalyzer_exons(api):
assert (exon.gStart, exon.gStop, exon.cStart, exon.cStop) == expected_exon
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize(
'references', [['AB026906.1', 'NM_003002.2', 'AL449423.14']],
indirect=True)
@with_references('AB026906.1', 'NM_003002.2', 'AL449423.14')
def test_batchjob(api):
"""
Submit a batch job.
......
This diff is collapsed.
......@@ -18,6 +18,8 @@ from mutalyzer import announce, Scheduler
from mutalyzer.db.models import BatchJob
from mutalyzer.website import create_app
from fixtures import with_references
# TODO: Tests for /upload.
......@@ -125,9 +127,7 @@ def test_description_extractor_raw_fastq(website):
assert '[5_6insTT;17del;26A>C;35dup]' in r.data
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize(
'references', [['NM_004006.1', 'NM_004006.2']], indirect=True)
@with_references('NM_004006.1', 'NM_004006.2')
def test_description_extractor_refseq(website):
"""
Submit two accession numbers to the variant description extractor.
......@@ -247,8 +247,7 @@ def test_checksyntax_invalid(website):
assert 'The "^" indicates the position where the error occurred' in r.data
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NM_002001.2']], indirect=True)
@with_references('NM_002001.2')
def test_check_valid(website):
"""
Submit the name checker form with a valid variant.
......@@ -273,8 +272,7 @@ def test_check_invalid(website):
assert 'The "^" indicates the position where the error occurred' in r.data
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NP_064445.1']], indirect=True)
@with_references('NP_064445.1')
def test_check_protein_reference(website):
"""
Submit the name checker form with a protein reference sequence (not
......@@ -287,8 +285,7 @@ def test_check_protein_reference(website):
assert 'Protein reference sequences are not supported' in r.data
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NM_002001.2']], indirect=True)
@with_references('NM_002001.2')
def test_check_noninteractive(website):
"""
Submit the name checker form non-interactively.
......@@ -304,8 +301,7 @@ def test_check_noninteractive(website):
assert 'Raw variant 1: deletion of 1' in r.data
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True)
@with_references('NG_012772.1')
def test_check_interactive_links(website):
"""
Submitting interactively should have links to transcripts also
......@@ -427,10 +423,7 @@ def _batch(website, job_type='name-checker', assembly_name_or_alias=None,
return r.data
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize(
'references', [['AB026906.1', 'NM_003002.2', 'AL449423.14']],
indirect=True)
@with_references('AB026906.1', 'NM_003002.2', 'AL449423.14')
def test_batch_namechecker(website):
"""
Submit the batch name checker form.
......@@ -610,8 +603,7 @@ def test_batch_syntaxchecker_oldstyle(website):
header='Input\tStatus')
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['AB026906.1']], indirect=True)
@with_references('AB026906.1')
def test_batch_namechecker_restriction_sites(website):
"""
Submit the batch name checker form and see if restriction site effects
......@@ -703,8 +695,7 @@ def test_annotated_soap_api(website):
assert 'Web Service: Mutalyzer' in r.data
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NG_012337.1']], indirect=True)
@with_references('NG_012337.1')
def test_getgs(website):
"""
Test the /getGS interface used by LOVD2.
......@@ -721,8 +712,7 @@ def test_getgs(website):
assert '<input' not in r.data
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NG_012337.1']], indirect=True)
@with_references('NG_012337.1')
def test_getgs_coding_multiple_transcripts(website):
"""
Test the /getGS interface on a coding description and genomic
......@@ -737,8 +727,7 @@ def test_getgs_coding_multiple_transcripts(website):
assert 'description=NG_012337.1' in r.location
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True)
@with_references('NG_008939.1')
def test_getgs_variant_error(website):
"""
Test the /getGS interface on a variant description with an error.
......@@ -861,8 +850,7 @@ def test_upload_local_file_invalid(website):
assert 'The file could not be parsed.' in r.data
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NM_002001.2']], indirect=True)
@with_references('NM_002001.2')
def test_reference(website):
"""
Test if reference files are cached.
......@@ -878,8 +866,7 @@ def test_reference(website):
assert r.data == bz2.BZ2File(path).read()
@pytest.mark.usefixtures('references')
@pytest.mark.parametrize('references', [['NM_002001.2']], indirect=True)
@with_references('NM_002001.2')
def test_reference_head(website):
"""
Test if reference files are cached, by issuing a HEAD request.
......@@ -901,8 +888,8 @@ def test_reference_head_none(website):
assert r.status_code == 404
@pytest.mark.usefixtures('references', 'hg19_transcript_mappings')
@pytest.mark.parametrize('references', [['NM_003002.2']], indirect=True)
@pytest.mark.usefixtures('hg19_transcript_mappings')
@with_references('NM_003002.2')
def test_bed(website):
"""
BED track for variant.
......@@ -913,8 +900,8 @@ def test_bed(website):
assert '\t'.join(['chr11', '111959694', '111959695', '274G>T', '0', '+']) in r.data
@pytest.mark.usefixtures('references', 'hg19_transcript_mappings')
@pytest.mark.parametrize('references', [['NM_000132.3']], indirect=True)
@pytest.mark.usefixtures('hg19_transcript_mappings')
@with_references('NM_000132.3')
def test_bed_reverse(website):
"""
BED track for variant on reverse strand.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment