"""
Tests for the mutalyzer.ncbi module.
"""


from __future__ import unicode_literals

import Bio.Entrez
import pytest

from mutalyzer import ncbi
from mutalyzer.redisclient import client as redis

from fixtures import with_links


@pytest.fixture
def entrez(request, monkeypatch):
    """
    Fixture monkey-patching the NCBI Entrez API to return transcript-protein
    links defined in the fixture parameter.

    The fixture is similar to the :func:`fixtures.links` fixture, but instead
    of storing the links in the cache, the API is monkey-patched.

    The fixture parameter is an iterable of `(transcript, protein)` pairs,
    where either side may be `None` to denote a missing link. Queries for
    accessions that are not in the parameter fall through to the original
    (unpatched) Entrez functions.

    Returns the links it was parametrized with, or an empty list if the
    fixture was requested without a parameter (in which case nothing is
    patched).
    """
    try:
        links = request.param
    except AttributeError:
        return []

    # We need two-way lookup.
    transcript_to_protein = dict(links)
    protein_to_transcript = {p: t for t, p in links}

    # Store original methods which should be called as a fallback.
    esearch = Bio.Entrez.esearch
    elink = Bio.Entrez.elink
    efetch = Bio.Entrez.efetch

    # Intermediate Entrez result object which can be parsed with Entrez.read.
    class EntrezResult(object):
        def __init__(self, result):
            self.result = result

        def read(self):
            return self.result

        def close(self):
            pass

    # NOTE: The parameter names of the mock functions (including `id`, which
    # shadows the builtin) are the keyword names used below for the fallback
    # calls to the real Entrez functions, so they must not be renamed.

    def mock_esearch(db=None, term=None):
        # Known accessions yield a single hit whose id is the search term
        # itself, so subsequent elink/efetch calls receive the accession.
        if ((db == 'nucleotide' and term in transcript_to_protein)
                or (db == 'protein' and term in protein_to_transcript)):
            return EntrezResult({
                'Count': '1',
                'RetMax': '1',
                'IdList': [term],
                'TranslationSet': [],
                'RetStart': '0',
                'QueryTranslation': ''
            })
        return esearch(db=db, term=term)

    def mock_elink(dbfrom=None, db=None, id=None):
        if dbfrom == 'nucleotide' and id in transcript_to_protein:
            # A `None` protein means: transcript is known, but has no link.
            if transcript_to_protein[id] is None:
                linkset = []
            else:
                linkset = [{'DbTo': 'protein',
                            'Link': [{'Id': transcript_to_protein[id]}],
                            'LinkName': 'nuccore_protein'}]
            return EntrezResult([{
                'LinkSetDb': linkset,
                'DbFrom': 'nuccore',
                'IdList': [id],
                'LinkSetDbHistory': [],
                'ERROR': []
            }])

        if dbfrom == 'protein' and id in protein_to_transcript:
            # A `None` transcript means: protein is known, but has no link.
            if protein_to_transcript[id] is None:
                linkset = []
            else:
                # Only the 'protein_nuccore_mrna' entry carries the
                # transcript from the fixture; the other entries are fixed
                # ids under different link names (presumably mimicking a
                # real response so the code under test has to select the
                # right link set -- confirm against mutalyzer.ncbi).
                linkset = [{'DbTo': 'nuccore',
                            'Link': [{'Id': '568815587'},
                                     {'Id': '528476600'},
                                     {'Id': '568815270'},
                                     {'Id': '528474155'},
                                     {'Id': '452415518'},
                                     {'Id': '452405284'},
                                     {'Id': '383209650'}],
                            'LinkName': 'protein_nuccore'},
                           {'DbTo': 'nuccore',
                            'Link': [{'Id': '4506864'}],
                            'LinkName': 'protein_nuccore_cds'},
                           {'DbTo': 'nuccore',
                            'Link': [{'Id': '48735311'},
                                     {'Id': '48734961'},
                                     {'Id': '47682402'},
                                     {'Id': '18490203'},
                                     {'Id': '16359050'},
                                     {'Id': '16306997'},
                                     {'Id': '15929518'},
                                     {'Id': '15214938'},
                                     {'Id': '13528941'}],
                            'LinkName': 'protein_nuccore_mgc_refseq'},
                           {'DbTo': 'nuccore',
                            'Link': [{'Id': protein_to_transcript[id]}],
                            'LinkName': 'protein_nuccore_mrna'}]
            return EntrezResult([{
                'LinkSetDb': linkset,
                'DbFrom': 'protein',
                'IdList': [id],
                'LinkSetDbHistory': [],
                'ERROR': []
            }])

        return elink(dbfrom=dbfrom, db=db, id=id)

    def mock_efetch(db=None, id=None, rettype=None, retmode=None):
        if ((db == 'nucleotide' and id in transcript_to_protein)
                or (db == 'protein' and id in protein_to_transcript)):
            # Versionless accessions get a dummy version so that the result
            # always looks like a versioned accession.
            if '.' not in id:
                id += '.9999'
            return EntrezResult(id + '\n')
        return efetch(db=db, id=id, rettype=rettype, retmode=retmode)

    def mock_read(result):
        # Patched unconditionally, so results of the fallback (real) Entrez
        # calls are also just `.read()` instead of being parsed.
        return result.read()

    monkeypatch.setattr(Bio.Entrez, 'esearch', mock_esearch)
    monkeypatch.setattr(Bio.Entrez, 'elink', mock_elink)
    monkeypatch.setattr(Bio.Entrez, 'efetch', mock_efetch)
    monkeypatch.setattr(Bio.Entrez, 'read', mock_read)

    return links


def with_entrez(*links):
    """
    Convenience decorator for parameterizing tests with transcript-protein
    link fixtures in the Entrez API.

    Similar to :func:`fixtures.with_links`.

    The complete set of links is passed as one (indirect) parameter to the
    `entrez` fixture. The generated test id lists the links separated by
    commas, each link written as `transcript/protein` with `*` in place of
    `None`.
    """
    def test_with_entrez(test):
        return pytest.mark.usefixtures('entrez')(
            pytest.mark.parametrize(
                'entrez', [links], indirect=True,
                ids=[','.join('/'.join(a or '*' for a in link)
                              for link in links)])(test))
    return test_with_entrez


def _assert_cache_state(expected_forward, expected_reverse):
    """
    Assert that the transcript-protein link cache in Redis contains exactly
    the expected entries (order is ignored).

    Both arguments are lists of `(transcript, protein)` pairs:
    `expected_forward` for the `ncbi:transcript-to-protein:*` keys and
    `expected_reverse` for the `ncbi:protein-to-transcript:*` keys. The
    accession on the key side is taken from the last `:`-separated part of
    the key, and a falsy cached value (e.g., an empty string) is compared as
    `None`.
    """
    forward = [(key.split(':')[-1], redis.get(key) or None)
               for key in redis.keys('ncbi:transcript-to-protein:*')]
    assert sorted(forward) == sorted(expected_forward)

    reverse = [(redis.get(key) or None, key.split(':')[-1])
               for key in redis.keys('ncbi:protein-to-transcript:*')]
    assert sorted(reverse) == sorted(expected_reverse)


@with_entrez(('NM_11111.1', None),
             ('NM_11111.2', 'NP_11111.2'),
             ('NM_22222.2', None),
             ('NM_22222.3', 'NP_22222.3'),
             ('NM_33333.4', None),
             ('NM_33333.5', 'NP_33333.5'),
             ('NM_44444', None),
             ('NM_44444.5', None),
             ('NM_44444.6', None),
             ('NM_55555', 'NP_55555'),
             ('NM_55555.6', None),
             ('NM_66666', 'NP_66666'),
             ('NM_66666.6', 'NP_66666.6'),
             ('NM_66666.7', 'NP_66666.7'),
             ('NM_66666.8', None),
             ('NM_77777', 'NP_77777'),
             ('NM_77777.7', 'NP_77777.7'),
             ('NM_77777.8', None),
             ('NM_88888', None),
             ('NM_88888.8', None),
             ('NM_88888.9', 'NP_88888.9'))
@with_links(('NM_11111', 'NP_11111'),
            ('NM_22222', None),
            ('NM_33333.3', 'NP_33333.3'),
            ('NM_44444.4', None),
            ('NM_55555.5', None),
            ('NM_66666.6', None))
@pytest.mark.parametrize('accession,version,match_version,expected', [
    ('NM_11111', None, False, ('NP_11111', None)),
    ('NM_11111', 1, False, ('NP_11111', None)),
    ('NM_11111', 1, True, None),
    ('NM_11111', 2, False, ('NP_11111', None)),
    ('NM_11111', 2, True, ('NP_11111', 2)),
    ('NM_22222', None, False, None),
    ('NM_22222', 2, False, None),
    ('NM_22222', 2, True, None),
    ('NM_22222', 3, False, None),
    ('NM_22222', 3, True, ('NP_22222', 3)),
    ('NM_33333', None, False, ('NP_33333', None)),
    ('NM_33333', 3, True, ('NP_33333', 3)),
    ('NM_33333', 3, False, ('NP_33333', 3)),
    ('NM_33333', 4, True, None),
    ('NM_33333', 4, False, ('NP_33333', None)),
    ('NM_33333', 5, True, ('NP_33333', 5)),
    ('NM_33333', 5, False, ('NP_33333', None)),
    ('NM_44444', None, False, None),
    ('NM_44444', 4, True, None),
    ('NM_44444', 4, False, None),
    ('NM_44444', 5, True, None),
    ('NM_44444', 5, False, None),
    ('NM_44444', 6, True, None),
    ('NM_44444', 6, False, None),
    ('NM_55555', None, False, ('NP_55555', None)),
    ('NM_55555', 5, True, None),
    ('NM_55555', 5, False, None),
    ('NM_55555', 6, True, None),
    ('NM_55555', 6, False, ('NP_55555', None)),
    ('NM_66666', None, False, ('NP_66666', None)),
    ('NM_66666', 6, True, None),
    ('NM_66666', 6, False, None),
    ('NM_66666', 7, True, ('NP_66666', 7)),
    ('NM_66666', 7, False, ('NP_66666', 7)),
    ('NM_66666', 8, True, None),
    ('NM_66666', 8, False, ('NP_66666', None)),
    ('NM_77777', None, False, ('NP_77777', None)),
    ('NM_77777', 7, False, ('NP_77777', 7)),
    ('NM_77777', 7, True, ('NP_77777', 7)),
    ('NM_77777', 8, False, ('NP_77777', None)),
    ('NM_77777', 8, True, None),
    ('NM_88888', None, False, None),
    ('NM_88888', 8, False, None),
    ('NM_88888', 8, True, None),
    ('NM_88888', 9, False, ('NP_88888', 9)),
    ('NM_88888', 9, True, ('NP_88888', 9))])
def test_transcript_to_protein(accession, version, match_version, expected):
    """
    Get protein for transcript.

    Both the Entrez API and our cache are fixed with a set of
    transcript-protein links. This test is parametrized with a list of
    arguments for the :func:`ncbi.transcript_to_protein` function and the
    corresponding expected result (`None` if `NoLinkError` is expected to be
    raised).
    """
    if expected is None:
        with pytest.raises(ncbi.NoLinkError):
            ncbi.transcript_to_protein(accession, version, match_version)
    else:
        assert ncbi.transcript_to_protein(
            accession, version, match_version) == expected


@with_entrez((None, 'NP_11111.1'),
             ('NM_11111.2', 'NP_11111.2'),
             (None, 'NP_22222.2'),
             ('NM_22222.3', 'NP_22222.3'),
             (None, 'NP_33333.4'),
             ('NM_33333.5', 'NP_33333.5'),
             (None, 'NP_44444'),
             (None, 'NP_44444.5'),
             (None, 'NP_44444.6'),
             ('NM_55555', 'NP_55555'),
             (None, 'NP_55555.6'),
             ('NM_66666', 'NP_66666'),
             ('NM_66666.6', 'NP_66666.6'),
             ('NM_66666.7', 'NP_66666.7'),
             (None, 'NP_66666.8'),
             ('NM_77777', 'NP_77777'),
             ('NM_77777.7', 'NP_77777.7'),
             (None, 'NP_77777.8'),
             (None, 'NP_88888'),
             (None, 'NP_88888.8'),
             ('NM_88888.9', 'NP_88888.9'))
@with_links(('NM_11111', 'NP_11111'),
            (None, 'NP_22222'),
            ('NM_33333.3', 'NP_33333.3'),
            (None, 'NP_44444.4'),
            (None, 'NP_55555.5'),
            (None, 'NP_66666.6'))
@pytest.mark.parametrize('accession,version,match_version,expected', [
    ('NP_11111', None, False, ('NM_11111', None)),
    ('NP_11111', 1, False, ('NM_11111', None)),
    ('NP_11111', 1, True, None),
    ('NP_11111', 2, False, ('NM_11111', None)),
    ('NP_11111', 2, True, ('NM_11111', 2)),
    ('NP_22222', None, False, None),
    ('NP_22222', 2, False, None),
    ('NP_22222', 2, True, None),
    ('NP_22222', 3, False, None),
    ('NP_22222', 3, True, ('NM_22222', 3)),
    ('NP_33333', None, False, ('NM_33333', None)),
    ('NP_33333', 3, True, ('NM_33333', 3)),
    ('NP_33333', 3, False, ('NM_33333', 3)),
    ('NP_33333', 4, True, None),
    ('NP_33333', 4, False, ('NM_33333', None)),
    ('NP_33333', 5, True, ('NM_33333', 5)),
    ('NP_33333', 5, False, ('NM_33333', None)),
    ('NP_44444', None, False, None),
    ('NP_44444', 4, True, None),
    ('NP_44444', 4, False, None),
    ('NP_44444', 5, True, None),
    ('NP_44444', 5, False, None),
    ('NP_44444', 6, True, None),
    ('NP_44444', 6, False, None),
    ('NP_55555', None, False, ('NM_55555', None)),
    ('NP_55555', 5, True, None),
    ('NP_55555', 5, False, None),
    ('NP_55555', 6, True, None),
    ('NP_55555', 6, False, ('NM_55555', None)),
    ('NP_66666', None, False, ('NM_66666', None)),
    ('NP_66666', 6, True, None),
    ('NP_66666', 6, False, None),
    ('NP_66666', 7, True, ('NM_66666', 7)),
    ('NP_66666', 7, False, ('NM_66666', 7)),
    ('NP_66666', 8, True, None),
    ('NP_66666', 8, False, ('NM_66666', None)),
    ('NP_77777', None, False, ('NM_77777', None)),
    ('NP_77777', 7, False, ('NM_77777', 7)),
    ('NP_77777', 7, True, ('NM_77777', 7)),
    ('NP_77777', 8, False, ('NM_77777', None)),
    ('NP_77777', 8, True, None),
    ('NP_88888', None, False, None),
    ('NP_88888', 8, False, None),
    ('NP_88888', 8, True, None),
    ('NP_88888', 9, False, ('NM_88888', 9)),
    ('NP_88888', 9, True, ('NM_88888', 9))])
def test_protein_to_transcript(accession, version, match_version, expected):
    """
    Get transcript for protein.

    Both the Entrez API and our cache are fixed with a set of
    transcript-protein links. This test is parametrized with a list of
    arguments for the :func:`ncbi.protein_to_transcript` function and the
    corresponding expected result (`None` if `NoLinkError` is expected to be
    raised).

    Fixtures and parameters of this test mirror those of the
    `test_transcript_to_protein` test.
    """
    if expected is None:
        with pytest.raises(ncbi.NoLinkError):
            ncbi.protein_to_transcript(accession, version, match_version)
    else:
        assert ncbi.protein_to_transcript(
            accession, version, match_version) == expected


@with_entrez(('NM_11111', None),
             ('NM_22222', 'NP_22222'),
             ('NM_33333', None),
             ('NM_33333.3', None),
             ('NM_44444', None),
             ('NM_44444.4', 'NP_44444.4'))
@pytest.mark.parametrize(
    'accession,version,match_version,expected_forward,expected_reverse', [
        ('NM_11111', None, False,
         [('NM_11111', None)],
         []),
        ('NM_22222', None, False,
         [('NM_22222', 'NP_22222')],
         [('NM_22222', 'NP_22222')]),
        ('NM_33333', None, False,
         [('NM_33333', None)],
         []),
        ('NM_33333', 3, False,
         [('NM_33333', None), ('NM_33333.3', None)],
         []),
        ('NM_33333', 3, True,
         [('NM_33333.3', None)],
         []),
        ('NM_44444', None, False,
         [('NM_44444', None)],
         []),
        ('NM_44444', 4, False,
         [('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')],
         [('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')]),
        ('NM_44444', 4, True,
         [('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')],
         [('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')])])
def test_transcript_to_protein_cache(accession, version, match_version,
                                     expected_forward, expected_reverse):
    """
    Get protein for transcript and check the resulting cache state.

    The lookup result itself is not checked here; only the forward
    (transcript-to-protein) and reverse (protein-to-transcript) cache
    entries present afterwards are compared with the expected ones.
    """
    # Whether or not a link was found is irrelevant for this test, we only
    # care about what ended up in the cache.
    try:
        ncbi.transcript_to_protein(accession, version, match_version)
    except ncbi.NoLinkError:
        pass

    _assert_cache_state(expected_forward, expected_reverse)


@with_entrez((None, 'NP_11111'),
             ('NM_22222', 'NP_22222'),
             (None, 'NP_33333'),
             (None, 'NP_33333.3'),
             (None, 'NP_44444'),
             ('NM_44444.4', 'NP_44444.4'))
@pytest.mark.parametrize(
    'accession,version,match_version,expected_forward,expected_reverse', [
        ('NP_11111', None, False,
         [],
         [(None, 'NP_11111')]),
        ('NP_22222', None, False,
         [('NM_22222', 'NP_22222')],
         [('NM_22222', 'NP_22222')]),
        ('NP_33333', None, False,
         [],
         [(None, 'NP_33333')]),
        ('NP_33333', 3, False,
         [],
         [(None, 'NP_33333'), (None, 'NP_33333.3')]),
        ('NP_33333', 3, True,
         [],
         [(None, 'NP_33333.3')]),
        ('NP_44444', None, False,
         [],
         [(None, 'NP_44444')]),
        ('NP_44444', 4, False,
         [('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')],
         [('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')]),
        ('NP_44444', 4, True,
         [('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')],
         [('NM_44444', 'NP_44444'), ('NM_44444.4', 'NP_44444.4')])])
def test_protein_to_transcript_cache(accession, version, match_version,
                                     expected_forward, expected_reverse):
    """
    Get transcript for protein and check the resulting cache state.

    The lookup result itself is not checked here; only the forward
    (transcript-to-protein) and reverse (protein-to-transcript) cache
    entries present afterwards are compared with the expected ones.
    """
    # Whether or not a link was found is irrelevant for this test, we only
    # care about what ended up in the cache.
    try:
        ncbi.protein_to_transcript(accession, version, match_version)
    except ncbi.NoLinkError:
        pass

    _assert_cache_state(expected_forward, expected_reverse)