diff --git a/CHANGES.rst b/CHANGES.rst index 96b228613e78d8cc0140019a3b610455662af42b..eaf4adf3145189a50f3a3754a5ab4f1deefbe31a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -9,6 +9,9 @@ Version 2.0.6 Release date to be decided. +- Added `getGeneLocation` webservice method. Given a gene symbol and optional + genome build, it returns the location of the gene. + Version 2.0.5 ------------- diff --git a/mutalyzer/models.py b/mutalyzer/models.py index bc9bf5a0780a382af267b3973b17b017c6a8ff77..18d934f7dbf6f3ba39fc06eaad9bc0320a67fb03 100644 --- a/mutalyzer/models.py +++ b/mutalyzer/models.py @@ -41,6 +41,23 @@ class SoapMessage(ComplexModel): #SoapMessage +class GeneLocation(ComplexModel): + """ + Return type of SOAP method getGeneLocation. + """ + __namespace__ = SOAP_NAMESPACE + + gene = Mandatory.Unicode + start = Mandatory.Integer + stop = Mandatory.Integer + orientation = Mandatory.Unicode + chromosome_name = Mandatory.Unicode + chromosome_accession = Mandatory.Unicode + assembly_name = Mandatory.Unicode + assembly_alias = Mandatory.Unicode +#GeneLocation + + class Mapping(ComplexModel): """ Return type of SOAP method mappingInfo. 
diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py index c65053587d4e3b7639df78bf6152100cbf39114a..27c3410ba7f969bb6cd4da7f77d2cc5841fa2d26 100644 --- a/mutalyzer/services/rpc.py +++ b/mutalyzer/services/rpc.py @@ -21,12 +21,13 @@ import os import socket from operator import attrgetter from sqlalchemy.orm.exc import NoResultFound +from sqlalchemy.sql import func import mutalyzer from mutalyzer.config import settings from mutalyzer.db import session -from mutalyzer.db.models import (Assembly, BatchJob, BatchQueueItem, - TranscriptMapping) +from mutalyzer.db.models import (Assembly, Chromosome, BatchJob, + BatchQueueItem, TranscriptMapping) from mutalyzer.output import Output from mutalyzer.grammar import Grammar from mutalyzer.sync import CacheSync @@ -1296,6 +1297,87 @@ class MutalyzerService(ServiceBase): return descriptions #getdbSNPDescriptions + + @srpc(Mandatory.Unicode, Unicode, _returns=GeneLocation) + def getGeneLocation(gene, build=None): + """ + Get the location of a gene on the given genome build (assembly), using + the system's transcript mapping database. + + @arg gene: Gene symbol. + @type gene: string + @arg build: Genome build (assembly) by name or alias. If omitted, + the system's default assembly is used. + @type build: string + + @return: Object with the following fields: + - gene: Gene symbol. + - start: Gene start position. If multiple transcripts for the gene + are known, this contains the lowest start position. + - stop: Gene stop position. If multiple transcripts for the gene are + known, this contains the highest stop position. + - orientation: Gene orientation, either 'forward' or 'reverse'. + - chromosome_name: Gene chromosome by name (e.g., 'chrX'). + - chromosome_accession: Gene chromosome by accession (e.g., + 'NC_000023.10'). + - assembly_name: Selected genome build (assembly) by name (e.g., + 'GRCh37'). + - assembly_alias: Selected genome build (assembly) by alias (e.g., + 'hg19'). 
+ """ + output = Output(__file__) + + output.addMessage(__file__, -1, 'INFO', + 'Received request getGeneLocation(%s, %s)' + % (gene, build)) + + try: + assembly = Assembly.by_name_or_alias(build or + settings.DEFAULT_ASSEMBLY) + except NoResultFound: + output.addMessage(__file__, 4, "EARG", "EARG %s" % build) + raise Fault("EARG", + "The build argument (%s) was not a valid " \ + "build name." % build) + + # From all the transcripts for this gene, get the lowest start + # position and highest stop position. For integrity, we group by + # chromosome and orientation. + mapping = \ + session.query(func.min(TranscriptMapping.start), + func.max(TranscriptMapping.stop), + TranscriptMapping.orientation, + Chromosome) \ + .filter(TranscriptMapping.chromosome.has(assembly=assembly), + TranscriptMapping.gene == gene) \ + .join(TranscriptMapping.chromosome) \ + .group_by(TranscriptMapping.chromosome, + TranscriptMapping.orientation) \ + .first() + + if not mapping: + output.addMessage(__file__, 4, "EARG", "EARG %s" % gene) + raise Fault("EARG", + "No location was found for gene %s." % gene) + + start, stop, orientation, chromosome = mapping + + result = GeneLocation() + result.gene = gene + result.start = start + result.stop = stop + result.orientation = orientation + result.chromosome_name = chromosome.name + result.chromosome_accession = chromosome.accession + result.assembly_name = assembly.name + result.assembly_alias = assembly.alias + + output.addMessage(__file__, -1, 'INFO', + 'Finished processing getGeneLocation(%s %s)' + % (gene, build)) + + return result + #getGeneLocation #MutalyzerService diff --git a/tests/test_services_json.py b/tests/test_services_json.py index 81833505e36ecee7436bde0f956e579ecd82c00e..5ca81daef7b476993167fec26e1e7d47da2dbb81 100644 --- a/tests/test_services_json.py +++ b/tests/test_services_json.py @@ -5,8 +5,11 @@ Tests for the JSON interface to Mutalyzer. 
from __future__ import unicode_literals +import pytest import simplejson as json +from spyne.model.fault import Fault from spyne.server.null import NullServer + import mutalyzer from mutalyzer import announce from mutalyzer import Scheduler @@ -142,3 +145,59 @@ class TestServicesJson(MutalyzerTest): result = self._call('getBatchJob', job_id) result = result.decode('base64').decode('utf-8').strip().split('\n')[1:] assert expected == [line.split('\t') for line in result] + + @fix(database, hg19, hg19_transcript_mappings) + def test_gene_location(self): + """ + Get outer coordinates for gene. + """ + r = self._call('getGeneLocation', 'SDHD', 'hg19') + + assert r == {'gene': 'SDHD', + 'start': 111957571, + 'stop': 111966518, + 'orientation': 'forward', + 'chromosome_name': 'chr11', + 'chromosome_accession': 'NC_000011.9', + 'assembly_name': 'GRCh37', + 'assembly_alias': 'hg19'} + + @fix(database, hg19, hg19_transcript_mappings) + def test_gene_location_reverse(self): + """ + Get outer coordinates for gene on the reverse strand. + """ + r = self._call('getGeneLocation', 'DMD', 'hg19') + + assert r == {'gene': 'DMD', + 'start': 31137345, + 'stop': 33038317, + 'orientation': 'reverse', + 'chromosome_name': 'chrX', + 'chromosome_accession': 'NC_000023.10', + 'assembly_name': 'GRCh37', + 'assembly_alias': 'hg19'} + + @fix(database, hg19, hg19_transcript_mappings) + def test_gene_location_default_build(self): + """ + Get outer coordinates for gene without specifying the build. + """ + r = self._call('getGeneLocation', 'SDHD') + + assert r == {'gene': 'SDHD', + 'start': 111957571, + 'stop': 111966518, + 'orientation': 'forward', + 'chromosome_name': 'chr11', + 'chromosome_accession': 'NC_000011.9', + 'assembly_name': 'GRCh37', + 'assembly_alias': 'hg19'} + + @fix(database, hg19, hg19_transcript_mappings) + def test_gene_location_invalid_gene(self): + """ + Get outer coordinates for gene that does not exist. 
+ """ + with pytest.raises(Fault): + r = self._call('getGeneLocation', 'THISISNOTAGENE', 'hg19')