From e06452a15849416e4308d243beb77be0b5936fc8 Mon Sep 17 00:00:00 2001 From: Martijn Vermaat <martijn@vermaat.name> Date: Fri, 30 Jan 2015 11:02:46 +0100 Subject: [PATCH] Add getGeneLocation webservice method Given a gene symbol and optional genome build, this returns the location of the gene. Primary motivation for this is LOVD, where it will be used in combination with sliceChromosome as an alternative for sliceChromosomeByGene which only works on the fixed Ensembl genome build. --- CHANGES.rst | 3 ++ mutalyzer/models.py | 17 ++++++++ mutalyzer/services/rpc.py | 86 ++++++++++++++++++++++++++++++++++++- tests/test_services_json.py | 59 +++++++++++++++++++++++++ 4 files changed, 163 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 96b22861..eaf4adf3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -9,6 +9,9 @@ Version 2.0.6 Release date to be decided. +- Added `getGeneLocation` webservice method. Given a gene symbol and optional + genome build, it returns the location of the gene. + Version 2.0.5 ------------- diff --git a/mutalyzer/models.py b/mutalyzer/models.py index bc9bf5a0..18d934f7 100644 --- a/mutalyzer/models.py +++ b/mutalyzer/models.py @@ -41,6 +41,23 @@ class SoapMessage(ComplexModel): #SoapMessage +class GeneLocation(ComplexModel): + """ + Return type of SOAP method getGeneLocation. + """ + __namespace__ = SOAP_NAMESPACE + + gene = Mandatory.Unicode + start = Mandatory.Integer + stop = Mandatory.Integer + orientation = Mandatory.Unicode + chromosome_name = Mandatory.Unicode + chromosome_accession = Mandatory.Unicode + assembly_name = Mandatory.Unicode + assembly_alias = Mandatory.Unicode +#GeneLocation + + class Mapping(ComplexModel): """ Return type of SOAP method mappingInfo. 
diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py index c6505358..27c3410b 100644 --- a/mutalyzer/services/rpc.py +++ b/mutalyzer/services/rpc.py @@ -21,12 +21,13 @@ import os import socket from operator import attrgetter from sqlalchemy.orm.exc import NoResultFound +from sqlalchemy.sql import func import mutalyzer from mutalyzer.config import settings from mutalyzer.db import session -from mutalyzer.db.models import (Assembly, BatchJob, BatchQueueItem, - TranscriptMapping) +from mutalyzer.db.models import (Assembly, Chromosome, BatchJob, + BatchQueueItem, TranscriptMapping) from mutalyzer.output import Output from mutalyzer.grammar import Grammar from mutalyzer.sync import CacheSync @@ -1296,6 +1297,87 @@ class MutalyzerService(ServiceBase): return descriptions #getdbSNPDescriptions + + @srpc(Mandatory.Unicode, Unicode, _returns=GeneLocation) + def getGeneLocation(gene, build=None): + """ + Get the location of a gene on the given genome build (assembly), using + the system's transcript mapping database. + + @arg gene: Gene symbol. + @type gene: string + @arg build: Genome build (assembly) by name or alias. If omitted, + the system's default assembly is used. + @type build: string + + @return: Object with the following fields: + - gene: Gene symbol. + - start: Gene start position. If multiple transcripts for the gene + are known, this contains the lowest start position. + - stop: Gene stop position. If multiple transcripts for the gene are + known, this contains the highest stop position. + - orientation: Gene orientation, either 'forward' or 'reverse'. + - chromosome_name: Gene chromosome by name (e.g., 'chrX'). + - chromosome_accession: Gene chromosome by accession (e.g., + 'NC_000023.10'). + - assembly_name: Selected genome build (assembly) by name (e.g., + 'GRCh37'). + - assembly_alias: Selected genome build (assembly) by alias (e.g., + 'hg19'). 
+ """ + output = Output(__file__) + + output.addMessage(__file__, -1, 'INFO', + 'Received request getGeneLocation(%s, %s)' + % (gene, build)) + + try: + assembly = Assembly.by_name_or_alias(build or + settings.DEFAULT_ASSEMBLY) + except NoResultFound: + output.addMessage(__file__, 4, "EARG", "EARG %s" % build) + raise Fault("EARG", + "The build argument (%s) was not a valid " \ + "build name." % build) + + # From all the transcripts for this gene, get the lowest start + # position and highest stop position. For integrity, we group by + # chromosome and orientation. + mapping = \ + session.query(func.min(TranscriptMapping.start), + func.max(TranscriptMapping.stop), + TranscriptMapping.orientation, + Chromosome) \ + .filter(TranscriptMapping.chromosome.has(assembly=assembly), + TranscriptMapping.gene == gene) \ + .join(TranscriptMapping.chromosome) \ + .group_by(TranscriptMapping.chromosome, + TranscriptMapping.orientation) \ + .first() + + if not mapping: + output.addMessage(__file__, 4, "EARG", "EARG %s" % gene) + raise Fault("EARG", + "No location was found for gene %s." % gene) + + start, stop, orientation, chromosome = mapping + + result = GeneLocation() + result.gene = gene + result.start = start + result.stop = stop + result.orientation = orientation + result.chromosome_name = chromosome.name + result.chromosome_accession = chromosome.accession + result.assembly_name = assembly.name + result.assembly_alias = assembly.alias + + output.addMessage(__file__, -1, 'INFO', + 'Finished processing getGeneLocation(%s %s)' + % (gene, build)) + + return result + #getGeneLocation #MutalyzerService diff --git a/tests/test_services_json.py b/tests/test_services_json.py index 81833505..5ca81dae 100644 --- a/tests/test_services_json.py +++ b/tests/test_services_json.py @@ -5,8 +5,11 @@ Tests for the JSON interface to Mutalyzer. 
from __future__ import unicode_literals +import pytest import simplejson as json +from spyne.model.fault import Fault from spyne.server.null import NullServer + import mutalyzer from mutalyzer import announce from mutalyzer import Scheduler @@ -142,3 +145,59 @@ class TestServicesJson(MutalyzerTest): result = self._call('getBatchJob', job_id) result = result.decode('base64').decode('utf-8').strip().split('\n')[1:] assert expected == [line.split('\t') for line in result] + + @fix(database, hg19, hg19_transcript_mappings) + def test_gene_location(self): + """ + Get outer coordinates for gene. + """ + r = self._call('getGeneLocation', 'SDHD', 'hg19') + + assert r == {'gene': 'SDHD', + 'start': 111957571, + 'stop': 111966518, + 'orientation': 'forward', + 'chromosome_name': 'chr11', + 'chromosome_accession': 'NC_000011.9', + 'assembly_name': 'GRCh37', + 'assembly_alias': 'hg19'} + + @fix(database, hg19, hg19_transcript_mappings) + def test_gene_location_reverse(self): + """ + Get outer coordinates for gene on the reverse strand. + """ + r = self._call('getGeneLocation', 'DMD', 'hg19') + + assert r == {'gene': 'DMD', + 'start': 31137345, + 'stop': 33038317, + 'orientation': 'reverse', + 'chromosome_name': 'chrX', + 'chromosome_accession': 'NC_000023.10', + 'assembly_name': 'GRCh37', + 'assembly_alias': 'hg19'} + + @fix(database, hg19, hg19_transcript_mappings) + def test_gene_location_default_build(self): + """ + Get outer coordinates for gene without specifying the build. + """ + r = self._call('getGeneLocation', 'SDHD') + + assert r == {'gene': 'SDHD', + 'start': 111957571, + 'stop': 111966518, + 'orientation': 'forward', + 'chromosome_name': 'chr11', + 'chromosome_accession': 'NC_000011.9', + 'assembly_name': 'GRCh37', + 'assembly_alias': 'hg19'} + + @fix(database, hg19, hg19_transcript_mappings) + def test_gene_location_invalid_gene(self): + """ + Get outer coordinates for gene that does not exist. 
+ """ + with pytest.raises(Fault): + r = self._call('getGeneLocation', 'THISISNOTAGENE', 'hg19') -- GitLab