From 8131c606e9e80d1f3e9387b0d97411d6bf57925b Mon Sep 17 00:00:00 2001 From: "J.F.J. Laros" <j.f.j.laros@lumc.nl> Date: Sat, 4 Aug 2012 12:07:35 +0000 Subject: [PATCH] Added a webservice function for the description extractor. rpc.py: - Added the function descriptionExtract(). - Standardised indentation. models.py: - Added a RawVar and an Allele class for the webservices. describe.py: - Made the RawVar class a child of models.RawVar. This is convenient for webservices since we can simply return this object. git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@591 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1 --- extras/soap-tools/describe.py | 478 ---------------------------------- mutalyzer/describe.py | 5 +- mutalyzer/models.py | 28 ++ mutalyzer/services/rpc.py | 212 ++++++++------- 4 files changed, 148 insertions(+), 575 deletions(-) delete mode 100755 extras/soap-tools/describe.py diff --git a/extras/soap-tools/describe.py b/extras/soap-tools/describe.py deleted file mode 100755 index 34778407..00000000 --- a/extras/soap-tools/describe.py +++ /dev/null @@ -1,478 +0,0 @@ -#!/usr/bin/python - -""" -Prototype of a module that can generate a HGVS description of the variant(s) -leading from one sequence to an other. - -@requires: sys -@requires: argparse -@requires: Bio.Seq -@requires: suds.client.Client -""" - -# NOTE: The following modules are not needed once this is an integrated module. -import sys -import argparse -from suds.client import Client -from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() - -WSDL_LOCATION = "http://localhost/mutalyzer/services/?wsdl" - -# NOTE: The following modules are really needed. -import Bio.Seq -from mutalyzer.util import longest_common_prefix, longest_common_suffix -from mutalyzer.util import palinsnoop, roll - -def printMatrix(M) : - for i in M : - print i - -def LCSMatrix(s1, s2) : - """ - """ - - y_max = len(s1) + 1 - x_max = len(s2) + 1 - M = [[0] * x_max for i in xrange(y_max)] - #printMatrix(M) - - for x in xrange(1, y_max) : - for y in xrange(1, x_max) : - if s1[x - 1] == s2[y - 1] : - M[x][y] = M[x - 1][y - 1] + 1 - - return M -#LCSMatrix - -def findMax(M, x1, x2, y1, y2) : - """ - M = describe.LCSMatrix("banaan", "ana") - - N = describe.LCSMatrix("banaan", "n") - describe.findMax(M, 1, 7, 2, 3) - - N = describe.LCSMatrix("banaan", "na") - describe.findMax(M, 1, 7, 2, 4) - """ - - longest, x_longest, y_longest = 0, 0, 0 - - for x in xrange(x1, x2) : - x_relative = x - x1 - - for y in xrange(y1, y2) : - y_relative = y - y1 - realVal = min(x_relative, y_relative, M[x][y]) - #print realVal, - - if realVal > longest : - longest = realVal - x_longest = x_relative - y_longest = y_relative - #if - #for - #print - #for - - return x_longest, y_longest, longest -#findMax - -def LongestCommonSubstring(s1, s2) : - """ - Find the longest common substring between {s1} and {s2}. - - Mainly copied from: - http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/ - Longest_common_substring#Python - - @arg s1: String 1. - @type s1: str - @arg s2: String 2. - @type s2: str - - @returns: The end locations and the length of the longest common substring. - @rtype: tuple(int, int, int) - """ - - len_s1 = len(s1) - len_s2 = len(s2) - M = [[0] * (len_s2 + 1) for i in xrange(len_s1 + 1)] - longest, x_longest, y_longest = 0, 0, 0 - - for x in xrange(1, len_s1 + 1) : - for y in xrange(1, len_s2 + 1) : - if s1[x - 1] == s2[y - 1] : - M[x][y] = M[x - 1][y - 1] + 1 - - if M[x][y] > longest : - longest = M[x][y] - x_longest = x - y_longest = y - #if - #if - else : # Doesn't seem to do anything? - M[x][y] = 0 - #for - #for - - #print s1, s2 - #printMatrix(M) - return x_longest, y_longest, longest -#LongestCommonSubstring - -class RawVar() : - """ - Container for a raw variant. - - To use this class correctly, do not supply more than the minimum amount of - data. The {description()} function may not work properly if too much - information is given. - - Example: if {end} is initialised for a substitution, a range will be - retuned, resulting in a description like: 100_100A>T - """ - # TODO: We now use the length of the variant as a measure of its - # ``length'', but that makes it a bit context dependent, e.g., c.1A>T - # and g.100A>T may be the same variant giving different lengths. - # Maybe we should ignore the positions in a description. - - def __init__(self, start = 0, start_offset = 0, end = 0, end_offset = 0, - type = "none", deleted = "", inserted = "", shift = 0) : - """ - Initialise the class with the appropriate values. - - @arg start: Start position. - @type start: int - @arg start_offset: - @type start_offset: int - @arg end: End position. - @type end: int - @arg end_offset: - @type end_offset: int - @arg type: Variant type. - @type type: str - @arg deleted: Deleted part of the reference sequence. - @type deleted: str - @arg inserted: Inserted part. - @type inserted: str - @arg shift: Amount of freedom. - @type shift: int - """ - # TODO: Will this container be used for all variants, or only genomic? - # start_offset and end_offset may be never used. - - self.start = start - self.start_offset = start_offset - self.end = end - self.end_offset= end_offset - self.type = type - self.deleted = deleted - self.inserted = inserted - self.shift = shift - #__init__ - - def description(self) : - """ - Give the HGVS description of the raw variant stored in this class. - - Note that this function relies on the absence of values to make the - correct description. Also see the comment in the class definition. - - @returns: The HGVS description of the raw variant stored in this class. - @rtype: str - """ - - if not self.start : - return "=" - - descr = "%i" % self.start - - if self.end : - descr += "_%i" % self.end - - if self.type != "subst" : - descr += "%s" % self.type - - if self.inserted : - return descr + "%s" % self.inserted - return descr - #if - - return descr + "%s>%s" % (self.deleted, self.inserted) - #description -#RawVar - -def alleleDescription(allele) : - """ - Convert a list of raw variants to an HGVS allele description. - - @arg allele: A list of raw variants representing an allele description. - @type allele: list(RawVar) - - @returns: The HGVS description of {allele}. - @rval: str - """ - - if len(allele) > 1 : - return "[%s]" % ';'.join(map(lambda x : x.description(), allele)) - return allele[0].description() -#alleleDescription - -def printpos(s, start, end, fill = 0) : - """ - For debugging purposes. - """ - # TODO: See if this can partially replace or be merged with the - # visualisation in the __mutate() function of mutator.py - - fs = 10 # Flank size. - - return "%s %s%s %s" % (s[start - fs:start], s[start:end], '-' * fill, - s[end:end + fs]) -#printpos - -def DNA_description(M, s1, s2, s1_start, s1_end, s2_start, s2_end) : - """ - Give an allele description of the change from {s1} to {s2} in the range - {s1_start}..{s1_end} on {s1} and {s2_start}..{s2_end} on {s2}. - - arg s1: Sequence 1. - type s1: str - arg s2: Sequence 2. - type s2: str - arg s1_start: Start of the range on {s1}. - type s1_start: int - arg s1_end: End of the range on {s1}. - type s1_end: int - arg s2_start: Start of the range on {s2}. - type s2_start: int - arg s2_end: End of the range on {s2}. - type s2_end: int - - @returns: A list of RawVar objects, representing the allele. - @rval: list(RawVar) - """ - # TODO: Instead of copying this function and adjusting it to make it work - # for proteins, consider disabling parts like the inversion. - # TODO: Think about frameshift descriptions. - - # Nothing happened. - if s1 == s2: - return [RawVar()] - - # Insertion / Duplication. - if s1_start == s1_end : - ins_length = s2_end - s2_start - shift5, shift3 = roll(s2, s2_start + 1, s2_end) - shift = shift5 + shift3 - - s1_start += shift3 - s1_end += shift3 - s2_start += shift3 - s2_end += shift3 - - if s2_start - ins_length >= 0 and \ - s1[s1_start - ins_length:s1_start] == s2[s2_start:s2_end] : - - if ins_length == 1 : - return [RawVar(start = s1_start, type = "dup", shift = shift)] - return [RawVar(start = s1_start - ins_length + 1, end = s1_end, - type = "dup", shift = shift)] - #if - return [RawVar(start = s1_start, end = s1_start + 1, - inserted = s2[s2_start:s2_end], type = "ins", shift = shift)] - #if - - # Deletion. - if s2_start == s2_end : - shift5, shift3 = roll(s1, s1_start + 1, s1_end) - shift = shift5 + shift3 - - s1_start += shift3 + 1 - s1_end += shift3 - - if s1_start == s1_end : - return [RawVar(start = s1_start, type = "del", shift = shift)] - return [RawVar(start = s1_start, end = s1_end, type = "del", - shift = shift)] - #if - - # Substitution. - if s1_start + 1 == s1_end and s2_start + 1 == s2_end : - return [RawVar(start = s1_start + 1, deleted = s1[s1_start], - inserted = s2[s2_start], type = "subst")] - - # Simple InDel. - if s1_start + 1 == s1_end : - return [RawVar(start = s1_start + 1, inserted = s2[s2_start:s2_end], - type = "delins")] - - # TODO: Refactor the code after this point. - - # At this stage, we either have an inversion, an indel or a Compound - # variant. - a, b, c = findMax(M, s1_start, s1_end, s2_start, s2_end) - s1_end_f, s2_end_f, lcs_f_len = LongestCommonSubstring(s1[s1_start:s1_end], - s2[s2_start:s2_end]) - s1_end_r, s2_end_r, lcs_r_len = LongestCommonSubstring(s1[s1_start:s1_end], - Bio.Seq.reverse_complement(s2[s2_start:s2_end])) - print "N:", a, b, c - print "O:", s1_end_f, s2_end_f, lcs_f_len - - # Palindrome snooping. - trim = palinsnoop(s1[s1_start + s1_end_r - lcs_r_len:s1_start + s1_end_r]) - if trim == -1 : # Full palindrome. - lcs_r_len = 0 # s1_end_r and s2_end_r should not be used after this. - - # Inversion or Compound variant. - default = [RawVar(start = s1_start + 1, end = s1_end, - inserted = s2[s2_start:s2_end], type = "delins")] - - if not (lcs_f_len or lcs_r_len) : # Optimisation, not really needed. - return default - - # Inversion. - if lcs_f_len <= lcs_r_len : - if trim > 0 : # Partial palindrome. - s1_end_r -= trim - s2_end_r -= trim - lcs_r_len -= 2 * trim - #if - - # Simple Inversion. - if s2_end - s2_start == lcs_r_len and s1_end - s1_start == lcs_r_len : - return [RawVar(start = s1_start + 1, end = s1_end, type = "inv")] - - r1_len = s1_end_r - lcs_r_len - r2_len = s1_end - s1_start - s1_end_r - m1_len = s2_end_r - lcs_r_len - m2_len = s2_end - s2_start - s2_end_r - - # The flanks of the inversion (but not both) can be empty, so we - # generate descriptions conditionally. - leftRv = [] - rightRv = [] - if r1_len or m2_len : - lcs = len(longest_common_suffix(s1[s1_start:s1_start + r1_len], - s2[s2_start:s2_start + m2_len])) - leftRv = DNA_description(M, s1, s2, - s1_start, s1_start + r1_len - lcs, - s2_start, s2_start + m2_len - lcs) - #if - if r2_len or m1_len : - lcp = len(longest_common_prefix(s1[s1_end - r2_len:s1_end], - s2[s2_end - m1_len:s2_end])) - rightRv = DNA_description(M, s1, s2, - s1_end - r2_len + lcp, s1_end, s2_end - m1_len + lcp, s2_end) - #if - - partial = leftRv + [RawVar(start = s1_start + r1_len + 1, - end = s1_end - r2_len, type = "inv")] + rightRv - #if - - # Compound variant. - else : - r1_len = s1_end_f - lcs_f_len - r2_len = s1_end - s1_start - s1_end_f - m1_len = s2_end_f - lcs_f_len - m2_len = s2_end - s2_start - s2_end_f - - partial = DNA_description(M, s1, s2, s1_start, s1_start + r1_len, - s2_start, s2_start + m1_len) + DNA_description(M, s1, s2, - s1_end - r2_len, s1_end, s2_end - m2_len, s2_end) - #else - - if len(alleleDescription(partial)) - 2 <= len(alleleDescription(default)) : - return partial - return default -#DNA_description - -def describeDNA(original, mutated) : - """ - Convenience function for DNA_description(). - - @arg original: - @type original: str - @arg mutated: - @type mutated: str - - @returns: A list of RawVar objects, representing the allele. - @rval: list(RawVar) - """ - - s1 = str(original) - s2 = str(mutated) - lcp = len(longest_common_prefix(s1, s2)) - lcs = len(longest_common_suffix(s1[lcp:], s2[lcp:])) - s1_end = len(s1) - lcs - s2_end = len(s2) - lcs - - #M = LCSMatrix(s1[lcp:s1_end], s2[lcp:s2_end]) - M = LCSMatrix(s1, s2) - - return DNA_description(M, s1, s2, lcp, s1_end, lcp, s2_end) -#describeDNA - -# NOTE: Everything below this point is not needed once this is an integrated -# module. - -def describe(description) : - """ - Call Mutalyzer with a variant description to get the original and the - mutated sequence and make our own description. - - @arg description: A HGVS description of the variant to be checked. - @type description: str - """ - - service = Client(WSDL_LOCATION, cache = None).service - result = service.runMutalyzer(description) - - if result.rawVariants : - for i in result.rawVariants.RawVariant : - print i.description - print i.visualisation - print - #for - - newDescription = describeDNA(result.original, result.mutated) - - print("old: %s" % result.genomicDescription) - print("new: XX_XXXXXX.X:X.%s" % alleleDescription(newDescription)) - - for i in newDescription : - print("%s" % printpos(result.original, i.start, i.end + 1)) - - # NOTE: Maybe save this part for making a nice table? - print("\nstart\tend\ttype\tdel\tins\tshift\thgvs") - for i in newDescription : - print("%i\t%i\t%s\t%s\t%s\t%i\t%s" % (i.start, i.end, i.type, - i.deleted, i.inserted, i.shift, i.description())) -#describe - -def main() : - """ - Main entry point. - """ - - parser = argparse.ArgumentParser( - prog = "describe", - formatter_class = argparse.RawDescriptionHelpFormatter, - description = "", - epilog = """ -examples: - NM_002001.2:c.1_10delinsCTGGATCCTC - NM_002001.2:c.1_5delinsCCATG - NM_002001.2:c.[1_5delinsCCATG;15del] -""") - - parser.add_argument("-d", dest = "description", type = str, - required = True, help = "HGVS description of a variant.") - - arguments = parser.parse_args() - - describe(arguments.description) -#main - -if __name__ == "__main__" : - main() diff --git a/mutalyzer/describe.py b/mutalyzer/describe.py index 6ee458c9..2b8fcc50 100644 --- a/mutalyzer/describe.py +++ b/mutalyzer/describe.py @@ -10,6 +10,7 @@ leading from one sequence to an other. import Bio.Seq from mutalyzer.util import longest_common_prefix, longest_common_suffix from mutalyzer.util import palinsnoop, roll +from mutalyzer import models def LCSMatrix(s1, s2) : """ @@ -102,7 +103,7 @@ def LongestCommonSubstring(s1, s2) : return x_longest, y_longest, longest #LongestCommonSubstring -class RawVar() : +class RawVar(models.RawVar) : """ Container for a raw variant. @@ -142,7 +143,7 @@ class RawVar() : self.start = start self.start_offset = start_offset self.end = end - self.end_offset= end_offset + self.end_offset = end_offset self.type = type self.deleted = deleted self.inserted = inserted diff --git a/mutalyzer/models.py b/mutalyzer/models.py index 5f2c3422..95de214d 100644 --- a/mutalyzer/models.py +++ b/mutalyzer/models.py @@ -87,6 +87,34 @@ class RawVariant(ComplexModel): #RawVariant +class RawVar(ComplexModel): + """ + Used in MutalyzerOutput data type. + """ + __namespace__ = SOAP_NAMESPACE + + start = Mandatory.Integer + start_offset = Mandatory.Integer + end = Mandatory.Integer + end_offset = Mandatory.Integer + type = Mandatory.String + deleted = Mandatory.String + inserted = Mandatory.String + shift = Mandatory.Integer +#RawVar + + +class Allele(ComplexModel): + """ + Used in MutalyzerOutput data type. + """ + __namespace__ = SOAP_NAMESPACE + + description = Mandatory.String + allele = Array(RawVar) +#Allele + + class MutalyzerOutput(ComplexModel): """ Return type of SOAP method runMutalyzer. diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py index e8e05596..6551e3df 100644 --- a/mutalyzer/services/rpc.py +++ b/mutalyzer/services/rpc.py @@ -28,6 +28,7 @@ from mutalyzer.mapping import Converter from mutalyzer import Retriever from mutalyzer import GenRecord from mutalyzer.models import * +from mutalyzer import describe def _checkBuild(L, build) : @@ -69,9 +70,8 @@ def _checkChrom(L, D, chrom) : if not D.isChrom(chrom) : L.addMessage(__file__, 4, "EARG", "EARG %s" % chrom) - raise Fault("EARG", - "The chrom argument (%s) was not a valid " \ - "chromosome name." % chrom) + raise Fault("EARG", "The chrom argument (%s) was not a valid " \ + "chromosome name." % chrom) #if #_checkChrom @@ -91,8 +91,7 @@ def _checkPos(L, pos) : if pos < 1 : L.addMessage(__file__, 4, "ERANGE", "ERANGE %i" % pos) - raise Fault("ERANGE", - "The pos argument (%i) is out of range." % pos) + raise Fault("ERANGE", "The pos argument (%i) is out of range." % pos) #if #_checkPos @@ -128,7 +127,7 @@ class MutalyzerService(ServiceBase): #__init__ @srpc(Mandatory.String, Mandatory.String, Mandatory.Integer, Boolean, - _returns = Array(Mandatory.String)) + _returns=Array(Mandatory.String)) def getTranscripts(build, chrom, pos, versions=False) : """ Get all the transcripts that overlap with a chromosomal position. @@ -154,8 +153,8 @@ class MutalyzerService(ServiceBase): L = Output(__file__) L.addMessage(__file__, -1, "INFO", - "Received request getTranscripts(%s %s %s %s)" % (build, - chrom, pos, versions)) + "Received request getTranscripts(%s %s %s %s)" % (build, chrom, + pos, versions)) _checkBuild(L, build) D = Db.Mapping(build) @@ -172,17 +171,16 @@ class MutalyzerService(ServiceBase): ret = [r[0] for r in ret] L.addMessage(__file__, -1, "INFO", - "Finished processing getTranscripts(%s %s %s %s)" % (build, - chrom, pos, versions)) + "Finished processing getTranscripts(%s %s %s %s)" % (build, chrom, + pos, versions)) - L.addMessage(__file__, -1, "INFO", - "We return %s" % ret) + L.addMessage(__file__, -1, "INFO", "We return %s" % ret) del D, L return ret #getTranscripts - @srpc(Mandatory.String, Mandatory.String, _returns = Array(Mandatory.String)) + @srpc(Mandatory.String, Mandatory.String, _returns=Array(Mandatory.String)) def getTranscriptsByGeneName(build, name): """ Todo: documentation. @@ -190,8 +188,7 @@ class MutalyzerService(ServiceBase): L = Output(__file__) L.addMessage(__file__, -1, "INFO", - "Received request getTranscriptsByGene(%s %s)" % (build, - name)) + "Received request getTranscriptsByGene(%s %s)" % (build, name)) _checkBuild(L, build) D = Db.Mapping(build) @@ -199,8 +196,7 @@ class MutalyzerService(ServiceBase): ret = D.get_TranscriptsByGeneName(name) L.addMessage(__file__, -1, "INFO", - "Finished processing getTranscriptsByGene(%s %s)" % ( - build, name)) + "Finished processing getTranscriptsByGene(%s %s)" % (build, name)) if ret : l = [] @@ -212,7 +208,7 @@ class MutalyzerService(ServiceBase): #getTranscriptsByGene @srpc(Mandatory.String, Mandatory.String, Mandatory.Integer, - Mandatory.Integer, Mandatory.Integer, _returns = Array(Mandatory.String)) + Mandatory.Integer, Mandatory.Integer, _returns=Array(Mandatory.String)) def getTranscriptsRange(build, chrom, pos1, pos2, method) : """ Get all the transcripts that overlap with a range on a chromosome. @@ -256,7 +252,8 @@ class MutalyzerService(ServiceBase): #getTranscriptsRange @srpc(Mandatory.String, Mandatory.String, Mandatory.Integer, - Mandatory.Integer, Mandatory.Integer, _returns = Array(TranscriptMappingInfo)) + Mandatory.Integer, Mandatory.Integer, + _returns=Array(TranscriptMappingInfo)) def getTranscriptsMapping(build, chrom, pos1, pos2, method): """ Get all the transcripts and their info that overlap with a range on a @@ -288,8 +285,8 @@ class MutalyzerService(ServiceBase): """ output = Output(__file__) output.addMessage(__file__, -1, 'INFO', 'Received request ' \ - 'getTranscriptsRange(%s %s %s %s %s)' % \ - (build, chrom, pos1, pos2, method)) + 'getTranscriptsRange(%s %s %s %s %s)' % (build, chrom, pos1, pos2, + method)) _checkBuild(output, build) @@ -298,9 +295,9 @@ class MutalyzerService(ServiceBase): for transcript in database.get_Transcripts(chrom, pos1, pos2, method): t = TranscriptMappingInfo() - d = dict(zip(('transcript', 'start', 'stop', 'cds_start', 'cds_stop', - 'exon_starts', 'exon_stops', 'gene', 'chromosome', - 'orientation', 'protein', 'version'), transcript)) + d = dict(zip(('transcript', 'start', 'stop', 'cds_start', + 'cds_stop', 'exon_starts', 'exon_stops', 'gene', 'chromosome', + 'orientation', 'protein', 'version'), transcript)) if d['orientation'] == '-': d['start'], d['stop'] = d['stop'], d['start'] d['cds_start'], d['cds_stop'] = d['cds_stop'], d['cds_start'] @@ -316,13 +313,13 @@ class MutalyzerService(ServiceBase): transcripts.append(t) output.addMessage(__file__, -1, 'INFO', 'Finished processing ' \ - 'getTranscriptsRange(%s %s %s %s %s)' % \ - (build, chrom, pos1, pos2, method)) + 'getTranscriptsRange(%s %s %s %s %s)' % (build, chrom, pos1, pos2, + method)) return transcripts #getTranscriptsMapping - @srpc(Mandatory.String, Mandatory.String, _returns = Mandatory.String) + @srpc(Mandatory.String, Mandatory.String, _returns=Mandatory.String) def getGeneName(build, accno) : """ Find the gene name associated with a transcript. @@ -338,7 +335,7 @@ class MutalyzerService(ServiceBase): L = Output(__file__) L.addMessage(__file__, -1, "INFO", - "Received request getGeneName(%s %s)" % (build, accno)) + "Received request getGeneName(%s %s)" % (build, accno)) D = Db.Mapping(build) _checkBuild(L, build) @@ -346,7 +343,7 @@ class MutalyzerService(ServiceBase): ret = D.get_GeneName(accno.split('.')[0]) L.addMessage(__file__, -1, "INFO", - "Finished processing getGeneName(%s %s)" % (build, accno)) + "Finished processing getGeneName(%s %s)" % (build, accno)) del D, L return ret @@ -354,7 +351,7 @@ class MutalyzerService(ServiceBase): @srpc(Mandatory.String, Mandatory.String, Mandatory.String, - Mandatory.String, _returns = Mapping) + Mandatory.String, _returns=Mapping) def mappingInfo(LOVD_ver, build, accNo, variant) : """ Search for an NM number in the MySQL database, if the version @@ -364,9 +361,11 @@ class MutalyzerService(ServiceBase): - If no end position is present, the start position is assumed to be the end position. - - If the version number is not found in the database, an error message - is generated and a suggestion for an other version is given. - - If the reference sequence is not found at all, an error is returned. + - If the version number is not found in the database, an error + message is generated and a suggestion for an other version is + given. + - If the reference sequence is not found at all, an error is + returned. - If no variant is present, an error is returned. - If the variant is not accepted by the nomenclature parser, a parse error will be printed. @@ -397,22 +396,22 @@ class MutalyzerService(ServiceBase): L = Output(__file__) L.addMessage(__file__, -1, "INFO", - "Reveived request mappingInfo(%s %s %s %s)" % ( - LOVD_ver, build, accNo, variant)) + "Reveived request mappingInfo(%s %s %s %s)" % (LOVD_ver, build, + accNo, variant)) conv = Converter(build, L) result = conv.mainMapping(accNo, variant) L.addMessage(__file__, -1, "INFO", - "Finished processing mappingInfo(%s %s %s %s)" % ( - LOVD_ver, build, accNo, variant)) + "Finished processing mappingInfo(%s %s %s %s)" % (LOVD_ver, build, + accNo, variant)) del L return result #mappingInfo @srpc(Mandatory.String, Mandatory.String, Mandatory.String, - _returns = Transcript) + _returns=Transcript) def transcriptInfo(LOVD_ver, build, accNo) : """ Search for an NM number in the MySQL database, if the version @@ -435,19 +434,19 @@ class MutalyzerService(ServiceBase): O = Output(__file__) O.addMessage(__file__, -1, "INFO", - "Received request transcriptInfo(%s %s %s)" % (LOVD_ver, - build, accNo)) + "Received request transcriptInfo(%s %s %s)" % (LOVD_ver, build, + accNo)) converter = Converter(build, O) T = converter.mainTranscript(accNo) O.addMessage(__file__, -1, "INFO", - "Finished processing transcriptInfo(%s %s %s)" % ( - LOVD_ver, build, accNo)) + "Finished processing transcriptInfo(%s %s %s)" % (LOVD_ver, build, + accNo)) return T #transcriptInfo - @srpc(Mandatory.String, Mandatory.String, _returns = Mandatory.String) + @srpc(Mandatory.String, Mandatory.String, _returns=Mandatory.String) def chromAccession(build, name) : """ Get the accession number of a chromosome, given a name. @@ -464,7 +463,7 @@ class MutalyzerService(ServiceBase): L = Output(__file__) L.addMessage(__file__, -1, "INFO", - "Received request chromAccession(%s %s)" % (build, name)) + "Received request chromAccession(%s %s)" % (build, name)) _checkBuild(L, build) _checkChrom(L, D, name) @@ -472,14 +471,13 @@ class MutalyzerService(ServiceBase): result = D.chromAcc(name) L.addMessage(__file__, -1, "INFO", - "Finished processing chromAccession(%s %s)" % (build, - name)) + "Finished processing chromAccession(%s %s)" % (build, name)) del D,L return result #chromAccession - @srpc(Mandatory.String, Mandatory.String, _returns = Mandatory.String) + @srpc(Mandatory.String, Mandatory.String, _returns=Mandatory.String) def chromosomeName(build, accNo) : """ Get the name of a chromosome, given a chromosome accession number. @@ -496,7 +494,7 @@ class MutalyzerService(ServiceBase): L = Output(__file__) L.addMessage(__file__, -1, "INFO", - "Received request chromName(%s %s)" % (build, accNo)) + "Received request chromName(%s %s)" % (build, accNo)) _checkBuild(L, build) # self._checkChrom(L, D, name) @@ -504,14 +502,13 @@ class MutalyzerService(ServiceBase): result = D.chromName(accNo) L.addMessage(__file__, -1, "INFO", - "Finished processing chromName(%s %s)" % (build, - accNo)) + "Finished processing chromName(%s %s)" % (build, accNo)) del D,L return result #chromosomeName - @srpc(Mandatory.String, Mandatory.String, _returns = Mandatory.String) + @srpc(Mandatory.String, Mandatory.String, _returns=Mandatory.String) def getchromName(build, acc) : """ Get the chromosome name, given a transcript identifier (NM number). @@ -528,7 +525,7 @@ class MutalyzerService(ServiceBase): L = Output(__file__) L.addMessage(__file__, -1, "INFO", - "Received request getchromName(%s %s)" % (build, acc)) + "Received request getchromName(%s %s)" % (build, acc)) _checkBuild(L, build) # self._checkChrom(L, D, name) @@ -536,14 +533,14 @@ class MutalyzerService(ServiceBase): result = D.get_chromName(acc) L.addMessage(__file__, -1, "INFO", - "Finished processing getchromName(%s %s)" % (build, - acc)) + "Finished processing getchromName(%s %s)" % (build, acc)) del D,L return result #chromosomeName - @srpc(Mandatory.String, Mandatory.String, String, _returns = Array(Mandatory.String)) + @srpc(Mandatory.String, Mandatory.String, String, + _returns=Array(Mandatory.String)) def numberConversion(build, variant, gene=None): """ Converts I{c.} to I{g.} notation or vice versa @@ -563,8 +560,7 @@ class MutalyzerService(ServiceBase): D = Db.Mapping(build) O = Output(__file__) O.addMessage(__file__, -1, "INFO", - "Received request cTogConversion(%s %s)" % ( - build, variant)) + "Received request cTogConversion(%s %s)" % (build, variant)) converter = Converter(build, O) variant = converter.correctChrVariant(variant) @@ -576,12 +572,11 @@ class MutalyzerService(ServiceBase): result = [""] O.addMessage(__file__, -1, "INFO", - "Finished processing cTogConversion(%s %s)" % ( - build, variant)) + "Finished processing cTogConversion(%s %s)" % (build, variant)) return result #numberConversion - @srpc(Mandatory.String, _returns = CheckSyntaxOutput) + @srpc(Mandatory.String, _returns=CheckSyntaxOutput) def checkSyntax(variant): """ Checks the syntax of a variant. @@ -597,7 +592,7 @@ class MutalyzerService(ServiceBase): """ output = Output(__file__) output.addMessage(__file__, -1, "INFO", - "Received request checkSyntax(%s)" % (variant)) + "Received request checkSyntax(%s)" % (variant)) result = CheckSyntaxOutput() @@ -608,7 +603,7 @@ class MutalyzerService(ServiceBase): result.valid = bool(parsetree) output.addMessage(__file__, -1, "INFO", - "Finished processing checkSyntax(%s)" % (variant)) + "Finished processing checkSyntax(%s)" % (variant)) result.messages = [] for message in output.getMessages(): @@ -620,7 +615,7 @@ class MutalyzerService(ServiceBase): return result #checkSyntax - @srpc(Mandatory.String, _returns = MutalyzerOutput) + @srpc(Mandatory.String, _returns=MutalyzerOutput) def runMutalyzer(variant) : """ Run the Mutalyzer name checker. @@ -664,7 +659,7 @@ class MutalyzerService(ServiceBase): """ O = Output(__file__) O.addMessage(__file__, -1, "INFO", - "Received request runMutalyzer(%s)" % (variant)) + "Received request runMutalyzer(%s)" % (variant)) variantchecker.check_variant(variant, O) result = MutalyzerOutput() @@ -713,7 +708,7 @@ class MutalyzerService(ServiceBase): result.errors, result.warnings, result.summary = O.Summary() O.addMessage(__file__, -1, "INFO", - "Finished processing runMutalyzer(%s)" % (variant)) + "Finished processing runMutalyzer(%s)" % (variant)) result.messages = [] for message in O.getMessages(): @@ -725,7 +720,7 @@ class MutalyzerService(ServiceBase): return result #runMutalyzer - @srpc(Mandatory.String, Mandatory.String, _returns = TranscriptNameInfo) + @srpc(Mandatory.String, Mandatory.String, _returns=TranscriptNameInfo) def getGeneAndTranscript(genomicReference, transcriptReference) : """ Todo: documentation. @@ -734,8 +729,8 @@ class MutalyzerService(ServiceBase): D = Db.Cache() O.addMessage(__file__, -1, "INFO", - "Received request getGeneAndTranscript(%s, %s)" % (genomicReference, - transcriptReference)) + "Received request getGeneAndTranscript(%s, %s)" % ( + genomicReference, transcriptReference)) retriever = Retriever.GenBankRetriever(O, D) record = retriever.loadrecord(genomicReference) @@ -758,7 +753,7 @@ class MutalyzerService(ServiceBase): return ret #getGeneAndTranscript - @srpc(Mandatory.String, String, _returns = Array(TranscriptInfo)) + @srpc(Mandatory.String, String, _returns=Array(TranscriptInfo)) def getTranscriptsAndInfo(genomicReference, geneName=None): """ Given a genomic reference, return all its transcripts with their @@ -806,7 +801,8 @@ class MutalyzerService(ServiceBase): D = Db.Cache() O.addMessage(__file__, -1, "INFO", - "Received request getTranscriptsAndInfo(%s, %s)" % (genomicReference, geneName)) + "Received request getTranscriptsAndInfo(%s, %s)" % ( + genomicReference, geneName)) retriever = Retriever.GenBankRetriever(O, D) record = retriever.loadrecord(genomicReference) @@ -835,7 +831,8 @@ class MutalyzerService(ServiceBase): # Some raw info we don't use directly: # - transcript.CDS.location CDS start and stop (g) # - transcript.CDS.positionList: CDS splice sites (g) ? - # - transcript.mRNA.location: translation start and stop (g) + # - transcript.mRNA.location: translation start and stop + # (g) # - transcript.mRNA.positionList: splice sites (g) t.exons = [] @@ -843,10 +840,12 @@ class MutalyzerService(ServiceBase): exon = ExonInfo() exon.gStart = transcript.CM.getSpliceSite(i) exon.cStart = transcript.CM.g2c(exon.gStart) - exon.chromStart = GenRecordInstance.record.toChromPos(exon.gStart) + exon.chromStart = GenRecordInstance.record.toChromPos( + exon.gStart) exon.gStop = transcript.CM.getSpliceSite(i + 1) exon.cStop = transcript.CM.g2c(exon.gStop) - exon.chromStop = GenRecordInstance.record.toChromPos(exon.gStop) + exon.chromStop = GenRecordInstance.record.toChromPos( + exon.gStop) t.exons.append(exon) # Beware that CM.info() gives a made-up value for trans_end, @@ -854,12 +853,14 @@ class MutalyzerService(ServiceBase): # it in our output and use the end position of the last exon # instead. The made-up value is still useful for sorting, so # we return it as sortableTransEnd. - trans_start, sortable_trans_end, cds_stop = transcript.CM.info() + trans_start, sortable_trans_end, cds_stop = \ + transcript.CM.info() cds_start = 1 t.cTransEnd = str(t.exons[-1].cStop) t.gTransEnd = t.exons[-1].gStop - t.chromTransEnd = GenRecordInstance.record.toChromPos(t.gTransEnd) + t.chromTransEnd = GenRecordInstance.record.toChromPos( + t.gTransEnd) t.sortableTransEnd = sortable_trans_end # Todo: If we have no CDS info, CM.info() gives trans_end as @@ -872,10 +873,12 @@ class MutalyzerService(ServiceBase): t.product = transcript.transcriptProduct t.cTransStart = str(trans_start) t.gTransStart = transcript.CM.x2g(trans_start, 0) - t.chromTransStart = GenRecordInstance.record.toChromPos(t.gTransStart) + t.chromTransStart = GenRecordInstance.record.toChromPos( + t.gTransStart) t.cCDSStart = str(cds_start) t.gCDSStart = transcript.CM.x2g(cds_start, 0) - t.chromCDSStart = GenRecordInstance.record.toChromPos(t.gCDSStart) + t.chromCDSStart = GenRecordInstance.record.toChromPos( + t.gCDSStart) t.cCDSStop = str(cds_stop) t.gCDSStop = transcript.CM.x2g(cds_stop, 0) t.chromCDSStop = GenRecordInstance.record.toChromPos(t.gCDSStop) @@ -899,7 +902,7 @@ class MutalyzerService(ServiceBase): return transcripts #getTranscriptsAndInfo - @srpc(Mandatory.String, _returns = Mandatory.String) + @srpc(Mandatory.String, _returns=Mandatory.String) def upLoadGenBankLocalFile(content) : """ Not implemented yet. @@ -907,7 +910,7 @@ class MutalyzerService(ServiceBase): raise Fault('ENOTIMPLEMENTED', 'Not implemented yet') #upLoadGenBankLocalFile - @srpc(Mandatory.String, _returns = Mandatory.String) + @srpc(Mandatory.String, _returns=Mandatory.String) def upLoadGenBankRemoteFile(url) : """ Not implemented yet. @@ -916,7 +919,7 @@ class MutalyzerService(ServiceBase): #upLoadGenBankRemoteFile @srpc(Mandatory.String, Mandatory.String, Mandatory.Integer, - Mandatory.Integer, _returns = Mandatory.String) + Mandatory.Integer, _returns=Mandatory.String) def sliceChromosomeByGene(geneSymbol, organism, upStream, downStream) : """ @@ -946,7 +949,7 @@ class MutalyzerService(ServiceBase): #sliceChromosomeByGene @srpc(Mandatory.String, Mandatory.Integer, Mandatory.Integer, - Mandatory.Integer, _returns = Mandatory.String) + Mandatory.Integer, _returns=Mandatory.String) def sliceChromosome(chromAccNo, start, end, orientation) : """ Todo: documentation, error handling, argument checking, tests. @@ -972,7 +975,7 @@ class MutalyzerService(ServiceBase): return UD #sliceChromosome - @srpc(_returns = InfoOutput) + @srpc(_returns=InfoOutput) def info(): """ Gives some static application information, such as the current running @@ -1010,7 +1013,7 @@ class MutalyzerService(ServiceBase): return result #info - @srpc(_returns = Mandatory.String) + @srpc(_returns=Mandatory.String) def ping(): """ Simple function to test the interface. @@ -1021,7 +1024,28 @@ class MutalyzerService(ServiceBase): return 'pong' #ping - @srpc(DateTime, _returns = Array(CacheEntry)) + @srpc(Mandatory.String, Mandatory.String, _returns=Allele) + def descriptionExtract(reference, observed): + """ + Extract the HGVS variant description from a reference sequence and an + observed sequence. + """ + output = Output(__file__) + + output.addMessage(__file__, -1, 'INFO', + 'Received request descriptionExtract') + + result = Allele() + result.allele = describe.describeDNA(reference, observed) + result.description = describe.alleleDescription(result.allele) + + output.addMessage(__file__, -1, 'INFO', + 'Finished processing descriptionExtract') + + return result + #descriptionExtract + + @srpc(DateTime, _returns=Array(CacheEntry)) def getCache(created_since=None): """ Get a list of entries from the local cache created since given date. @@ -1031,8 +1055,7 @@ class MutalyzerService(ServiceBase): """ output = Output(__file__) - output.addMessage(__file__, -1, 'INFO', - 'Received request getCache') + output.addMessage(__file__, -1, 'INFO', 'Received request getCache') database = Db.Cache() sync = CacheSync(output, database) @@ -1042,18 +1065,17 @@ class MutalyzerService(ServiceBase): def cache_entry_to_soap(entry): e = CacheEntry() for attr in ('name', 'gi', 'hash', 'chromosomeName', - 'chromosomeStart', 'chromosomeStop', - 'chromosomeOrientation', 'url', 'created', 'cached'): + 'chromosomeStart', 'chromosomeStop', 'chromosomeOrientation', + 'url', 'created', 'cached'): setattr(e, attr, entry[attr]) return e - output.addMessage(__file__, -1, 'INFO', - 'Finished processing getCache') + output.addMessage(__file__, -1, 'INFO', 'Finished processing getCache') return map(cache_entry_to_soap, cache) #getCache - @srpc(Mandatory.String, _returns = Array(Mandatory.String)) + @srpc(Mandatory.String, _returns=Array(Mandatory.String)) def getdbSNPDescriptions(rs_id): """ Lookup HGVS descriptions for a dbSNP rs identifier. @@ -1067,19 +1089,19 @@ class MutalyzerService(ServiceBase): output = Output(__file__) output.addMessage(__file__, -1, 'INFO', - 'Received request getdbSNPDescription(%s)' % rs_id) + 'Received request getdbSNPDescription(%s)' % rs_id) retriever = Retriever.Retriever(output, None) descriptions = retriever.snpConvert(rs_id) output.addMessage(__file__, -1, 'INFO', - 'Finished processing getdbSNPDescription(%s)' % rs_id) + 'Finished processing getdbSNPDescription(%s)' % rs_id) # Todo: use SOAP Fault object here (see Trac issue #41). messages = output.getMessages() if messages: - error = 'The request could not be completed\n' \ - + '\n'.join(map(lambda m: str(m), output.getMessages())) + error = 'The request could not be completed\n' + \ + '\n'.join(map(lambda m: str(m), output.getMessages())) raise Exception(error) return descriptions -- GitLab