From 8131c606e9e80d1f3e9387b0d97411d6bf57925b Mon Sep 17 00:00:00 2001
From: "J.F.J. Laros" <j.f.j.laros@lumc.nl>
Date: Sat, 4 Aug 2012 12:07:35 +0000
Subject: [PATCH] Added a webservice function for the description extractor.

rpc.py:
- Added the function descriptionExtract().
- Standardised indentation.

models.py:
- Added a RawVar and an Allele class for the webservices.

describe.py:
- Made the RawVar class a child of models.RawVar. This is convenient for
  webservices since we can simply return this object.



git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@591 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
---
 extras/soap-tools/describe.py | 478 ----------------------------------
 mutalyzer/describe.py         |   5 +-
 mutalyzer/models.py           |  28 ++
 mutalyzer/services/rpc.py     | 212 ++++++++-------
 4 files changed, 148 insertions(+), 575 deletions(-)
 delete mode 100755 extras/soap-tools/describe.py

diff --git a/extras/soap-tools/describe.py b/extras/soap-tools/describe.py
deleted file mode 100755
index 34778407..00000000
--- a/extras/soap-tools/describe.py
+++ /dev/null
@@ -1,478 +0,0 @@
-#!/usr/bin/python
-
-"""
-Prototype of a module that can generate a HGVS description of the variant(s)
-leading from one sequence to an other.
-
-@requires: sys
-@requires: argparse
-@requires: Bio.Seq
-@requires: suds.client.Client
-"""
-
-# NOTE: The following modules are not needed once this is an integrated module.
-import sys
-import argparse
-from suds.client import Client
-from mutalyzer.util import monkey_patch_suds; monkey_patch_suds()
-
-WSDL_LOCATION = "http://localhost/mutalyzer/services/?wsdl"
-
-# NOTE: The following modules are really needed.
-import Bio.Seq
-from mutalyzer.util import longest_common_prefix, longest_common_suffix
-from mutalyzer.util import palinsnoop, roll
-
-def printMatrix(M) :
-    for i in M :
-        print i
-
-def LCSMatrix(s1, s2) :
-    """
-    """
-
-    y_max = len(s1) + 1
-    x_max = len(s2) + 1
-    M = [[0] * x_max for i in xrange(y_max)]
-    #printMatrix(M)
-
-    for x in xrange(1, y_max) :
-        for y in xrange(1, x_max) :
-            if s1[x - 1] == s2[y - 1] :
-                M[x][y] = M[x - 1][y - 1] + 1
-
-    return M
-#LCSMatrix
-
-def findMax(M, x1, x2, y1, y2) :
-    """
-    M = describe.LCSMatrix("banaan", "ana")
-
-    N = describe.LCSMatrix("banaan", "n")
-    describe.findMax(M, 1, 7, 2, 3)
-
-    N = describe.LCSMatrix("banaan", "na")
-    describe.findMax(M, 1, 7, 2, 4)
-    """
-
-    longest, x_longest, y_longest = 0, 0, 0
-
-    for x in xrange(x1, x2) :
-        x_relative = x - x1
-
-        for y in xrange(y1, y2) :
-            y_relative = y - y1
-            realVal = min(x_relative, y_relative, M[x][y])
-            #print realVal,
-
-            if realVal > longest :
-                longest = realVal
-                x_longest = x_relative
-                y_longest = y_relative
-            #if
-        #for
-        #print
-    #for
-
-    return x_longest, y_longest, longest
-#findMax
-
-def LongestCommonSubstring(s1, s2) :
-    """
-    Find the longest common substring between {s1} and {s2}.
-
-    Mainly copied from:
-    http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/
-        Longest_common_substring#Python
-
-    @arg s1: String 1.
-    @type s1: str
-    @arg s2: String 2.
-    @type s2: str
-
-    @returns: The end locations and the length of the longest common substring.
-    @rtype: tuple(int, int, int)
-    """
-
-    len_s1 = len(s1)
-    len_s2 = len(s2)
-    M = [[0] * (len_s2 + 1) for i in xrange(len_s1 + 1)]
-    longest, x_longest, y_longest = 0, 0, 0
-
-    for x in xrange(1, len_s1 + 1) :
-        for y in xrange(1, len_s2 + 1) :
-            if s1[x - 1] == s2[y - 1] :
-                M[x][y] = M[x - 1][y - 1] + 1
-
-                if M[x][y] > longest :
-                    longest = M[x][y]
-                    x_longest = x
-                    y_longest = y
-                #if
-            #if
-            else : # Doesn't seem to do anything?
-                M[x][y] = 0
-        #for
-    #for
-
-    #print s1, s2
-    #printMatrix(M)
-    return x_longest, y_longest, longest
-#LongestCommonSubstring
-
-class RawVar() :
-    """
-    Container for a raw variant.
-
-    To use this class correctly, do not supply more than the minimum amount of
-    data. The {description()} function may not work properly if too much
-    information is given.
-
-    Example: if {end} is initialised for a substitution, a range will be
-      retuned, resulting in a description like: 100_100A>T
-    """
-    # TODO: We now use the length of the variant as a measure of its
-    #       ``length'', but that makes it a bit context dependent, e.g., c.1A>T
-    #       and g.100A>T may be the same variant giving different lengths.
-    #       Maybe we should ignore the positions in a description. 
-
-    def __init__(self, start = 0, start_offset = 0, end = 0, end_offset = 0,
-        type = "none", deleted = "", inserted = "", shift = 0) :
-        """
-        Initialise the class with the appropriate values.
-
-        @arg start: Start position.
-        @type start: int
-        @arg start_offset:
-        @type start_offset: int
-        @arg end: End position.
-        @type end: int
-        @arg end_offset:
-        @type end_offset: int
-        @arg type: Variant type.
-        @type type: str
-        @arg deleted: Deleted part of the reference sequence.
-        @type deleted: str
-        @arg inserted: Inserted part.
-        @type inserted: str
-        @arg shift: Amount of freedom.
-        @type shift: int
-        """
-        # TODO: Will this container be used for all variants, or only genomic?
-        #       start_offset and end_offset may be never used.
-
-        self.start = start
-        self.start_offset = start_offset
-        self.end = end
-        self.end_offset= end_offset
-        self.type = type
-        self.deleted = deleted
-        self.inserted = inserted
-        self.shift = shift
-    #__init__
-
-    def description(self) :
-        """
-        Give the HGVS description of the raw variant stored in this class.
-
-        Note that this function relies on the absence of values to make the
-        correct description. Also see the comment in the class definition.
-
-        @returns: The HGVS description of the raw variant stored in this class.
-        @rtype: str
-        """
-
-        if not self.start :
-            return "="
-
-        descr = "%i" % self.start
-
-        if self.end :
-            descr += "_%i" % self.end
-
-        if self.type != "subst" :
-            descr += "%s" % self.type
-
-            if self.inserted :
-                return descr + "%s" % self.inserted
-            return descr
-        #if
-
-        return descr + "%s>%s" % (self.deleted, self.inserted)
-    #description
-#RawVar
-
-def alleleDescription(allele) :
-    """
-    Convert a list of raw variants to an HGVS allele description.
-
-    @arg allele: A list of raw variants representing an allele description.
-    @type allele: list(RawVar)
-
-    @returns: The HGVS description of {allele}.
-    @rval: str
-    """
-
-    if len(allele) > 1 :
-        return "[%s]" % ';'.join(map(lambda x : x.description(), allele))
-    return allele[0].description()
-#alleleDescription
-
-def printpos(s, start, end, fill = 0) :
-    """
-    For debugging purposes.
-    """
-    # TODO: See if this can partially replace or be merged with the
-    #       visualisation in the __mutate() function of mutator.py
-
-    fs = 10 # Flank size.
-
-    return "%s %s%s %s" % (s[start - fs:start], s[start:end], '-' * fill,
-        s[end:end + fs])
-#printpos
-
-def DNA_description(M, s1, s2, s1_start, s1_end, s2_start, s2_end) :
-    """
-    Give an allele description of the change from {s1} to {s2} in the range
-    {s1_start}..{s1_end} on {s1} and {s2_start}..{s2_end} on {s2}.
-
-    arg s1: Sequence 1.
-    type s1: str
-    arg s2: Sequence 2.
-    type s2: str
-    arg s1_start: Start of the range on {s1}.
-    type s1_start: int
-    arg s1_end: End of the range on {s1}.
-    type s1_end: int
-    arg s2_start: Start of the range on {s2}.
-    type s2_start: int
-    arg s2_end: End of the range on {s2}.
-    type s2_end: int
-
-    @returns: A list of RawVar objects, representing the allele.
-    @rval: list(RawVar)
-    """
-    # TODO: Instead of copying this function and adjusting it to make it work
-    #       for proteins, consider disabling parts like the inversion.
-    # TODO: Think about frameshift descriptions.
-
-    # Nothing happened.
-    if s1 == s2:
-        return [RawVar()]
-
-    # Insertion / Duplication.
-    if s1_start == s1_end :
-        ins_length = s2_end - s2_start
-        shift5, shift3 = roll(s2, s2_start + 1, s2_end)
-        shift = shift5 + shift3
-
-        s1_start += shift3
-        s1_end += shift3
-        s2_start += shift3
-        s2_end += shift3
-
-        if s2_start - ins_length >= 0 and \
-            s1[s1_start - ins_length:s1_start] == s2[s2_start:s2_end] :
-
-            if ins_length == 1 :
-                return [RawVar(start = s1_start, type = "dup", shift = shift)]
-            return [RawVar(start = s1_start - ins_length + 1, end = s1_end,
-                type = "dup", shift = shift)]
-        #if
-        return [RawVar(start = s1_start, end = s1_start + 1,
-            inserted = s2[s2_start:s2_end], type = "ins", shift = shift)]
-    #if
-
-    # Deletion.
-    if s2_start == s2_end :
-        shift5, shift3 = roll(s1, s1_start + 1, s1_end)
-        shift = shift5 + shift3
-
-        s1_start += shift3 + 1
-        s1_end += shift3
-
-        if s1_start == s1_end :
-            return [RawVar(start = s1_start, type = "del", shift = shift)]
-        return [RawVar(start = s1_start, end = s1_end, type = "del",
-            shift = shift)]
-    #if
-
-    # Substitution.
-    if s1_start + 1 == s1_end and s2_start + 1 == s2_end :
-        return [RawVar(start = s1_start + 1, deleted = s1[s1_start],
-            inserted = s2[s2_start], type = "subst")]
-
-    # Simple InDel.
-    if s1_start + 1 == s1_end :
-        return [RawVar(start = s1_start + 1, inserted = s2[s2_start:s2_end],
-            type = "delins")]
-
-    # TODO: Refactor the code after this point.
-
-    # At this stage, we either have an inversion, an indel or a Compound
-    # variant.
-    a, b, c = findMax(M, s1_start, s1_end, s2_start, s2_end)
-    s1_end_f, s2_end_f, lcs_f_len = LongestCommonSubstring(s1[s1_start:s1_end],
-        s2[s2_start:s2_end])
-    s1_end_r, s2_end_r, lcs_r_len = LongestCommonSubstring(s1[s1_start:s1_end],
-        Bio.Seq.reverse_complement(s2[s2_start:s2_end]))
-    print "N:", a, b, c
-    print "O:", s1_end_f, s2_end_f, lcs_f_len
-
-    # Palindrome snooping.
-    trim = palinsnoop(s1[s1_start + s1_end_r - lcs_r_len:s1_start + s1_end_r])
-    if trim == -1 :   # Full palindrome.
-        lcs_r_len = 0 # s1_end_r and s2_end_r should not be used after this.
-
-    # Inversion or Compound variant.
-    default = [RawVar(start = s1_start + 1, end = s1_end,
-        inserted = s2[s2_start:s2_end], type = "delins")]
-
-    if not (lcs_f_len or lcs_r_len) : # Optimisation, not really needed.
-        return default
-
-    # Inversion.
-    if lcs_f_len <= lcs_r_len :
-        if trim > 0 : # Partial palindrome.
-            s1_end_r -= trim
-            s2_end_r -= trim
-            lcs_r_len -= 2 * trim
-        #if
-
-        # Simple Inversion.
-        if s2_end - s2_start == lcs_r_len and s1_end - s1_start == lcs_r_len :
-            return [RawVar(start = s1_start + 1, end = s1_end, type = "inv")]
-
-        r1_len = s1_end_r - lcs_r_len
-        r2_len = s1_end - s1_start - s1_end_r
-        m1_len = s2_end_r - lcs_r_len
-        m2_len = s2_end - s2_start - s2_end_r
-
-        # The flanks of the inversion (but not both) can be empty, so we
-        # generate descriptions conditionally.
-        leftRv = []
-        rightRv = []
-        if r1_len or m2_len :
-            lcs = len(longest_common_suffix(s1[s1_start:s1_start + r1_len],
-                s2[s2_start:s2_start + m2_len]))
-            leftRv = DNA_description(M, s1, s2,
-                s1_start, s1_start + r1_len - lcs,
-                s2_start, s2_start + m2_len - lcs)
-        #if
-        if r2_len or m1_len :
-            lcp = len(longest_common_prefix(s1[s1_end - r2_len:s1_end],
-                s2[s2_end - m1_len:s2_end]))
-            rightRv = DNA_description(M, s1, s2,
-                s1_end - r2_len + lcp, s1_end, s2_end - m1_len + lcp, s2_end)
-        #if
-
-        partial = leftRv + [RawVar(start = s1_start + r1_len + 1,
-            end = s1_end - r2_len, type = "inv")] + rightRv
-    #if
-
-    # Compound variant.
-    else :
-        r1_len = s1_end_f - lcs_f_len
-        r2_len = s1_end - s1_start - s1_end_f
-        m1_len = s2_end_f - lcs_f_len
-        m2_len = s2_end - s2_start - s2_end_f
-
-        partial = DNA_description(M, s1, s2, s1_start, s1_start + r1_len,
-            s2_start, s2_start + m1_len) + DNA_description(M, s1, s2,
-            s1_end - r2_len, s1_end, s2_end - m2_len, s2_end)
-    #else
-
-    if len(alleleDescription(partial)) - 2 <= len(alleleDescription(default)) :
-        return partial
-    return default
-#DNA_description
-
-def describeDNA(original, mutated) :
-    """
-    Convenience function for DNA_description().
-
-    @arg original:
-    @type original: str
-    @arg mutated:
-    @type mutated: str
-
-    @returns: A list of RawVar objects, representing the allele.
-    @rval: list(RawVar)
-    """
-
-    s1 = str(original)
-    s2 = str(mutated)
-    lcp = len(longest_common_prefix(s1, s2))
-    lcs = len(longest_common_suffix(s1[lcp:], s2[lcp:]))
-    s1_end = len(s1) - lcs
-    s2_end = len(s2) - lcs
-    
-    #M = LCSMatrix(s1[lcp:s1_end], s2[lcp:s2_end])
-    M = LCSMatrix(s1, s2)
-
-    return DNA_description(M, s1, s2, lcp, s1_end, lcp, s2_end)
-#describeDNA
-
-# NOTE: Everything below this point is not needed once this is an integrated
-#       module.
-
-def describe(description) :
-    """
-    Call Mutalyzer with a variant description to get the original and the
-    mutated sequence and make our own description.
-
-    @arg description: A HGVS description of the variant to be checked.
-    @type description: str
-    """
-
-    service = Client(WSDL_LOCATION, cache = None).service
-    result = service.runMutalyzer(description)
-
-    if result.rawVariants :
-        for i in result.rawVariants.RawVariant :
-            print i.description
-            print i.visualisation
-            print
-        #for
-
-    newDescription = describeDNA(result.original, result.mutated)
-
-    print("old: %s" % result.genomicDescription)
-    print("new: XX_XXXXXX.X:X.%s" % alleleDescription(newDescription))
-
-    for i in newDescription :
-        print("%s" % printpos(result.original, i.start, i.end + 1))
-
-    # NOTE: Maybe save this part for making a nice table?
-    print("\nstart\tend\ttype\tdel\tins\tshift\thgvs")
-    for i in newDescription :
-        print("%i\t%i\t%s\t%s\t%s\t%i\t%s" % (i.start, i.end, i.type,
-            i.deleted, i.inserted, i.shift, i.description()))
-#describe
-
-def main() :
-    """
-    Main entry point.
-    """
-
-    parser = argparse.ArgumentParser(
-        prog = "describe",
-        formatter_class = argparse.RawDescriptionHelpFormatter,
-        description = "",
-        epilog = """
-examples:
-  NM_002001.2:c.1_10delinsCTGGATCCTC
-  NM_002001.2:c.1_5delinsCCATG
-  NM_002001.2:c.[1_5delinsCCATG;15del]
-""")
-
-    parser.add_argument("-d", dest = "description", type = str,
-        required = True, help = "HGVS description of a variant.")
-
-    arguments = parser.parse_args()
-
-    describe(arguments.description)
-#main
-
-if __name__ == "__main__" :
-    main()
diff --git a/mutalyzer/describe.py b/mutalyzer/describe.py
index 6ee458c9..2b8fcc50 100644
--- a/mutalyzer/describe.py
+++ b/mutalyzer/describe.py
@@ -10,6 +10,7 @@ leading from one sequence to an other.
 import Bio.Seq
 from mutalyzer.util import longest_common_prefix, longest_common_suffix
 from mutalyzer.util import palinsnoop, roll
+from mutalyzer import models
 
 def LCSMatrix(s1, s2) :
     """
@@ -102,7 +103,7 @@ def LongestCommonSubstring(s1, s2) :
     return x_longest, y_longest, longest
 #LongestCommonSubstring
 
-class RawVar() :
+class RawVar(models.RawVar) :
     """
     Container for a raw variant.
 
@@ -142,7 +143,7 @@ class RawVar() :
         self.start = start
         self.start_offset = start_offset
         self.end = end
-        self.end_offset= end_offset
+        self.end_offset = end_offset
         self.type = type
         self.deleted = deleted
         self.inserted = inserted
diff --git a/mutalyzer/models.py b/mutalyzer/models.py
index 5f2c3422..95de214d 100644
--- a/mutalyzer/models.py
+++ b/mutalyzer/models.py
@@ -87,6 +87,34 @@ class RawVariant(ComplexModel):
 #RawVariant
 
 
+class RawVar(ComplexModel):
+    """
+    Raw variant container for the webservices; base class of describe.RawVar.
+    """
+    __namespace__ = SOAP_NAMESPACE
+
+    start = Mandatory.Integer
+    start_offset = Mandatory.Integer
+    end = Mandatory.Integer
+    end_offset = Mandatory.Integer
+    type = Mandatory.String
+    deleted = Mandatory.String
+    inserted = Mandatory.String
+    shift = Mandatory.Integer
+#RawVar
+
+
+class Allele(ComplexModel):
+    """
+    Allele for the webservices: a description plus its list of RawVar.
+    """
+    __namespace__ = SOAP_NAMESPACE
+
+    description = Mandatory.String
+    allele = Array(RawVar)
+#Allele
+
+
 class MutalyzerOutput(ComplexModel):
     """
     Return type of SOAP method runMutalyzer.
diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py
index e8e05596..6551e3df 100644
--- a/mutalyzer/services/rpc.py
+++ b/mutalyzer/services/rpc.py
@@ -28,6 +28,7 @@ from mutalyzer.mapping import Converter
 from mutalyzer import Retriever
 from mutalyzer import GenRecord
 from mutalyzer.models import *
+from mutalyzer import describe
 
 
 def _checkBuild(L, build) :
@@ -69,9 +70,8 @@ def _checkChrom(L, D, chrom) :
 
     if not D.isChrom(chrom) :
         L.addMessage(__file__, 4, "EARG", "EARG %s" % chrom)
-        raise Fault("EARG",
-                    "The chrom argument (%s) was not a valid " \
-                    "chromosome name." % chrom)
+        raise Fault("EARG", "The chrom argument (%s) was not a valid " \
+            "chromosome name." % chrom)
     #if
 #_checkChrom
 
@@ -91,8 +91,7 @@ def _checkPos(L, pos) :
 
     if pos < 1 :
         L.addMessage(__file__, 4, "ERANGE", "ERANGE %i" % pos)
-        raise Fault("ERANGE",
-                    "The pos argument (%i) is out of range." % pos)
+        raise Fault("ERANGE", "The pos argument (%i) is out of range." % pos)
     #if
 #_checkPos
 
@@ -128,7 +127,7 @@ class MutalyzerService(ServiceBase):
     #__init__
 
     @srpc(Mandatory.String, Mandatory.String, Mandatory.Integer, Boolean,
-        _returns = Array(Mandatory.String))
+        _returns=Array(Mandatory.String))
     def getTranscripts(build, chrom, pos, versions=False) :
         """
         Get all the transcripts that overlap with a chromosomal position.
@@ -154,8 +153,8 @@ class MutalyzerService(ServiceBase):
         L = Output(__file__)
 
         L.addMessage(__file__, -1, "INFO",
-                     "Received request getTranscripts(%s %s %s %s)" % (build,
-                     chrom, pos, versions))
+            "Received request getTranscripts(%s %s %s %s)" % (build, chrom,
+            pos, versions))
 
         _checkBuild(L, build)
         D = Db.Mapping(build)
@@ -172,17 +171,16 @@ class MutalyzerService(ServiceBase):
             ret = [r[0] for r in ret]
 
         L.addMessage(__file__, -1, "INFO",
-                     "Finished processing getTranscripts(%s %s %s %s)" % (build,
-                     chrom, pos, versions))
+            "Finished processing getTranscripts(%s %s %s %s)" % (build, chrom,
+            pos, versions))
 
-        L.addMessage(__file__, -1, "INFO",
-                     "We return %s" % ret)
+        L.addMessage(__file__, -1, "INFO", "We return %s" % ret)
 
         del D, L
         return ret
     #getTranscripts
 
-    @srpc(Mandatory.String, Mandatory.String, _returns = Array(Mandatory.String))
+    @srpc(Mandatory.String, Mandatory.String, _returns=Array(Mandatory.String))
     def getTranscriptsByGeneName(build, name):
         """
         Todo: documentation.
@@ -190,8 +188,7 @@ class MutalyzerService(ServiceBase):
         L = Output(__file__)
 
         L.addMessage(__file__, -1, "INFO",
-                     "Received request getTranscriptsByGene(%s %s)" % (build,
-                     name))
+            "Received request getTranscriptsByGene(%s %s)" % (build, name))
 
         _checkBuild(L, build)
         D = Db.Mapping(build)
@@ -199,8 +196,7 @@ class MutalyzerService(ServiceBase):
         ret = D.get_TranscriptsByGeneName(name)
 
         L.addMessage(__file__, -1, "INFO",
-                     "Finished processing getTranscriptsByGene(%s %s)" % (
-                     build, name))
+            "Finished processing getTranscriptsByGene(%s %s)" % (build, name))
 
         if ret :
             l = []
@@ -212,7 +208,7 @@ class MutalyzerService(ServiceBase):
     #getTranscriptsByGene
 
     @srpc(Mandatory.String, Mandatory.String, Mandatory.Integer,
-        Mandatory.Integer, Mandatory.Integer, _returns = Array(Mandatory.String))
+        Mandatory.Integer, Mandatory.Integer, _returns=Array(Mandatory.String))
     def getTranscriptsRange(build, chrom, pos1, pos2, method) :
         """
         Get all the transcripts that overlap with a range on a chromosome.
@@ -256,7 +252,8 @@ class MutalyzerService(ServiceBase):
     #getTranscriptsRange
 
     @srpc(Mandatory.String, Mandatory.String, Mandatory.Integer,
-        Mandatory.Integer, Mandatory.Integer, _returns = Array(TranscriptMappingInfo))
+        Mandatory.Integer, Mandatory.Integer,
+        _returns=Array(TranscriptMappingInfo))
     def getTranscriptsMapping(build, chrom, pos1, pos2, method):
         """
         Get all the transcripts and their info that overlap with a range on a
@@ -288,8 +285,8 @@ class MutalyzerService(ServiceBase):
         """
         output = Output(__file__)
         output.addMessage(__file__, -1, 'INFO', 'Received request ' \
-                          'getTranscriptsRange(%s %s %s %s %s)' % \
-                          (build, chrom, pos1, pos2, method))
+            'getTranscriptsRange(%s %s %s %s %s)' % (build, chrom, pos1, pos2,
+            method))
 
         _checkBuild(output, build)
 
@@ -298,9 +295,9 @@ class MutalyzerService(ServiceBase):
 
         for transcript in database.get_Transcripts(chrom, pos1, pos2, method):
             t = TranscriptMappingInfo()
-            d = dict(zip(('transcript', 'start', 'stop', 'cds_start', 'cds_stop',
-                          'exon_starts', 'exon_stops', 'gene', 'chromosome',
-                          'orientation', 'protein', 'version'), transcript))
+            d = dict(zip(('transcript', 'start', 'stop', 'cds_start',
+                'cds_stop', 'exon_starts', 'exon_stops', 'gene', 'chromosome',
+                'orientation', 'protein', 'version'), transcript))
             if d['orientation'] == '-':
                 d['start'], d['stop'] = d['stop'], d['start']
                 d['cds_start'], d['cds_stop'] = d['cds_stop'], d['cds_start']
@@ -316,13 +313,13 @@ class MutalyzerService(ServiceBase):
             transcripts.append(t)
 
         output.addMessage(__file__, -1, 'INFO', 'Finished processing ' \
-                          'getTranscriptsRange(%s %s %s %s %s)' % \
-                          (build, chrom, pos1, pos2, method))
+            'getTranscriptsRange(%s %s %s %s %s)' % (build, chrom, pos1, pos2,
+            method))
 
         return transcripts
     #getTranscriptsMapping
 
-    @srpc(Mandatory.String, Mandatory.String, _returns = Mandatory.String)
+    @srpc(Mandatory.String, Mandatory.String, _returns=Mandatory.String)
     def getGeneName(build, accno) :
         """
         Find the gene name associated with a transcript.
@@ -338,7 +335,7 @@ class MutalyzerService(ServiceBase):
         L = Output(__file__)
 
         L.addMessage(__file__, -1, "INFO",
-                     "Received request getGeneName(%s %s)" % (build, accno))
+            "Received request getGeneName(%s %s)" % (build, accno))
 
         D = Db.Mapping(build)
         _checkBuild(L, build)
@@ -346,7 +343,7 @@ class MutalyzerService(ServiceBase):
         ret = D.get_GeneName(accno.split('.')[0])
 
         L.addMessage(__file__, -1, "INFO",
-                     "Finished processing getGeneName(%s %s)" % (build, accno))
+            "Finished processing getGeneName(%s %s)" % (build, accno))
 
         del D, L
         return ret
@@ -354,7 +351,7 @@ class MutalyzerService(ServiceBase):
 
 
     @srpc(Mandatory.String, Mandatory.String, Mandatory.String,
-        Mandatory.String, _returns = Mapping)
+        Mandatory.String, _returns=Mapping)
     def mappingInfo(LOVD_ver, build, accNo, variant) :
         """
         Search for an NM number in the MySQL database, if the version
@@ -364,9 +361,11 @@ class MutalyzerService(ServiceBase):
 
           - If no end position is present, the start position is assumed to be
             the end position.
-          - If the version number is not found in the database, an error message
-            is generated and a suggestion for an other version is given.
-          - If the reference sequence is not found at all, an error is returned.
+          - If the version number is not found in the database, an error
+            message is generated and a suggestion for an other version is
+            given.
+          - If the reference sequence is not found at all, an error is
+            returned.
           - If no variant is present, an error is returned.
           - If the variant is not accepted by the nomenclature parser, a parse
             error will be printed.
@@ -397,22 +396,22 @@ class MutalyzerService(ServiceBase):
         L = Output(__file__)
 
         L.addMessage(__file__, -1, "INFO",
-                     "Reveived request mappingInfo(%s %s %s %s)" % (
-                        LOVD_ver, build, accNo, variant))
+            "Reveived request mappingInfo(%s %s %s %s)" % (LOVD_ver, build,
+            accNo, variant))
 
         conv = Converter(build, L)
         result = conv.mainMapping(accNo, variant)
 
         L.addMessage(__file__, -1, "INFO",
-                     "Finished processing mappingInfo(%s %s %s %s)" % (
-                        LOVD_ver, build, accNo, variant))
+            "Finished processing mappingInfo(%s %s %s %s)" % (LOVD_ver, build,
+            accNo, variant))
 
         del L
         return result
     #mappingInfo
 
     @srpc(Mandatory.String, Mandatory.String, Mandatory.String,
-        _returns = Transcript)
+        _returns=Transcript)
     def transcriptInfo(LOVD_ver, build, accNo) :
         """
         Search for an NM number in the MySQL database, if the version
@@ -435,19 +434,19 @@ class MutalyzerService(ServiceBase):
         O = Output(__file__)
 
         O.addMessage(__file__, -1, "INFO",
-                     "Received request transcriptInfo(%s %s %s)" % (LOVD_ver,
-                     build, accNo))
+            "Received request transcriptInfo(%s %s %s)" % (LOVD_ver, build,
+            accNo))
 
         converter = Converter(build, O)
         T = converter.mainTranscript(accNo)
 
         O.addMessage(__file__, -1, "INFO",
-                     "Finished processing transcriptInfo(%s %s %s)" % (
-                     LOVD_ver, build, accNo))
+            "Finished processing transcriptInfo(%s %s %s)" % (LOVD_ver, build,
+            accNo))
         return T
     #transcriptInfo
 
-    @srpc(Mandatory.String, Mandatory.String, _returns = Mandatory.String)
+    @srpc(Mandatory.String, Mandatory.String, _returns=Mandatory.String)
     def chromAccession(build, name) :
         """
         Get the accession number of a chromosome, given a name.
@@ -464,7 +463,7 @@ class MutalyzerService(ServiceBase):
         L = Output(__file__)
 
         L.addMessage(__file__, -1, "INFO",
-                     "Received request chromAccession(%s %s)" % (build, name))
+            "Received request chromAccession(%s %s)" % (build, name))
 
         _checkBuild(L, build)
         _checkChrom(L, D, name)
@@ -472,14 +471,13 @@ class MutalyzerService(ServiceBase):
         result = D.chromAcc(name)
 
         L.addMessage(__file__, -1, "INFO",
-                     "Finished processing chromAccession(%s %s)" % (build,
-                     name))
+            "Finished processing chromAccession(%s %s)" % (build, name))
 
         del D,L
         return result
     #chromAccession
 
-    @srpc(Mandatory.String, Mandatory.String, _returns = Mandatory.String)
+    @srpc(Mandatory.String, Mandatory.String, _returns=Mandatory.String)
     def chromosomeName(build, accNo) :
         """
         Get the name of a chromosome, given a chromosome accession number.
@@ -496,7 +494,7 @@ class MutalyzerService(ServiceBase):
         L = Output(__file__)
 
         L.addMessage(__file__, -1, "INFO",
-                     "Received request chromName(%s %s)" % (build, accNo))
+            "Received request chromName(%s %s)" % (build, accNo))
 
         _checkBuild(L, build)
 #        self._checkChrom(L, D, name)
@@ -504,14 +502,13 @@ class MutalyzerService(ServiceBase):
         result = D.chromName(accNo)
 
         L.addMessage(__file__, -1, "INFO",
-                     "Finished processing chromName(%s %s)" % (build,
-                     accNo))
+            "Finished processing chromName(%s %s)" % (build, accNo))
 
         del D,L
         return result
     #chromosomeName
 
-    @srpc(Mandatory.String, Mandatory.String, _returns = Mandatory.String)
+    @srpc(Mandatory.String, Mandatory.String, _returns=Mandatory.String)
     def getchromName(build, acc) :
         """
         Get the chromosome name, given a transcript identifier (NM number).
@@ -528,7 +525,7 @@ class MutalyzerService(ServiceBase):
         L = Output(__file__)
 
         L.addMessage(__file__, -1, "INFO",
-                     "Received request getchromName(%s %s)" % (build, acc))
+            "Received request getchromName(%s %s)" % (build, acc))
 
         _checkBuild(L, build)
 #        self._checkChrom(L, D, name)
@@ -536,14 +533,14 @@ class MutalyzerService(ServiceBase):
         result = D.get_chromName(acc)
 
         L.addMessage(__file__, -1, "INFO",
-                     "Finished processing getchromName(%s %s)" % (build,
-                     acc))
+            "Finished processing getchromName(%s %s)" % (build, acc))
 
         del D,L
         return result
     #chromosomeName
 
-    @srpc(Mandatory.String, Mandatory.String, String, _returns = Array(Mandatory.String))
+    @srpc(Mandatory.String, Mandatory.String, String, 
+        _returns=Array(Mandatory.String))
     def numberConversion(build, variant, gene=None):
         """
         Converts I{c.} to I{g.} notation or vice versa
@@ -563,8 +560,7 @@ class MutalyzerService(ServiceBase):
         D = Db.Mapping(build)
         O = Output(__file__)
         O.addMessage(__file__, -1, "INFO",
-                     "Received request cTogConversion(%s %s)" % (
-                     build, variant))
+            "Received request cTogConversion(%s %s)" % (build, variant))
         converter = Converter(build, O)
         variant = converter.correctChrVariant(variant)
 
@@ -576,12 +572,11 @@ class MutalyzerService(ServiceBase):
             result = [""]
 
         O.addMessage(__file__, -1, "INFO",
-                     "Finished processing cTogConversion(%s %s)" % (
-                     build, variant))
+            "Finished processing cTogConversion(%s %s)" % (build, variant))
         return result
     #numberConversion
 
-    @srpc(Mandatory.String, _returns = CheckSyntaxOutput)
+    @srpc(Mandatory.String, _returns=CheckSyntaxOutput)
     def checkSyntax(variant):
         """
         Checks the syntax of a variant.
@@ -597,7 +592,7 @@ class MutalyzerService(ServiceBase):
         """
         output = Output(__file__)
         output.addMessage(__file__, -1, "INFO",
-                          "Received request checkSyntax(%s)" % (variant))
+            "Received request checkSyntax(%s)" % (variant))
 
         result = CheckSyntaxOutput()
 
@@ -608,7 +603,7 @@ class MutalyzerService(ServiceBase):
         result.valid = bool(parsetree)
 
         output.addMessage(__file__, -1, "INFO",
-                          "Finished processing checkSyntax(%s)" % (variant))
+            "Finished processing checkSyntax(%s)" % (variant))
 
         result.messages = []
         for message in output.getMessages():
@@ -620,7 +615,7 @@ class MutalyzerService(ServiceBase):
         return result
     #checkSyntax
 
-    @srpc(Mandatory.String, _returns = MutalyzerOutput)
+    @srpc(Mandatory.String, _returns=MutalyzerOutput)
     def runMutalyzer(variant) :
         """
         Run the Mutalyzer name checker.
@@ -664,7 +659,7 @@ class MutalyzerService(ServiceBase):
         """
         O = Output(__file__)
         O.addMessage(__file__, -1, "INFO",
-                     "Received request runMutalyzer(%s)" % (variant))
+            "Received request runMutalyzer(%s)" % (variant))
         variantchecker.check_variant(variant, O)
 
         result = MutalyzerOutput()
@@ -713,7 +708,7 @@ class MutalyzerService(ServiceBase):
         result.errors, result.warnings, result.summary = O.Summary()
 
         O.addMessage(__file__, -1, "INFO",
-                     "Finished processing runMutalyzer(%s)" % (variant))
+            "Finished processing runMutalyzer(%s)" % (variant))
 
         result.messages = []
         for message in O.getMessages():
@@ -725,7 +720,7 @@ class MutalyzerService(ServiceBase):
         return result
     #runMutalyzer
 
-    @srpc(Mandatory.String, Mandatory.String, _returns = TranscriptNameInfo)
+    @srpc(Mandatory.String, Mandatory.String, _returns=TranscriptNameInfo)
     def getGeneAndTranscript(genomicReference, transcriptReference) :
         """
         Todo: documentation.
@@ -734,8 +729,8 @@ class MutalyzerService(ServiceBase):
         D = Db.Cache()
 
         O.addMessage(__file__, -1, "INFO",
-            "Received request getGeneAndTranscript(%s, %s)" % (genomicReference,
-            transcriptReference))
+            "Received request getGeneAndTranscript(%s, %s)" % (
+            genomicReference, transcriptReference))
         retriever = Retriever.GenBankRetriever(O, D)
         record = retriever.loadrecord(genomicReference)
 
@@ -758,7 +753,7 @@ class MutalyzerService(ServiceBase):
         return ret
     #getGeneAndTranscript
 
-    @srpc(Mandatory.String, String, _returns = Array(TranscriptInfo))
+    @srpc(Mandatory.String, String, _returns=Array(TranscriptInfo))
     def getTranscriptsAndInfo(genomicReference, geneName=None):
         """
         Given a genomic reference, return all its transcripts with their
@@ -806,7 +801,8 @@ class MutalyzerService(ServiceBase):
         D = Db.Cache()
 
         O.addMessage(__file__, -1, "INFO",
-            "Received request getTranscriptsAndInfo(%s, %s)" % (genomicReference, geneName))
+            "Received request getTranscriptsAndInfo(%s, %s)" % (
+            genomicReference, geneName))
         retriever = Retriever.GenBankRetriever(O, D)
         record = retriever.loadrecord(genomicReference)
 
@@ -835,7 +831,8 @@ class MutalyzerService(ServiceBase):
                 # Some raw info we don't use directly:
                 # - transcript.CDS.location        CDS start and stop (g)
                 # - transcript.CDS.positionList:   CDS splice sites (g) ?
-                # - transcript.mRNA.location:      translation start and stop (g)
+                # - transcript.mRNA.location:      translation start and stop
+                #                                  (g)
                 # - transcript.mRNA.positionList:  splice sites (g)
 
                 t.exons = []
@@ -843,10 +840,12 @@ class MutalyzerService(ServiceBase):
                     exon = ExonInfo()
                     exon.gStart = transcript.CM.getSpliceSite(i)
                     exon.cStart = transcript.CM.g2c(exon.gStart)
-                    exon.chromStart = GenRecordInstance.record.toChromPos(exon.gStart)
+                    exon.chromStart = GenRecordInstance.record.toChromPos(
+                        exon.gStart)
                     exon.gStop = transcript.CM.getSpliceSite(i + 1)
                     exon.cStop = transcript.CM.g2c(exon.gStop)
-                    exon.chromStop = GenRecordInstance.record.toChromPos(exon.gStop)
+                    exon.chromStop = GenRecordInstance.record.toChromPos(
+                        exon.gStop)
                     t.exons.append(exon)
 
                 # Beware that CM.info() gives a made-up value for trans_end,
@@ -854,12 +853,14 @@ class MutalyzerService(ServiceBase):
                 # it in our output and use the end position of the last exon
                 # instead. The made-up value is still useful for sorting, so
                 # we return it as sortableTransEnd.
-                trans_start, sortable_trans_end, cds_stop = transcript.CM.info()
+                trans_start, sortable_trans_end, cds_stop = \
+                    transcript.CM.info()
                 cds_start = 1
 
                 t.cTransEnd = str(t.exons[-1].cStop)
                 t.gTransEnd = t.exons[-1].gStop
-                t.chromTransEnd = GenRecordInstance.record.toChromPos(t.gTransEnd)
+                t.chromTransEnd = GenRecordInstance.record.toChromPos(
+                    t.gTransEnd)
                 t.sortableTransEnd = sortable_trans_end
 
                 # Todo: If we have no CDS info, CM.info() gives trans_end as
@@ -872,10 +873,12 @@ class MutalyzerService(ServiceBase):
                 t.product = transcript.transcriptProduct
                 t.cTransStart = str(trans_start)
                 t.gTransStart = transcript.CM.x2g(trans_start, 0)
-                t.chromTransStart = GenRecordInstance.record.toChromPos(t.gTransStart)
+                t.chromTransStart = GenRecordInstance.record.toChromPos(
+                    t.gTransStart)
                 t.cCDSStart = str(cds_start)
                 t.gCDSStart = transcript.CM.x2g(cds_start, 0)
-                t.chromCDSStart = GenRecordInstance.record.toChromPos(t.gCDSStart)
+                t.chromCDSStart = GenRecordInstance.record.toChromPos(
+                    t.gCDSStart)
                 t.cCDSStop = str(cds_stop)
                 t.gCDSStop = transcript.CM.x2g(cds_stop, 0)
                 t.chromCDSStop = GenRecordInstance.record.toChromPos(t.gCDSStop)
@@ -899,7 +902,7 @@ class MutalyzerService(ServiceBase):
         return transcripts
     #getTranscriptsAndInfo
 
-    @srpc(Mandatory.String, _returns = Mandatory.String)
+    @srpc(Mandatory.String, _returns=Mandatory.String)
     def upLoadGenBankLocalFile(content) :
         """
         Not implemented yet.
@@ -907,7 +910,7 @@ class MutalyzerService(ServiceBase):
         raise Fault('ENOTIMPLEMENTED', 'Not implemented yet')
     #upLoadGenBankLocalFile
 
-    @srpc(Mandatory.String, _returns = Mandatory.String)
+    @srpc(Mandatory.String, _returns=Mandatory.String)
     def upLoadGenBankRemoteFile(url) :
         """
         Not implemented yet.
@@ -916,7 +919,7 @@ class MutalyzerService(ServiceBase):
     #upLoadGenBankRemoteFile
 
     @srpc(Mandatory.String, Mandatory.String, Mandatory.Integer,
-        Mandatory.Integer, _returns = Mandatory.String)
+        Mandatory.Integer, _returns=Mandatory.String)
     def sliceChromosomeByGene(geneSymbol, organism, upStream,
         downStream) :
         """
@@ -946,7 +949,7 @@ class MutalyzerService(ServiceBase):
     #sliceChromosomeByGene
 
     @srpc(Mandatory.String, Mandatory.Integer, Mandatory.Integer,
-        Mandatory.Integer, _returns = Mandatory.String)
+        Mandatory.Integer, _returns=Mandatory.String)
     def sliceChromosome(chromAccNo, start, end, orientation) :
         """
         Todo: documentation, error handling, argument checking, tests.
@@ -972,7 +975,7 @@ class MutalyzerService(ServiceBase):
         return UD
     #sliceChromosome
 
-    @srpc(_returns = InfoOutput)
+    @srpc(_returns=InfoOutput)
     def info():
         """
         Gives some static application information, such as the current running
@@ -1010,7 +1013,7 @@ class MutalyzerService(ServiceBase):
         return result
     #info
 
-    @srpc(_returns = Mandatory.String)
+    @srpc(_returns=Mandatory.String)
     def ping():
         """
         Simple function to test the interface.
@@ -1021,7 +1024,28 @@ class MutalyzerService(ServiceBase):
         return 'pong'
     #ping
 
-    @srpc(DateTime, _returns = Array(CacheEntry))
+    @srpc(Mandatory.String, Mandatory.String, _returns=Allele)
+    def descriptionExtract(reference, observed):
+        """
+        Extract the HGVS description of the variant(s) leading from the
+        reference sequence to the observed one; returns an Allele object.
+        """
+        output = Output(__file__)
+
+        output.addMessage(__file__, -1, 'INFO',
+            'Received request descriptionExtract')
+
+        result = Allele()
+        result.allele = describe.describeDNA(reference, observed)
+        result.description = describe.alleleDescription(result.allele)
+
+        output.addMessage(__file__, -1, 'INFO',
+            'Finished processing descriptionExtract')
+
+        return result
+    #descriptionExtract
+
+    @srpc(DateTime, _returns=Array(CacheEntry))
     def getCache(created_since=None):
         """
         Get a list of entries from the local cache created since given date.
@@ -1031,8 +1055,7 @@ class MutalyzerService(ServiceBase):
         """
         output = Output(__file__)
 
-        output.addMessage(__file__, -1, 'INFO',
-                          'Received request getCache')
+        output.addMessage(__file__, -1, 'INFO', 'Received request getCache')
 
         database = Db.Cache()
         sync = CacheSync(output, database)
@@ -1042,18 +1065,17 @@ class MutalyzerService(ServiceBase):
         def cache_entry_to_soap(entry):
             e = CacheEntry()
             for attr in ('name', 'gi', 'hash', 'chromosomeName',
-                         'chromosomeStart', 'chromosomeStop',
-                         'chromosomeOrientation', 'url', 'created', 'cached'):
+                'chromosomeStart', 'chromosomeStop', 'chromosomeOrientation',
+                'url', 'created', 'cached'):
                 setattr(e, attr, entry[attr])
             return e
 
-        output.addMessage(__file__, -1, 'INFO',
-                          'Finished processing getCache')
+        output.addMessage(__file__, -1, 'INFO', 'Finished processing getCache')
 
         return map(cache_entry_to_soap, cache)
     #getCache
 
-    @srpc(Mandatory.String, _returns = Array(Mandatory.String))
+    @srpc(Mandatory.String, _returns=Array(Mandatory.String))
     def getdbSNPDescriptions(rs_id):
         """
         Lookup HGVS descriptions for a dbSNP rs identifier.
@@ -1067,19 +1089,19 @@ class MutalyzerService(ServiceBase):
         output = Output(__file__)
 
         output.addMessage(__file__, -1, 'INFO',
-                          'Received request getdbSNPDescription(%s)' % rs_id)
+            'Received request getdbSNPDescription(%s)' % rs_id)
 
         retriever = Retriever.Retriever(output, None)
         descriptions = retriever.snpConvert(rs_id)
 
         output.addMessage(__file__, -1, 'INFO',
-                          'Finished processing getdbSNPDescription(%s)' % rs_id)
+            'Finished processing getdbSNPDescription(%s)' % rs_id)
 
         # Todo: use SOAP Fault object here (see Trac issue #41).
         messages = output.getMessages()
         if messages:
-            error = 'The request could not be completed\n' \
-                    + '\n'.join(map(lambda m: str(m), output.getMessages()))
+            error = 'The request could not be completed\n' + \
+                '\n'.join(map(lambda m: str(m), output.getMessages()))
             raise Exception(error)
 
         return descriptions
-- 
GitLab