From 15f74803b9841018751d06a45bf8230ff21e0c13 Mon Sep 17 00:00:00 2001
From: "J.F.J. Laros" <j.f.j.laros@lumc.nl>
Date: Sat, 18 Feb 2012 21:23:18 +0000
Subject: [PATCH] Added a standardised description length to the describe
 module.

Added simple error handling for the description extractor.



git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@480 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
---
 mutalyzer/describe.py                       | 65 +++++++++++++++------
 mutalyzer/templates/descriptionExtract.html | 53 +++++++++--------
 mutalyzer/variantchecker.py                 |  4 +-
 mutalyzer/website.py                        | 16 ++++-
 4 files changed, 94 insertions(+), 44 deletions(-)

diff --git a/mutalyzer/describe.py b/mutalyzer/describe.py
index 446817fb..4198444f 100644
--- a/mutalyzer/describe.py
+++ b/mutalyzer/describe.py
@@ -11,10 +11,6 @@ import Bio.Seq
 from mutalyzer.util import longest_common_prefix, longest_common_suffix
 from mutalyzer.util import palinsnoop, roll
 
-def printMatrix(M) :
-    for i in M :
-        print i
-
 def LCSMatrix(s1, s2) :
     """
     """
@@ -22,7 +18,6 @@ def LCSMatrix(s1, s2) :
     y_max = len(s1) + 1
     x_max = len(s2) + 1
     M = [[0] * x_max for i in xrange(y_max)]
-    #printMatrix(M)
 
     for x in xrange(1, y_max) :
         for y in xrange(1, x_max) :
@@ -51,7 +46,6 @@ def findMax(M, x1, x2, y1, y2) :
         for y in xrange(y1, y2) :
             y_relative = y - y1
             realVal = min(x_relative, y_relative, M[x][y])
-            #print realVal,
 
             if realVal > longest :
                 longest = realVal
@@ -59,7 +53,6 @@ def findMax(M, x1, x2, y1, y2) :
                 y_longest = y_relative
             #if
         #for
-        #print
     #for
 
     return x_longest, y_longest, longest
@@ -103,8 +96,6 @@ def LongestCommonSubstring(s1, s2) :
         #for
     #for
 
-    #print s1, s2
-    #printMatrix(M)
     return x_longest, y_longest, longest
 #LongestCommonSubstring
 
@@ -119,10 +110,6 @@ class RawVar() :
     Example: if {end} is initialised for a substitution, a range will be
       retuned, resulting in a description like: 100_100A>T
     """
-    # TODO: We now use the length of the variant as a measure of its
-    #       ``length'', but that makes it a bit context dependent, e.g., c.1A>T
-    #       and g.100A>T may be the same variant giving different lengths.
-    #       Maybe we should ignore the positions in a description. 
 
     def __init__(self, start = 0, start_offset = 0, end = 0, end_offset = 0,
         type = "none", deleted = "", inserted = "", shift = 0) :
@@ -188,6 +175,38 @@ class RawVar() :
 
         return descr + "%s>%s" % (self.deleted, self.inserted)
     #description
+
+    def descriptionLength(self) :
+        """
+        Give the standardised length of the HGVS description of the raw variant
+        stored in this class.
+
+        Note that this function relies on the absence of values to make the
+        correct description. Also see the comment in the class definition.
+
+        @returns: The standardised length of the HGVS description of the raw
+            variant stored in this class.
+        @rtype: int
+        """
+
+        if not self.start : # =
+            return 1
+
+        descrLen = 1 # Start position
+
+        if self.end : # '_' and end position.
+            descrLen += 2
+
+        if self.type != "subst" :
+            descrLen += len(self.type)
+
+            if self.inserted :
+                return descrLen + len(self.inserted)
+            return descrLen
+        #if
+
+        return 4 # Start position, '>' and end position.
+    #descriptionLength
 #RawVar
 
 def alleleDescription(allele) :
@@ -206,6 +225,20 @@ def alleleDescription(allele) :
     return allele[0].description()
 #alleleDescription
 
+def alleleDescriptionLength(allele) :
+    """
+    Calculate the standardised length of an HGVS allele description.
+
+    @arg allele: A list of raw variants representing an allele description.
+    @type allele: list(RawVar)
+
+    @returns: The standardised length of the HGVS description of {allele}.
+    @rval: int
+    """
+
+    return sum(map(lambda x : x.descriptionLength(), allele))
+#alleleDescriptionLength
+
 def printpos(s, start, end, fill = 0) :
     """
     For debugging purposes.
@@ -299,13 +332,10 @@ def DNA_description(M, s1, s2, s1_start, s1_end, s2_start, s2_end) :
 
     # At this stage, we either have an inversion, an indel or a Compound
     # variant.
-    a, b, c = findMax(M, s1_start, s1_end, s2_start, s2_end)
     s1_end_f, s2_end_f, lcs_f_len = LongestCommonSubstring(s1[s1_start:s1_end],
         s2[s2_start:s2_end])
     s1_end_r, s2_end_r, lcs_r_len = LongestCommonSubstring(s1[s1_start:s1_end],
         Bio.Seq.reverse_complement(s2[s2_start:s2_end]))
-    print "N:", a, b, c
-    print "O:", s1_end_f, s2_end_f, lcs_f_len
 
     # Palindrome snooping.
     trim = palinsnoop(s1[s1_start + s1_end_r - lcs_r_len:s1_start + s1_end_r])
@@ -370,7 +400,7 @@ def DNA_description(M, s1, s2, s1_start, s1_end, s2_start, s2_end) :
             s1_end - r2_len, s1_end, s2_end - m2_len, s2_end)
     #else
 
-    if len(alleleDescription(partial)) - 2 <= len(alleleDescription(default)) :
+    if alleleDescriptionLength(partial) <= alleleDescriptionLength(default) :
         return partial
     return default
 #DNA_description
@@ -395,7 +425,6 @@ def describeDNA(original, mutated) :
     s1_end = len(s1) - lcs
     s2_end = len(s2) - lcs
     
-    #M = LCSMatrix(s1[lcp:s1_end], s2[lcp:s2_end])
     M = LCSMatrix(s1, s2)
 
     return DNA_description(M, s1, s2, lcp, s1_end, lcp, s2_end)
diff --git a/mutalyzer/templates/descriptionExtract.html b/mutalyzer/templates/descriptionExtract.html
index 966ea7af..46292a2f 100644
--- a/mutalyzer/templates/descriptionExtract.html
+++ b/mutalyzer/templates/descriptionExtract.html
@@ -50,29 +50,36 @@
       <div tal:condition = "description">
         <br>
         <h3>Variant Description Extractor results:</h3>
-        <br>
-        <b>Genomic description:</b><br>
-        <br>
-        <tt>g.<div tal:replace = "description"></div></tt>
-        <br>
-        <br>
-        <br>
-        <b>Overview of the raw variants:</b><br>
-        <br>
-        <table class = "laTable">
-          <tr>
-            <td>Start</td>
-            <td>End</td>
-            <td>Type</td>
-            <td>Deleted</td>
-            <td>Inserted</td>
-            <td>Shift</td>
-            <td>Description</td>
-          </tr>
-          <tr tal:repeat = "i visualisation">
-            <td tal:repeat = "j i" tal:content = "j"></td>
-          </tr>
-        </table>
+        <div class="messages">
+          <p tal:repeat = "m messages" tal:content = "m/description"
+            tal:attributes = "class m/class; title string:${m/level} (origin: ${m/origin})"></p>
+          <p tal:content = "summary"></p>
+        </div>
+        <div tal:condition = "not:errors">
+          <br>
+          <b>Genomic description:</b><br>
+          <br>
+          <tt>g.<div tal:replace = "description"></div></tt>
+          <br>
+          <br>
+          <br>
+          <b>Overview of the raw variants:</b><br>
+          <br>
+          <table class = "laTable">
+            <tr>
+              <td>Start</td>
+              <td>End</td>
+              <td>Type</td>
+              <td>Deleted</td>
+              <td>Inserted</td>
+              <td>Shift</td>
+              <td>Description</td>
+            </tr>
+            <tr tal:repeat = "i visualisation">
+              <td tal:repeat = "j i" tal:content = "j"></td>
+            </tr>
+          </table>
+        </div>
       </div> <!-- description -->
       <br>
     </div>
diff --git a/mutalyzer/variantchecker.py b/mutalyzer/variantchecker.py
index c7419873..d7e3472b 100644
--- a/mutalyzer/variantchecker.py
+++ b/mutalyzer/variantchecker.py
@@ -1502,8 +1502,8 @@ def check_variant(description, output):
             gene_symbol = parsed_description.Gene.GeneSymbol or ''
             transcript_id = parsed_description.Gene.TransVar or ''
             if parsed_description.Gene.ProtIso:
-                output.addMessage(__file__, 4, 'EPROT', 'Indexing by ' \
-                                  'protein isoform is not supported.')
+                output.addMessage(__file__, 4, 'EPROT',
+                    'Indexing by protein isoform is not supported.')
         retriever = Retriever.GenBankRetriever(output, database)
 
     retrieved_record = retriever.loadrecord(record_id)
diff --git a/mutalyzer/website.py b/mutalyzer/website.py
index 28c427a4..03a7ecc2 100644
--- a/mutalyzer/website.py
+++ b/mutalyzer/website.py
@@ -967,9 +967,19 @@ class DescriptionExtractor :
         output.addMessage(__file__, -1, 'INFO',
             "Received Description Extract request from %s" % IP)
 
+        # Move this to the describe module.
+        if not util.is_dna(referenceSeq) :
+            output.addMessage(__file__, 3, "ENODNA",
+                "Reference sequence is not DNA.")
+        if not util.is_dna(variantSeq) :
+            output.addMessage(__file__, 3, "ENODNA",
+                "Variant sequence is not DNA.")
+
         result = describe.describeDNA(referenceSeq, variantSeq)
         description = describe.alleleDescription(result)
 
+        errors, warnings, summary = output.Summary()
+
         visualisation = []
         for i in result :
             visualisation.append([i.start, i.end, i.type, i.deleted,
@@ -979,7 +989,11 @@ class DescriptionExtractor :
             'lastReferenceSeq' : referenceSeq,
             'lastVariantSeq'   : variantSeq,
             'description'      : description,
-            'visualisation'    : visualisation
+            'visualisation'    : visualisation,
+            'errors'           : errors,
+            'summary'          : summary,
+            'messages'         : map(util.message_info,
+                output.getMessages())
         }
 
         output.addMessage(__file__, -1, 'INFO',
-- 
GitLab