From 15f74803b9841018751d06a45bf8230ff21e0c13 Mon Sep 17 00:00:00 2001 From: "J.F.J. Laros" <j.f.j.laros@lumc.nl> Date: Sat, 18 Feb 2012 21:23:18 +0000 Subject: [PATCH] Added a standardised description length to the describe module. Added simple error handling for the description extractor. git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@480 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1 --- mutalyzer/describe.py | 65 +++++++++++++++------ mutalyzer/templates/descriptionExtract.html | 53 +++++++++-------- mutalyzer/variantchecker.py | 4 +- mutalyzer/website.py | 16 ++++- 4 files changed, 94 insertions(+), 44 deletions(-) diff --git a/mutalyzer/describe.py b/mutalyzer/describe.py index 446817fb..4198444f 100644 --- a/mutalyzer/describe.py +++ b/mutalyzer/describe.py @@ -11,10 +11,6 @@ import Bio.Seq from mutalyzer.util import longest_common_prefix, longest_common_suffix from mutalyzer.util import palinsnoop, roll -def printMatrix(M) : - for i in M : - print i - def LCSMatrix(s1, s2) : """ """ @@ -22,7 +18,6 @@ def LCSMatrix(s1, s2) : y_max = len(s1) + 1 x_max = len(s2) + 1 M = [[0] * x_max for i in xrange(y_max)] - #printMatrix(M) for x in xrange(1, y_max) : for y in xrange(1, x_max) : @@ -51,7 +46,6 @@ def findMax(M, x1, x2, y1, y2) : for y in xrange(y1, y2) : y_relative = y - y1 realVal = min(x_relative, y_relative, M[x][y]) - #print realVal, if realVal > longest : longest = realVal @@ -59,7 +53,6 @@ def findMax(M, x1, x2, y1, y2) : y_longest = y_relative #if #for - #print #for return x_longest, y_longest, longest @@ -103,8 +96,6 @@ def LongestCommonSubstring(s1, s2) : #for #for - #print s1, s2 - #printMatrix(M) return x_longest, y_longest, longest #LongestCommonSubstring @@ -119,10 +110,6 @@ class RawVar() : Example: if {end} is initialised for a substitution, a range will be retuned, resulting in a description like: 100_100A>T """ - # TODO: We now use the length of the variant as a measure of its - # ``length'', but that makes it a bit context dependent, e.g., c.1A>T - # and g.100A>T may be the same variant giving different lengths. - # Maybe we should ignore the positions in a description. def __init__(self, start = 0, start_offset = 0, end = 0, end_offset = 0, type = "none", deleted = "", inserted = "", shift = 0) : @@ -188,6 +175,38 @@ class RawVar() : return descr + "%s>%s" % (self.deleted, self.inserted) #description + + def descriptionLength(self) : + """ + Give the standardised length of the HGVS description of the raw variant + stored in this class. + + Note that this function relies on the absence of values to make the + correct description. Also see the comment in the class definition. + + @returns: The standardised length of the HGVS description of the raw + variant stored in this class. + @rtype: int + """ + + if not self.start : # = + return 1 + + descrLen = 1 # Start position + + if self.end : # '_' and end position. + descrLen += 2 + + if self.type != "subst" : + descrLen += len(self.type) + + if self.inserted : + return descrLen + len(self.inserted) + return descrLen + #if + + return 4 # Start position, '>' and end position. + #descriptionLength #RawVar def alleleDescription(allele) : @@ -206,6 +225,20 @@ def alleleDescription(allele) : return allele[0].description() #alleleDescription +def alleleDescriptionLength(allele) : + """ + Calculate the standardised length of an HGVS allele description. + + @arg allele: A list of raw variants representing an allele description. + @type allele: list(RawVar) + + @returns: The standardised length of the HGVS description of {allele}. + @rval: int + """ + + return sum(map(lambda x : x.descriptionLength(), allele)) +#alleleDescriptionLength + def printpos(s, start, end, fill = 0) : """ For debugging purposes. @@ -299,13 +332,10 @@ def DNA_description(M, s1, s2, s1_start, s1_end, s2_start, s2_end) : # At this stage, we either have an inversion, an indel or a Compound # variant. - a, b, c = findMax(M, s1_start, s1_end, s2_start, s2_end) s1_end_f, s2_end_f, lcs_f_len = LongestCommonSubstring(s1[s1_start:s1_end], s2[s2_start:s2_end]) s1_end_r, s2_end_r, lcs_r_len = LongestCommonSubstring(s1[s1_start:s1_end], Bio.Seq.reverse_complement(s2[s2_start:s2_end])) - print "N:", a, b, c - print "O:", s1_end_f, s2_end_f, lcs_f_len # Palindrome snooping. trim = palinsnoop(s1[s1_start + s1_end_r - lcs_r_len:s1_start + s1_end_r]) @@ -370,7 +400,7 @@ def DNA_description(M, s1, s2, s1_start, s1_end, s2_start, s2_end) : s1_end - r2_len, s1_end, s2_end - m2_len, s2_end) #else - if len(alleleDescription(partial)) - 2 <= len(alleleDescription(default)) : + if alleleDescriptionLength(partial) <= alleleDescriptionLength(default) : return partial return default #DNA_description @@ -395,7 +425,6 @@ def describeDNA(original, mutated) : s1_end = len(s1) - lcs s2_end = len(s2) - lcs - #M = LCSMatrix(s1[lcp:s1_end], s2[lcp:s2_end]) M = LCSMatrix(s1, s2) return DNA_description(M, s1, s2, lcp, s1_end, lcp, s2_end) diff --git a/mutalyzer/templates/descriptionExtract.html b/mutalyzer/templates/descriptionExtract.html index 966ea7af..46292a2f 100644 --- a/mutalyzer/templates/descriptionExtract.html +++ b/mutalyzer/templates/descriptionExtract.html @@ -50,29 +50,36 @@ <div tal:condition = "description"> <br> <h3>Variant Description Extractor results:</h3> - <br> - <b>Genomic description:</b><br> - <br> - <tt>g.<div tal:replace = "description"></div></tt> - <br> - <br> - <br> - <b>Overview of the raw variants:</b><br> - <br> - <table class = "laTable"> - <tr> - <td>Start</td> - <td>End</td> - <td>Type</td> - <td>Deleted</td> - <td>Inserted</td> - <td>Shift</td> - <td>Description</td> - </tr> - <tr tal:repeat = "i visualisation"> - <td tal:repeat = "j i" tal:content = "j"></td> - </tr> - </table> + <div class="messages"> + <p tal:repeat = "m messages" tal:content = "m/description" + tal:attributes = "class m/class; title string:${m/level} (origin: ${m/origin})"></p> + <p tal:content = "summary"></p> + </div> + <div tal:condition = "not:errors"> + <br> + <b>Genomic description:</b><br> + <br> + <tt>g.<div tal:replace = "description"></div></tt> + <br> + <br> + <br> + <b>Overview of the raw variants:</b><br> + <br> + <table class = "laTable"> + <tr> + <td>Start</td> + <td>End</td> + <td>Type</td> + <td>Deleted</td> + <td>Inserted</td> + <td>Shift</td> + <td>Description</td> + </tr> + <tr tal:repeat = "i visualisation"> + <td tal:repeat = "j i" tal:content = "j"></td> + </tr> + </table> + </div> </div> <!-- description --> <br> </div> diff --git a/mutalyzer/variantchecker.py b/mutalyzer/variantchecker.py index c7419873..d7e3472b 100644 --- a/mutalyzer/variantchecker.py +++ b/mutalyzer/variantchecker.py @@ -1502,8 +1502,8 @@ def check_variant(description, output): gene_symbol = parsed_description.Gene.GeneSymbol or '' transcript_id = parsed_description.Gene.TransVar or '' if parsed_description.Gene.ProtIso: - output.addMessage(__file__, 4, 'EPROT', 'Indexing by ' \ - 'protein isoform is not supported.') + output.addMessage(__file__, 4, 'EPROT', + 'Indexing by protein isoform is not supported.') retriever = Retriever.GenBankRetriever(output, database) retrieved_record = retriever.loadrecord(record_id) diff --git a/mutalyzer/website.py b/mutalyzer/website.py index 28c427a4..03a7ecc2 100644 --- a/mutalyzer/website.py +++ b/mutalyzer/website.py @@ -967,9 +967,19 @@ class DescriptionExtractor : output.addMessage(__file__, -1, 'INFO', "Received Description Extract request from %s" % IP) + # Move this to the describe module. + if not util.is_dna(referenceSeq) : + output.addMessage(__file__, 3, "ENODNA", + "Reference sequence is not DNA.") + if not util.is_dna(variantSeq) : + output.addMessage(__file__, 3, "ENODNA", + "Variant sequence is not DNA.") + result = describe.describeDNA(referenceSeq, variantSeq) description = describe.alleleDescription(result) + errors, warnings, summary = output.Summary() + visualisation = [] for i in result : visualisation.append([i.start, i.end, i.type, i.deleted, @@ -979,7 +989,11 @@ class DescriptionExtractor : 'lastReferenceSeq' : referenceSeq, 'lastVariantSeq' : variantSeq, 'description' : description, - 'visualisation' : visualisation + 'visualisation' : visualisation, + 'errors' : errors, + 'summary' : summary, + 'messages' : map(util.message_info, + output.getMessages()) } output.addMessage(__file__, -1, 'INFO', -- GitLab