diff --git a/mutalyzer/describe.py b/mutalyzer/describe.py index 0af60af3b90c4f0890bb6fb05ae6c0f6e5f59363..3ce677514e3befa2573e3b5a18f52ba10da86af5 100644 --- a/mutalyzer/describe.py +++ b/mutalyzer/describe.py @@ -16,6 +16,11 @@ from mutalyzer.util import longest_common_prefix, longest_common_suffix from mutalyzer.util import palinsnoop, roll from mutalyzer import models + +# Maximum number of cells in the LCS matrix; describe() returns None above this +MAX_MATRIX_SIZE = 8000000 + + class LCS(object): """ Class that calculates a Longest Common Substring matrix once and provides @@ -35,7 +40,7 @@ class LCS(object): @arg lcp: The length of the longest common prefix of {s1} and {s2}. @type lcp: int @arg s1_end: End of the substring in {s1}. - @type s1_end: + @type s1_end: int @arg s2_end: End of the substring in {s2}. @type s2_end: int @arg DNA: @@ -806,6 +811,9 @@ def describe(original, mutated, DNA=True): s1_end = len(s1) - lcs s2_end = len(s2) - lcs + if (s1_end - lcp) * (s2_end - lcp) > MAX_MATRIX_SIZE: + return + if not DNA: M = LCS(s1, s2, lcp, s1_end, s2_end) return protein_description(M, s1, s2, lcp, s1_end, lcp, s2_end) diff --git a/mutalyzer/website.py b/mutalyzer/website.py index f7c9084d715d5bbed5bab11d77f10a88d9a43045..5b2235289fc556e960e3166d40e4bf55fdb5d0a8 100644 --- a/mutalyzer/website.py +++ b/mutalyzer/website.py @@ -831,13 +831,17 @@ class Check: chromosome=raw_variants[0], start=min(positions) - 10, stop=max(positions) + 10, bed_file=urllib.quote(bed_url)) - extracted = describe.alleleDescription( - describe.describe(output.getIndexedOutput("original", 0), - output.getIndexedOutput("mutated", 0))) + allele = describe.describe(output.getIndexedOutput("original", 0), + output.getIndexedOutput("mutated", 0)) + prot_allele = describe.describe(output.getIndexedOutput("oldprotein", 0), + output.getIndexedOutput("newprotein", 0, default=""), DNA=False) - extractedProt = describe.alleleDescription( - describe.describe(output.getIndexedOutput("oldprotein", 0), - output.getIndexedOutput("newprotein", 0, 
default=""), DNA=False)) + extracted = extractedProt = '(skipped)' + + if allele: + extracted = describe.alleleDescription(allele) + if prot_allele: + extractedProt = describe.alleleDescription(prot_allele) # Todo: Generate the fancy HTML views for the proteins here instead