From 3185a420b587a07b06df9894b6c2a7d137e9c220 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Mon, 24 Sep 2012 12:04:07 +0000
Subject: [PATCH] Maximum matrix size for description extractor

git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@607 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
---
 mutalyzer/describe.py | 10 +++++++++-
 mutalyzer/website.py  | 16 ++++++++++------
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/mutalyzer/describe.py b/mutalyzer/describe.py
index 0af60af3..3ce67751 100644
--- a/mutalyzer/describe.py
+++ b/mutalyzer/describe.py
@@ -16,6 +16,11 @@ from mutalyzer.util import longest_common_prefix, longest_common_suffix
 from mutalyzer.util import palinsnoop, roll
 from mutalyzer import models
 
+
+# Maximum number of cells in the LCS matrix; describe() returns None above it.
+MAX_MATRIX_SIZE = 8000000
+
+
 class LCS(object):
     """
     Class that calculates a Longest Common Substring matrix once and provides
@@ -35,7 +40,7 @@ class LCS(object):
         @arg lcp: The length of the longest common prefix of {s1} and {s2}.
         @type lcp: int
         @arg s1_end: End of the substring in {s1}.
-        @type s1_end: 
+        @type s1_end: int
         @arg s2_end: End of the substring in {s2}.
         @type s2_end: int
         @arg DNA:
@@ -806,6 +811,9 @@ def describe(original, mutated, DNA=True):
     s1_end = len(s1) - lcs
     s2_end = len(s2) - lcs
 
+    if (s1_end - lcp) * (s2_end - lcp) > MAX_MATRIX_SIZE:
+        return
+
     if not DNA:
         M = LCS(s1, s2, lcp, s1_end, s2_end)
         return protein_description(M, s1, s2, lcp, s1_end, lcp, s2_end)
diff --git a/mutalyzer/website.py b/mutalyzer/website.py
index f7c9084d..5b223528 100644
--- a/mutalyzer/website.py
+++ b/mutalyzer/website.py
@@ -831,13 +831,17 @@ class Check:
                 chromosome=raw_variants[0], start=min(positions) - 10,
                 stop=max(positions) + 10, bed_file=urllib.quote(bed_url))
 
-        extracted = describe.alleleDescription(
-            describe.describe(output.getIndexedOutput("original", 0),
-            output.getIndexedOutput("mutated", 0)))
+        allele = describe.describe(output.getIndexedOutput("original", 0),
+                                   output.getIndexedOutput("mutated", 0))
+        prot_allele = describe.describe(output.getIndexedOutput("oldprotein", 0),
+                                        output.getIndexedOutput("newprotein", 0, default=""), DNA=False)
 
-        extractedProt = describe.alleleDescription(
-            describe.describe(output.getIndexedOutput("oldprotein", 0),
-            output.getIndexedOutput("newprotein", 0, default=""), DNA=False))
+        extracted = extractedProt = '(skipped)'
+
+        if allele:
+            extracted = describe.alleleDescription(allele)
+        if prot_allele:
+            extractedProt = describe.alleleDescription(prot_allele)
 
 
         # Todo: Generate the fancy HTML views for the proteins here instead
-- 
GitLab