From ee390387df77fcc24bf4beeb439d240a653763aa Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Sun, 31 May 2015 17:34:45 +0200
Subject: [PATCH] Configurable maximum input length for description extractor

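Adds an `EXTRACTOR_MAX_INPUT_LENGTH` configuration setting, defaulting
to 50 Kbp. It replaces the hard-coded 1000 bp limit, which was skipped
when `settings.TESTING` was set; the new limit is always enforced.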
---
 mutalyzer/config/default_settings.py          |  3 ++
 mutalyzer/services/rpc.py                     |  7 +++--
 .../templates/description-extractor.html      |  6 ++--
 mutalyzer/website/views.py                    | 10 +++++--
 tests/test_services_json.py                   | 19 ++++++++++++
 tests/test_website.py                         | 29 +++++++++++++++++++
 6 files changed, 65 insertions(+), 9 deletions(-)

diff --git a/mutalyzer/config/default_settings.py b/mutalyzer/config/default_settings.py
index 00dc9b2e..c01d47a5 100644
--- a/mutalyzer/config/default_settings.py
+++ b/mutalyzer/config/default_settings.py
@@ -25,6 +25,9 @@ CACHE_DIR = '/tmp'
 # Maximum size for uploaded and downloaded files (in bytes).
 MAX_FILE_SIZE = 10 * 1048576 # 10 MB
 
+# Maximum length of each description extractor input sequence (in bases).
+EXTRACTOR_MAX_INPUT_LENGTH = 50 * 1000 # 50 Kbp
+
 # The WSGI application runs behind a reverse proxy (e.g., nginx using
 # proxy_pass). This needs to be set if the application is mapped to a URL
 # other than / or a different HTTP scheme is used by the reverse proxy.
diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py
index 6fce0a8f..d0315fd1 100644
--- a/mutalyzer/services/rpc.py
+++ b/mutalyzer/services/rpc.py
@@ -1242,10 +1242,11 @@ class MutalyzerService(ServiceBase):
 
         stats.increment_counter('description-extractor/webservice')
 
-        if not settings.TESTING and (len(reference) > 1000 or
-                                     len(observed) > 1000):
+        if (len(reference) > settings.EXTRACTOR_MAX_INPUT_LENGTH or
+            len(observed) > settings.EXTRACTOR_MAX_INPUT_LENGTH):
             raise Fault('EMAXSIZE',
-                        'Input sequences are restricted to 1000bp.')
+                        'Input sequences are restricted to {} bp.'
+                        .format(settings.EXTRACTOR_MAX_INPUT_LENGTH))
 
         allele = extractor.describe_dna(reference, observed)
 
diff --git a/mutalyzer/website/templates/description-extractor.html b/mutalyzer/website/templates/description-extractor.html
index ec096feb..631de1d4 100644
--- a/mutalyzer/website/templates/description-extractor.html
+++ b/mutalyzer/website/templates/description-extractor.html
@@ -7,7 +7,7 @@
 
 <p class="alert alert-warning">
 Please note that this is an experimental service and we are currently limiting
-input sequences to 1000bp.
+input sequences to {{ '{:,}'.format(extractor_max_input_length) }} bp.
 </p>
 
 <p>
@@ -88,7 +88,7 @@ Please supply a reference sequence and an observed sequence.
           <div class="form-group">
             <label for="reference_accession_number">Reference accession number</label>
             <input type="text" name="reference_accession_number" id="reference_accession_number" value="{{ reference_accession_number }}" class="form-control form-pre" placeholder="Reference accession number">
-            <p>Example: <code class="example-input" data-for="reference_accession_number">NM_198697.1</code></p>
+            <p>Example: <code class="example-input" data-for="reference_accession_number">NM_004006.1</code></p>
           </div>
         </div>
       </div>
@@ -143,7 +143,7 @@ Please supply a reference sequence and an observed sequence.
           <div class="form-group">
             <label for="sample_accession_number">Sample accession number</label>
             <input type="text" name="sample_accession_number" id="sample_accession_number" value="{{ sample_accession_number }}" class="form-control form-pre" placeholder="Sample accession number">
-            <p>Example: <code class="example-input" data-for="sample_accession_number">NM_198697.2</code></p>
+            <p>Example: <code class="example-input" data-for="sample_accession_number">NM_004006.2</code></p>
           </div>
         </div>
       </div>
diff --git a/mutalyzer/website/views.py b/mutalyzer/website/views.py
index 830781b6..bc0a339e 100644
--- a/mutalyzer/website/views.py
+++ b/mutalyzer/website/views.py
@@ -682,7 +682,8 @@ def description_extractor():
     """
     Description extractor loader form.
     """
-    return render_template('description-extractor.html')
+    return render_template('description-extractor.html',
+                           extractor_max_input_length=settings.EXTRACTOR_MAX_INPUT_LENGTH)
 
 
 @website.route('/description-extractor', methods=['POST'])
@@ -788,9 +789,11 @@ def description_extractor_submit():
 
     raw_vars = None
     if r and s:
-        if not settings.TESTING and (len(r) > 1000 or len(s) > 1000):
+        if (len(r) > settings.EXTRACTOR_MAX_INPUT_LENGTH or
+            len(s) > settings.EXTRACTOR_MAX_INPUT_LENGTH):
             output.addMessage(__file__, 3, 'EMAXSIZE',
-                              'Input sequences are restricted to 1000bp.')
+                              'Input sequences are restricted to {:,} bp.'
+                              .format(settings.EXTRACTOR_MAX_INPUT_LENGTH))
         else:
             raw_vars = extractor.describe_dna(r, s)
 
@@ -801,6 +804,7 @@ def description_extractor_submit():
                       'Finished Description Extract request')
 
     return render_template('description-extractor.html',
+        extractor_max_input_length=settings.EXTRACTOR_MAX_INPUT_LENGTH,
         reference_sequence=reference_sequence or '',
         sample_sequence=sample_sequence or '',
         reference_accession_number=reference_accession_number or '',
diff --git a/tests/test_services_json.py b/tests/test_services_json.py
index 843ca8ea..259fad6b 100644
--- a/tests/test_services_json.py
+++ b/tests/test_services_json.py
@@ -12,6 +12,7 @@ from spyne.server.null import NullServer
 
 import mutalyzer
 from mutalyzer import announce
+from mutalyzer.config import settings
 from mutalyzer import Scheduler
 from mutalyzer.services.json import application
 
@@ -298,3 +299,21 @@ class TestServicesJson(MutalyzerTest):
                                  'sample_start_offset': 0,
                                  'sample_end_offset': 0}],
                      'description': '[5_6insTT;17del;26A>C;35dup]'}
+
+    def test_description_extract_ref_too_long(self):
+        """
+        Calling descriptionExtract with a too long reference raises a Fault.
+        """
+        with pytest.raises(Fault):
+            self._call('descriptionExtract',
+                       'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH + 1),
+                       'A')
+
+    def test_description_extract_sample_too_long(self):
+        """
+        Calling descriptionExtract with a too long sample raises a Fault.
+        """
+        with pytest.raises(Fault):
+            self._call('descriptionExtract',
+                       'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH),
+                       'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH + 1))
diff --git a/tests/test_website.py b/tests/test_website.py
index bb3ff5af..3392d1fa 100644
--- a/tests/test_website.py
+++ b/tests/test_website.py
@@ -17,6 +17,7 @@ from Bio import Entrez
 import lxml.html
 
 from mutalyzer import announce, Scheduler
+from mutalyzer.config import settings
 from mutalyzer.db import models
 from mutalyzer.website import create_app
 
@@ -178,6 +179,34 @@ class TestWebsite(MutalyzerTest):
             'sample_file': (open(path), 'extractor_input.txt')})
         assert '[5_6insTT;17del;26A&gt;C;35dup]' in r.data
 
+    def test_description_extractor_ref_too_long(self):
+        """
+        Post a reference sequence one base over the maximum length; expect
+        the size restriction error and no extracted description.
+        """
+        r = self.app.post('/description-extractor', data={
+            'reference_method': 'raw_method',
+            'sample_method': 'raw_method',
+            'reference_sequence': 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH + 1),
+            'sample_sequence': 'A'})
+        assert '2_{}del'.format(settings.EXTRACTOR_MAX_INPUT_LENGTH + 1) not in r.data
+        assert 'Input sequences are restricted to ' in r.data
+        assert '1 Error, 0 Warnings.' in r.data
+
+    def test_description_extractor_sample_too_long(self):
+        """
+        Post a sample sequence one base over the maximum length, with the
+        reference exactly at the maximum; expect the size restriction error.
+        """
+        r = self.app.post('/description-extractor', data={
+            'reference_method': 'raw_method',
+            'sample_method': 'raw_method',
+            'reference_sequence': 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH),
+            'sample_sequence': 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH + 1)})
+        assert '{}dup'.format(settings.EXTRACTOR_MAX_INPUT_LENGTH) not in r.data
+        assert 'Input sequences are restricted to ' in r.data
+        assert '1 Error, 0 Warnings.' in r.data
+
     def test_checksyntax_valid(self):
         """
         Submit the check syntax form with a valid variant.
-- 
GitLab