From b7c8fddd3339ec65e953a569055f81ea7498b79e Mon Sep 17 00:00:00 2001 From: Martijn Vermaat <martijn@vermaat.name> Date: Mon, 24 Nov 2014 12:05:11 +0100 Subject: [PATCH] Check batch job input field length --- mutalyzer/File.py | 29 ++++++++++++++++++--- mutalyzer/website/templates/batch-jobs.html | 5 ++-- tests/test_scheduler.py | 17 ++++++++++++ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/mutalyzer/File.py b/mutalyzer/File.py index 1212d310..bbe3c2c4 100644 --- a/mutalyzer/File.py +++ b/mutalyzer/File.py @@ -292,6 +292,7 @@ class File() : ret = [] notthree = [] emptyfield = [] + toolong = [] for line, job in jobl[1:]: #Empty line @@ -319,7 +320,10 @@ class File() : inputl = "~!InputFields: " #start with the skip flag inputl+= "|".join(job) - ret.append(inputl) + if len(inputl) > 200: + toolong.append(line) + else: + ret.append(inputl) #for #Create output Message for incompatible fields @@ -335,7 +339,13 @@ class File() : "The first and last column can't be left empty in " "%i line(s): %s.\n" % (len(emptyfield), lines)) - errlist = notthree + emptyfield + if any(toolong): + lines = makeList(toolong, 10) + self.__output.addMessage(__file__, 3, "EBPARSE", + "Batch input field too long in %i line(s): %s.\n" % + (len(toolong), lines)) + + errlist = notthree + emptyfield + toolong #if else: #No Header, possibly a new BatchType @@ -353,12 +363,23 @@ class File() : "line(s): %s" % (len(errlist), makeList(errlist))) + toolong = [line for line, row in jobl + if any(len(col) > 200 for col in row)] + if any(toolong): + self.__output.addMessage(__file__, 3, "EBPARSE", + "Batch input field too long in %i line(s): %s" % + (len(toolong), makeList(toolong))) + ret = [] for line, job in jobl: if not any(job): #Empty line ret.extend(['~!' for _ in range(columns)]) continue - if line in errlist: + if line in toolong: + #Trim too long + ret.append("~!InputFields: " + ('|'.join(job))[:180] + '...') + ret.extend(['~!' for _ in range(columns - 1)]) + elif line in errlist: #Dirty Escape BatchEntries ret.append("~!InputFields: " + '|'.join(job)) ret.extend(['~!' for _ in range(columns - 1)]) @@ -476,7 +497,7 @@ def makeList(l, maxlen=10): @return: a list converted to a string with comma's and spaces @rtype: unicode """ - ret = ", ".join(i for i in l[:maxlen]) + ret = ", ".join(str(i) for i in l[:maxlen]) if len(l)>maxlen: return ret+", ..." else: diff --git a/mutalyzer/website/templates/batch-jobs.html b/mutalyzer/website/templates/batch-jobs.html index 0702c382..2029a828 100644 --- a/mutalyzer/website/templates/batch-jobs.html +++ b/mutalyzer/website/templates/batch-jobs.html @@ -15,13 +15,14 @@ </p> <div id='help' style="display:none"> - <p>The mutalyzer batch checker accepts the following file formats + <p>The mutalyzer batch checker accepts the following file formats: <ul> <li>Tab delimited text file / CSV file</li> <li>Microsoft Excel file</li> <li>OpenOffice ODS file</li> </ul> - and the maximum size is <span tal:content = "maxSize"></span> megabytes. + The maximum file size is {{ max_file_size }} megabytes, and the maximum + length per entry (variant description) is 200 characters. </p> <h5>We accept two types of input files, you can download examples below</h5> <h5>New Style <a href="{{ url_for('.downloads', filename='batchtestnew.txt') }}">Download Example File</a></h5> diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 791f867d..c118bfd7 100644 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -71,6 +71,23 @@ class TestScheduler(MutalyzerTest): 'OK']] self._batch_job_plain_text(variants, expected, 'syntax-checker') + def test_large_input(self): + """ + Simple batch job with large input. + """ + variants = ['chr13:g.114503915delCACCTGCGGGAGGTGAGGGGCGCTGGGGACCCCCG' + 'TATCTACACCTGCGGGAGGTGAGGGGCGCTGGGGACCCCTATATCTACACCTGAG' + 'GGAGGTGinsTGCCTGCGGGAGGTGAGGGGCGCTGGGGACCCCCGTATCTACACC' + 'TGCGGGAGGTGAGGGGCGCTGGGGACCCCTATATCTACACCTGAGGGAGGTG'] + + expected = [['InputFields: chr13:g.114503915delCACCTGCGGGAGGTGAGGGGC' + 'GCTGGGGACCCCCGTATCTACACCTGCGGGAGGTGAGGGGCGCTGGGGACCCCT' + 'ATATCTACACCTGAGGGAGGTGinsTGCCTGCGGGAGGTGAGGGGCGCTGGGGA' + 'CCCCCGTATCTACACCTGCGGGAGGTGAGGG...', + '(Scheduler): Entry could not be formatted correctly, ' + 'check batch input file help for details']] + self._batch_job_plain_text(variants, expected, 'syntax-checker') + @fix(cache('AB026906.1', 'NM_000059.3')) def test_name_checker(self): """ -- GitLab