From 666299144f811c4157ba2994dd7409956ef3dafa Mon Sep 17 00:00:00 2001 From: Martijn Vermaat <martijn@vermaat.name> Date: Mon, 20 Oct 2014 11:24:21 +0200 Subject: [PATCH] Unit tests for unicode strings --- mutalyzer/website/views.py | 20 ++++---- requirements.txt | 1 + tests/test_scheduler.py | 17 ++++++- tests/test_services_json.py | 43 +++++++++++++++++ tests/test_services_soap.py | 42 +++++++++++++++++ tests/test_website.py | 94 ++++++++++++++++++++++++++++++++++--- 6 files changed, 197 insertions(+), 20 deletions(-) diff --git a/mutalyzer/website/views.py b/mutalyzer/website/views.py index f6f228b1..84b5cf85 100644 --- a/mutalyzer/website/views.py +++ b/mutalyzer/website/views.py @@ -9,12 +9,11 @@ import bz2 import os import pkg_resources import re -from cStringIO import StringIO import urllib from flask import Blueprint -from flask import (abort, current_app, jsonify, make_response, redirect, - render_template, request, send_from_directory, url_for) +from flask import (abort, jsonify, make_response, redirect, render_template, + request, send_from_directory, url_for) import jinja2 from lxml import etree from spyne.server.http import HttpBase @@ -24,9 +23,8 @@ import mutalyzer from mutalyzer import (announce, describe, File, Retriever, Scheduler, stats, util, variantchecker) from mutalyzer.config import settings -from mutalyzer.db import session from mutalyzer.db.models import BATCH_JOB_TYPES -from mutalyzer.db.models import Assembly, BatchJob, BatchQueueItem +from mutalyzer.db.models import Assembly, BatchJob from mutalyzer.grammar import Grammar from mutalyzer.mapping import Converter from mutalyzer.output import Output @@ -137,13 +135,13 @@ def soap_api(): """ soap_server = HttpBase(soap.application) soap_server.doc.wsdl11.build_interface_document(settings.SOAP_WSDL_URL) - wsdl_handle = StringIO(soap_server.doc.wsdl11.get_interface_document()) + wsdl_string = soap_server.doc.wsdl11.get_interface_document() - xsl_handle = open(os.path.join( - pkg_resources.resource_filename('mutalyzer', 'website/templates'), - 'wsdl-viewer.xsl'), 'r') - wsdl_doc = etree.parse(wsdl_handle) - xsl_doc = etree.parse(xsl_handle) + xsl_file = os.path.join( + pkg_resources.resource_filename('mutalyzer', 'website/templates'), + 'wsdl-viewer.xsl') + wsdl_doc = etree.fromstring(wsdl_string) + xsl_doc = etree.parse(xsl_file) transform = etree.XSLT(xsl_doc) return make_response(unicode(transform(wsdl_doc))) diff --git a/requirements.txt b/requirements.txt index c79b98aa..63d953ea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,4 @@ alembic==0.6.3 Sphinx==1.2.1 sphinx-rtd-theme==0.1.5 cchardet==0.3.5 +Werkzeug==0.9.6 diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 6470eb46..791f867d 100644 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -14,7 +14,7 @@ from Bio import Entrez from mock import patch from mutalyzer.config import settings -from mutalyzer.db.models import BatchJob, BatchQueueItem +from mutalyzer.db.models import BatchJob from mutalyzer import File from mutalyzer import output from mutalyzer import Scheduler @@ -49,7 +49,8 @@ class TestScheduler(MutalyzerTest): assert left == 0 filename = 'batch-job-%s.txt' % result_id - result = open(os.path.join(settings.CACHE_DIR, filename)) + result = io.open(os.path.join(settings.CACHE_DIR, filename), + encoding='utf-8') next(result) # Header. assert expected == [line.strip().split('\t') for line in result] @@ -320,3 +321,15 @@ class TestScheduler(MutalyzerTest): file_instance = File.File(output.Output('test')) job, columns = file_instance.parseBatchFile(batch_file) assert job is None + + def test_unicode_input(self): + """ + Simple input with some non-ASCII unicode characters. + """ + variants = ['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)']] + self._batch_job_plain_text(variants, expected, 'syntax-checker') diff --git a/tests/test_services_json.py b/tests/test_services_json.py index 8df9b748..81833505 100644 --- a/tests/test_services_json.py +++ b/tests/test_services_json.py @@ -9,6 +9,7 @@ import simplejson as json from spyne.server.null import NullServer import mutalyzer from mutalyzer import announce +from mutalyzer import Scheduler from mutalyzer.services.json import application from fixtures import database, hg19, hg19_transcript_mappings @@ -99,3 +100,45 @@ class TestServicesJson(MutalyzerTest): announce.unset_announcement() r = self._call('info') assert not r.get('announcement') + + def test_checksyntax_unicode(self): + """ + Run checkSyntax with an invalid variant description containing + non-ASCII unicode characters. + """ + r = self._call('checkSyntax', 'La Pe\xf1a') + assert r['valid'] == False + assert len(r['messages']) == 1 + assert r['messages'][0]['errorcode'] == 'EPARSE' + assert r['messages'][0]['message'] == 'Expected W:(0123...) (at char 2), (line:1, col:3)' + + @fix(database) + def test_batchjob_unicode(self): + """ + Submit a batch job with non-ASCII unicode characters in the input + file. + """ + variants = ['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)']] + + data = '\n'.join(variants) + '\n' #.encode('base64') + + result = self._call('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') + job_id = unicode(result) + + result = self._call('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = self._call('monitorBatchJob', job_id) + assert int(result) == 0 + + result = self._call('getBatchJob', job_id) + result = result.decode('base64').decode('utf-8').strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] diff --git a/tests/test_services_soap.py b/tests/test_services_soap.py index 0882c9fb..0a85844d 100644 --- a/tests/test_services_soap.py +++ b/tests/test_services_soap.py @@ -669,3 +669,45 @@ facilisi.""" assert r.errors == 0 assert r.genomicDescription == ud + ':g.7872G>T' assert ud + '(SDHD_v001):c.274G>T' in r.transcriptDescriptions.string + + def test_checksyntax_unicode(self): + """ + Run checkSyntax with an invalid variant description containing + non-ASCII unicode characters. + """ + r = self._call('checkSyntax', 'La Pe\xf1a') + assert r.valid == False + assert len(r.messages.SoapMessage) == 1 + assert r.messages.SoapMessage[0]['errorcode'] == 'EPARSE' + assert r.messages.SoapMessage[0]['message'] == 'Expected W:(0123...) (at char 2), (line:1, col:3)' + + @fix(database) + def test_batchjob_unicode(self): + """ + Submit a batch job with non-ASCII unicode characters in the input + file. + """ + variants = ['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)']] + + data = '\n'.join(variants) + '\n' #.encode('base64') + + result = self._call('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') + job_id = unicode(result) + + result = self._call('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = self._call('monitorBatchJob', job_id) + assert int(result) == 0 + + result = self._call('getBatchJob', job_id) + result = result.decode('base64').decode('utf-8').strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] diff --git a/tests/test_website.py b/tests/test_website.py index c649925e..fd0f02e7 100644 --- a/tests/test_website.py +++ b/tests/test_website.py @@ -9,21 +9,15 @@ from __future__ import unicode_literals #import logging; logging.basicConfig() import bz2 -import cgi -import logging from mock import patch import os -import re from io import BytesIO -import time -import urllib -import urllib2 from Bio import Entrez import lxml.html -import mutalyzer from mutalyzer import announce, Scheduler +from mutalyzer.db import models from mutalyzer.website import create_app from fixtures import cache, database, hg19, hg19_transcript_mappings @@ -739,3 +733,89 @@ class TestWebsite(MutalyzerTest): assert 'text/plain' in r.headers['Content-Type'] assert '\t'.join(['chrX', '154157690', '154157691', '4374A>T', '0', '-']) in r.data assert '\t'.join(['chrX', '154157683', '154157685', '4380_4381del', '0', '-']) in r.data + + def test_checksyntax_unicode(self): + """ + Run check syntax form with an invalid variant description containing + non-ASCII unicode characters. + """ + r = self.app.get('/syntax-checker', + query_string={'description': 'La Pe\xf1a'}) + body = r.get_data(as_text=True) + assert 'Fatal' in body + assert 'Details of the parse error' in body + assert 'Expected W:(0123...) (at char 2), (line:1, col:3)' in body + + @fix(database) + def test_batch_unicode(self): + """ + Submit a batch form with non-ASCII unicode characters in the input + file. + """ + file = '\n'.join(['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del']) + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)']] + + data = {'job_type': 'syntax-checker', + 'email': 'test@test.test', + 'file': (BytesIO(file.encode('utf-8')), 'test.txt')} + + r = self.app.post('/batch-jobs', + data=data) + progress_url = '/' + r.location.split('/')[-1] + + assert models.BatchJob.query.first().email == 'test@test.test' + + scheduler = Scheduler.Scheduler() + scheduler.process() + + r = self.app.get(progress_url) + + dom = lxml.html.fromstring(r.data) + result_url = dom.cssselect('#ifnot_items_left a')[0].attrib['href'] + + r = self.app.get(result_url) + assert 'text/plain' in r.headers['Content-Type'] + + result = r.get_data(as_text=True).strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] + + @fix(database) + def test_batch_unicode_email(self): + """ + Submit a batch form with non-ASCII unicode characters in the email + address. + """ + file = '\n'.join(['AB026906.1:c.274G>T', + 'AL449423.14(CDKN2A_v002):c.5_400del']) + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + + data = {'job_type': 'syntax-checker', + 'email': 'pe\xf1a@test.test', + 'file': (BytesIO(file.encode('utf-8')), 'test.txt')} + + r = self.app.post('/batch-jobs', + data=data) + progress_url = '/' + r.location.split('/')[-1] + + assert models.BatchJob.query.first().email == 'pe\xf1a@test.test' + + scheduler = Scheduler.Scheduler() + scheduler.process() + + r = self.app.get(progress_url) + + dom = lxml.html.fromstring(r.data) + result_url = dom.cssselect('#ifnot_items_left a')[0].attrib['href'] + + r = self.app.get(result_url) + assert 'text/plain' in r.headers['Content-Type'] + + result = r.get_data(as_text=True).strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] -- GitLab