diff --git a/mutalyzer/website/views.py b/mutalyzer/website/views.py index f6f228b19b2795a293d05f9a9f5b31e4920385a7..84b5cf857b424bb6e42b6c2f41d97877459df588 100644 --- a/mutalyzer/website/views.py +++ b/mutalyzer/website/views.py @@ -9,12 +9,11 @@ import bz2 import os import pkg_resources import re -from cStringIO import StringIO import urllib from flask import Blueprint -from flask import (abort, current_app, jsonify, make_response, redirect, - render_template, request, send_from_directory, url_for) +from flask import (abort, jsonify, make_response, redirect, render_template, + request, send_from_directory, url_for) import jinja2 from lxml import etree from spyne.server.http import HttpBase @@ -24,9 +23,8 @@ import mutalyzer from mutalyzer import (announce, describe, File, Retriever, Scheduler, stats, util, variantchecker) from mutalyzer.config import settings -from mutalyzer.db import session from mutalyzer.db.models import BATCH_JOB_TYPES -from mutalyzer.db.models import Assembly, BatchJob, BatchQueueItem +from mutalyzer.db.models import Assembly, BatchJob from mutalyzer.grammar import Grammar from mutalyzer.mapping import Converter from mutalyzer.output import Output @@ -137,13 +135,13 @@ def soap_api(): """ soap_server = HttpBase(soap.application) soap_server.doc.wsdl11.build_interface_document(settings.SOAP_WSDL_URL) - wsdl_handle = StringIO(soap_server.doc.wsdl11.get_interface_document()) + wsdl_string = soap_server.doc.wsdl11.get_interface_document() - xsl_handle = open(os.path.join( - pkg_resources.resource_filename('mutalyzer', 'website/templates'), - 'wsdl-viewer.xsl'), 'r') - wsdl_doc = etree.parse(wsdl_handle) - xsl_doc = etree.parse(xsl_handle) + xsl_file = os.path.join( + pkg_resources.resource_filename('mutalyzer', 'website/templates'), + 'wsdl-viewer.xsl') + wsdl_doc = etree.fromstring(wsdl_string) + xsl_doc = etree.parse(xsl_file) transform = etree.XSLT(xsl_doc) return make_response(unicode(transform(wsdl_doc))) diff --git a/requirements.txt b/requirements.txt index c79b98aa7b7a07520c6506be35262c22d7fa9ca9..63d953eace27346d46a9a1a03088b42509c7b87e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,4 @@ alembic==0.6.3 Sphinx==1.2.1 sphinx-rtd-theme==0.1.5 cchardet==0.3.5 +Werkzeug==0.9.6 diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 6470eb46b541c29234d0aee4fca6820ce30dcf86..791f867ddad19b9a71ac333726e5f13ade37d782 100644 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -14,7 +14,7 @@ from Bio import Entrez from mock import patch from mutalyzer.config import settings -from mutalyzer.db.models import BatchJob, BatchQueueItem +from mutalyzer.db.models import BatchJob from mutalyzer import File from mutalyzer import output from mutalyzer import Scheduler @@ -49,7 +49,8 @@ class TestScheduler(MutalyzerTest): assert left == 0 filename = 'batch-job-%s.txt' % result_id - result = open(os.path.join(settings.CACHE_DIR, filename)) + result = io.open(os.path.join(settings.CACHE_DIR, filename), + encoding='utf-8') next(result) # Header. assert expected == [line.strip().split('\t') for line in result] @@ -320,3 +321,15 @@ class TestScheduler(MutalyzerTest): file_instance = File.File(output.Output('test')) job, columns = file_instance.parseBatchFile(batch_file) assert job is None + + def test_unicode_input(self): + """ + Simple input with some non-ASCII unicode characters. + """ + variants = ['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)']] + self._batch_job_plain_text(variants, expected, 'syntax-checker') diff --git a/tests/test_services_json.py b/tests/test_services_json.py index 8df9b7485ae9c642a0615453ba7e79e882797cb4..81833505e36ecee7436bde0f956e579ecd82c00e 100644 --- a/tests/test_services_json.py +++ b/tests/test_services_json.py @@ -9,6 +9,7 @@ import simplejson as json from spyne.server.null import NullServer import mutalyzer from mutalyzer import announce +from mutalyzer import Scheduler from mutalyzer.services.json import application from fixtures import database, hg19, hg19_transcript_mappings @@ -99,3 +100,45 @@ class TestServicesJson(MutalyzerTest): announce.unset_announcement() r = self._call('info') assert not r.get('announcement') + + def test_checksyntax_unicode(self): + """ + Run checkSyntax with an invalid variant description containing + non-ASCII unicode characters. + """ + r = self._call('checkSyntax', 'La Pe\xf1a') + assert r['valid'] == False + assert len(r['messages']) == 1 + assert r['messages'][0]['errorcode'] == 'EPARSE' + assert r['messages'][0]['message'] == 'Expected W:(0123...) (at char 2), (line:1, col:3)' + + @fix(database) + def test_batchjob_unicode(self): + """ + Submit a batch job with non-ASCII unicode characters in the input + file. + """ + variants = ['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)']] + + data = '\n'.join(variants) + '\n' #.encode('base64') + + result = self._call('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') + job_id = unicode(result) + + result = self._call('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = self._call('monitorBatchJob', job_id) + assert int(result) == 0 + + result = self._call('getBatchJob', job_id) + result = result.decode('base64').decode('utf-8').strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] diff --git a/tests/test_services_soap.py b/tests/test_services_soap.py index 0882c9fbce7e09365b0c235805ac03e519e75dd9..0a85844d07c0f5a95bcf9e00b1dbc183a591f6bf 100644 --- a/tests/test_services_soap.py +++ b/tests/test_services_soap.py @@ -669,3 +669,45 @@ facilisi.""" assert r.errors == 0 assert r.genomicDescription == ud + ':g.7872G>T' assert ud + '(SDHD_v001):c.274G>T' in r.transcriptDescriptions.string + + def test_checksyntax_unicode(self): + """ + Run checkSyntax with an invalid variant description containing + non-ASCII unicode characters. + """ + r = self._call('checkSyntax', 'La Pe\xf1a') + assert r.valid == False + assert len(r.messages.SoapMessage) == 1 + assert r.messages.SoapMessage[0]['errorcode'] == 'EPARSE' + assert r.messages.SoapMessage[0]['message'] == 'Expected W:(0123...) (at char 2), (line:1, col:3)' + + @fix(database) + def test_batchjob_unicode(self): + """ + Submit a batch job with non-ASCII unicode characters in the input + file. + """ + variants = ['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)']] + + data = '\n'.join(variants) + '\n' #.encode('base64') + + result = self._call('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') + job_id = unicode(result) + + result = self._call('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = self._call('monitorBatchJob', job_id) + assert int(result) == 0 + + result = self._call('getBatchJob', job_id) + result = result.decode('base64').decode('utf-8').strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] diff --git a/tests/test_website.py b/tests/test_website.py index c649925e411dc091e172fddc82b5b64550b0f0b2..fd0f02e7725b2cd1dc53b6231a9ac01d70a4caca 100644 --- a/tests/test_website.py +++ b/tests/test_website.py @@ -9,21 +9,15 @@ from __future__ import unicode_literals #import logging; logging.basicConfig() import bz2 -import cgi -import logging from mock import patch import os -import re from io import BytesIO -import time -import urllib -import urllib2 from Bio import Entrez import lxml.html -import mutalyzer from mutalyzer import announce, Scheduler +from mutalyzer.db import models from mutalyzer.website import create_app from fixtures import cache, database, hg19, hg19_transcript_mappings @@ -739,3 +733,89 @@ class TestWebsite(MutalyzerTest): assert 'text/plain' in r.headers['Content-Type'] assert '\t'.join(['chrX', '154157690', '154157691', '4374A>T', '0', '-']) in r.data assert '\t'.join(['chrX', '154157683', '154157685', '4380_4381del', '0', '-']) in r.data + + def test_checksyntax_unicode(self): + """ + Run check syntax form with an invalid variant description containing + non-ASCII unicode characters. + """ + r = self.app.get('/syntax-checker', + query_string={'description': 'La Pe\xf1a'}) + body = r.get_data(as_text=True) + assert 'Fatal' in body + assert 'Details of the parse error' in body + assert 'Expected W:(0123...) (at char 2), (line:1, col:3)' in body + + @fix(database) + def test_batch_unicode(self): + """ + Submit a batch form with non-ASCII unicode characters in the input + file. + """ + file = '\n'.join(['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del']) + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)']] + + data = {'job_type': 'syntax-checker', + 'email': 'test@test.test', + 'file': (BytesIO(file.encode('utf-8')), 'test.txt')} + + r = self.app.post('/batch-jobs', + data=data) + progress_url = '/' + r.location.split('/')[-1] + + assert models.BatchJob.query.first().email == 'test@test.test' + + scheduler = Scheduler.Scheduler() + scheduler.process() + + r = self.app.get(progress_url) + + dom = lxml.html.fromstring(r.data) + result_url = dom.cssselect('#ifnot_items_left a')[0].attrib['href'] + + r = self.app.get(result_url) + assert 'text/plain' in r.headers['Content-Type'] + + result = r.get_data(as_text=True).strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] + + @fix(database) + def test_batch_unicode_email(self): + """ + Submit a batch form with non-ASCII unicode characters in the email + address. + """ + file = '\n'.join(['AB026906.1:c.274G>T', + 'AL449423.14(CDKN2A_v002):c.5_400del']) + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + + data = {'job_type': 'syntax-checker', + 'email': 'pe\xf1a@test.test', + 'file': (BytesIO(file.encode('utf-8')), 'test.txt')} + + r = self.app.post('/batch-jobs', + data=data) + progress_url = '/' + r.location.split('/')[-1] + + assert models.BatchJob.query.first().email == 'pe\xf1a@test.test' + + scheduler = Scheduler.Scheduler() + scheduler.process() + + r = self.app.get(progress_url) + + dom = lxml.html.fromstring(r.data) + result_url = dom.cssselect('#ifnot_items_left a')[0].attrib['href'] + + r = self.app.get(result_url) + assert 'text/plain' in r.headers['Content-Type'] + + result = r.get_data(as_text=True).strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result]