diff --git a/doc/conf.py b/doc/conf.py index 424e8512beb673914738dcfb55f16e984b047ce4..322ffec5c70d127f16c9e83fcb2be4ce5021125d 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -23,8 +23,7 @@ class Mock(MagicMock): def __getattr__(cls, name): return Mock() -MOCK_MODULES = ['MySQLdb', 'cchardet', 'lxml', 'lxml.builder', 'lxml.etree', - 'magic'] +MOCK_MODULES = ['MySQLdb', 'lxml', 'lxml.builder', 'lxml.etree', 'magic'] sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) # If extensions (or modules to document with autodoc) are in another directory, diff --git a/mutalyzer/File.py b/mutalyzer/File.py index 5851e7a50e31fa45e9aad72bbab4d89f9e987c3d..90641c3c6dc80a2bedd3281058fa9d57d8e598f0 100644 --- a/mutalyzer/File.py +++ b/mutalyzer/File.py @@ -25,7 +25,7 @@ import csv # Sniffer(), reader(), Error import xlrd # open_workbook() import zipfile # ZipFile() import xml.dom.minidom # parseString() -import cchardet as chardet +import chardet from mutalyzer.config import settings diff --git a/mutalyzer/Retriever.py b/mutalyzer/Retriever.py index cc6f91937ac4a91bf034d1a902fe57bde160a25f..286caf3e8d426631e08e5578add48a80aaafe4c4 100644 --- a/mutalyzer/Retriever.py +++ b/mutalyzer/Retriever.py @@ -27,7 +27,7 @@ from xml.dom import DOMException, minidom from xml.parsers import expat from httplib import HTTPException, IncompleteRead from sqlalchemy.orm.exc import NoResultFound -import cchardet as chardet +import chardet from mutalyzer import util from mutalyzer.config import settings diff --git a/requirements.txt b/requirements.txt index add9c97c442c2ac35daa22dd66187fafdc61b4a4..413abda09ac0e84cb6c7a4431fe29e4d792b65df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ Sphinx==1.2.3 Werkzeug==0.9.6 alembic==0.6.7 biopython==1.64 -cchardet==0.3.5 +chardet==2.3.0 cssselect==0.9.1 lxml==3.4.0 mock==1.0.1 diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index c118bfd751558406c173a5f4343b93d8da68bb6a..7dd3a8c2ffcb98c8cdf3da4b728768ecf8e1ab47 100644 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -350,3 +350,21 @@ class TestScheduler(MutalyzerTest): ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)']] self._batch_job_plain_text(variants, expected, 'syntax-checker') + + def test_windows_1252_input(self): + """ + Simple input encoded as WINDOWS-1252. + """ + variants = ['AB026906.1:c.274G>T', + # Encoded as WINDOWS-1252, the following is not valid UTF8. + 'NM_000052.4:c.2407\u20132A>G', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + batch_file = io.BytesIO(('\n'.join(variants) + '\n').encode('WINDOWS-1252')) + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['NM_000052.4:c.2407\u20132A>G', + '(grammar): Expected W:(acgt...) (at char 18), (line:1, col:19)'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + + self._batch_job(batch_file, expected, 'syntax-checker')