diff --git a/mutalyzer/File.py b/mutalyzer/File.py index 90641c3c6dc80a2bedd3281058fa9d57d8e598f0..772d2002e5cd5fed3ba389ca36786299c319da7b 100644 --- a/mutalyzer/File.py +++ b/mutalyzer/File.py @@ -143,7 +143,7 @@ class File() : handle.seek(0) if result['confidence'] > 0.5: - encoding = result['encoding'] + encoding = unicode(result['encoding']) else: encoding = 'utf-8' @@ -170,7 +170,13 @@ class File() : handle = _UniversalNewlinesByteStreamIter(handle, encoding=encoding, buffer_size=BUFFER_SIZE) - buf = handle.read(BUFFER_SIZE) + try: + buf = handle.read(BUFFER_SIZE) + except UnicodeDecodeError: + self.__output.addMessage(__file__, 3, 'EBPARSE', + 'Could not decode file (using %s encoding).' + % encoding) + return None # Default dialect dialect = 'excel' @@ -196,8 +202,14 @@ class File() : reader = csv.reader(handle, dialect) ret = [] - for i in reader: - ret.append([c.decode('utf-8') for c in i]) + try: + for i in reader: + ret.append([c.decode('utf-8') for c in i]) + except UnicodeDecodeError: + self.__output.addMessage(__file__, 3, 'EBPARSE', + 'Could not decode file (using %s encoding).' + % encoding) + return None return ret #__parseCsvFile diff --git a/mutalyzer/Retriever.py b/mutalyzer/Retriever.py index 286caf3e8d426631e08e5578add48a80aaafe4c4..e514ab96bc3a0cd1519513acaedde13afcb9c584 100644 --- a/mutalyzer/Retriever.py +++ b/mutalyzer/Retriever.py @@ -109,12 +109,18 @@ class Retriever(object) : """ result = chardet.detect(raw_data) if result['confidence'] > 0.5: - encoding = result['encoding'] + encoding = unicode(result['encoding']) else: encoding = 'utf-8' if not util.is_utf8_alias(encoding): - raw_data = raw_data.decode(encoding).encode('utf-8') + try: + raw_data = raw_data.decode(encoding).encode('utf-8') + except UnicodeDecodeError: + self._output.addMessage(__file__, 4, 'ENOPARSE', + 'Could not decode file (using %s encoding).' + % encoding) + return None # Compress the data to save disk space. comp = bz2.BZ2Compressor() @@ -368,7 +374,8 @@ class GenBankRetriever(Retriever): "number to reduce downloading overhead." % unicode(record.id)) #if - self._write(raw_data, outfile) + if not self._write(raw_data, outfile): + return None return outfile, GI #write