Skip to content
Snippets Groups Projects
Commit 57ec1d74 authored by Vermaat
Browse files

Don't trust encoding auto-detection when decoding

parent 71555028
No related branches found
No related tags found
No related merge requests found
......@@ -170,7 +170,13 @@ class File() :
handle = _UniversalNewlinesByteStreamIter(handle, encoding=encoding,
buffer_size=BUFFER_SIZE)
buf = handle.read(BUFFER_SIZE)
try:
buf = handle.read(BUFFER_SIZE)
except UnicodeDecodeError:
self.__output.addMessage(__file__, 3, 'EBPARSE',
'Could not decode file (using %s encoding).'
% encoding)
return None
# Default dialect
dialect = 'excel'
......@@ -196,8 +202,14 @@ class File() :
reader = csv.reader(handle, dialect)
ret = []
for i in reader:
ret.append([c.decode('utf-8') for c in i])
try:
for i in reader:
ret.append([c.decode('utf-8') for c in i])
except UnicodeDecodeError:
self.__output.addMessage(__file__, 3, 'EBPARSE',
'Could not decode file (using %s encoding).'
% encoding)
return None
return ret
#__parseCsvFile
......
......@@ -114,7 +114,13 @@ class Retriever(object) :
encoding = 'utf-8'
if not util.is_utf8_alias(encoding):
raw_data = raw_data.decode(encoding).encode('utf-8')
try:
raw_data = raw_data.decode(encoding).encode('utf-8')
except UnicodeDecodeError:
self._output.addMessage(__file__, 4, 'ENOPARSE',
'Could not decode file (using %s encoding).'
% encoding)
return None
# Compress the data to save disk space.
comp = bz2.BZ2Compressor()
......@@ -368,7 +374,8 @@ class GenBankRetriever(Retriever):
"number to reduce downloading overhead." % unicode(record.id))
#if
self._write(raw_data, outfile)
if not self._write(raw_data, outfile):
return None
return outfile, GI
#write
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment