diff --git a/doc/index.rst b/doc/index.rst index 58dd32c99c17fd2314c205b9b9d8e006fff9dd0d..e0da8e0dfb44a021e28a055ccf5ba6b3eb6d8f4f 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -46,6 +46,7 @@ Additional notes development issues new-organism + strings changelog copyright diff --git a/doc/strings.rst b/doc/strings.rst new file mode 100644 index 0000000000000000000000000000000000000000..51e26ed9f4f04b907292729d1aea6a58b3df5590 --- /dev/null +++ b/doc/strings.rst @@ -0,0 +1,164 @@ +String representations +====================== + +We live in a global economy with many different languages and alphabets. Using +byte strings for text and just assuming everything is ASCII encoded is +suboptimal and *will* lead to bugs. These bugs may even be security issues. + +That's why Mutalyzer uses unicode strings wherever possible and tries to be +aware of encodings when dealing with input and output. Here we describe how we +do it. + + +String representations in Python +-------------------------------- + +Since Mutalyzer only runs on Python 2.7, we can ignore all older Python versions +and Python 3. So, the two main string types in Python are: + +1. `str`, byte strings +2. `unicode`, unicode strings + +Byte strings are the default string type in Python 2.7 and are for example the +type you get when writing a string literal:: + + >>> type('mutalyzer') + <type 'str'> + +Unicode string literals can be written using the ``u`` prefix:: + + >>> type(u'mutalyzer') + <type 'unicode'> + +Many modules from the Python standard library and also third party libraries +consume and produce byte strings by default and may or may not work correctly +with unicode strings. + + +Unicode strategy +---------------- + +Internally, all strings should be represented by unicode strings as much as +possible. The main exceptions are large reference sequence strings. These can +often better be BioPython sequence objects, since that is how we usually get +them in the first place. 
+ +Our main strategy is as follows: + +1. We use ``from __future__ import unicode_literals`` at the top of every + file. +2. All incoming strings are decoded to unicode (if necessary) as soon as + possible. +3. Outgoing strings are encoded to UTF8 (if necessary) as late as possible. +4. BioPython sequence objects can be based on byte strings as well as unicode + strings. +5. In the database, everything is UTF8. +6. We must be aware of the encoding of files supplied by the user or + downloaded from external sources. + +Point 1 ensures that `all string literals in our source code will be unicode +strings <http://python-future.org/unicode_literals.html>`_:: + + >>> from __future__ import unicode_literals + >>> type('mutalyzer') + <type 'unicode'> + +As for point 4, sometimes this may even change under our eyes (e.g., calling +``.reverse_complement()`` will change it to a byte string). We don't care as +long as they're BioPython objects, only when we get the sequence out we must +have it as unicode string. Their contents are always in the ASCII range +anyway. + +Although `Bio.Seq.reverse_complement` works fine on Python byte strings (and +we used to rely on that), it crashes on a Python unicode string. So we take +care to only use it on BioPython sequence objects and wrote our own reverse +complement function for unicode strings +(`mutalyzer.util.reverse_complement`). + + +Files +----- + +The Python builtin `open +<https://docs.python.org/2/library/functions.html#open>`_ cannot decode file +contents and just yields byte strings. Therefore, we typically use `io.open +<https://docs.python.org/2/library/io.html#io.open>`_ instead, which accepts +an `encoding` argument. + +Downloaded reference files are stored UTF8 encoded (and then bzipped). We can +assume UTF8 encoding when reading them back from disk. + +We try to detect the encoding of user uploaded text files (batch jobs, GenBank +files) and assume UTF8 if detection fails. 
+ + +Libraries +--------- + +SQLAlchemy, our database toolkit, transparently sends both byte strings and +unicode strings UTF8 encoded to the database and presents all strings as +unicode strings to us. + +The web framework Mutalyzer uses, Flask, is also fully `unicode based +<http://flask.pocoo.org/docs/0.10/unicode/>`_. + +The Mutalyzer webservices are based on Spyne. The Spyne documentation `has the +following to say <http://spyne.io/docs/2.10/manual/03_types.html#strings>`_ +about its `String` and `Unicode` types: + + There are two string types in Spyne: `spyne.model.primitive.Unicode` and + `spyne.model.primitive.String` whose native types are `unicode` and `str` + respectively. + + Unlike the Python `str`, the Spyne `String` is not for arbitrary byte + streams. You should not use it unless you are absolutely, positively sure + that you need to deal with text data with an unknown encoding. In all + other cases, you should just use the `Unicode` type. They actually look + the same from outside, this distinction is made just to properly deal with + the quirks surrounding Python-2's `unicode` type. + + Remember that you have the `ByteArray` and `File` types at your disposal + when you need to deal with arbitrary byte streams. + + The `String` type will be just an alias for `Unicode` once Spyne gets + ported to Python 3. It might even be deprecated and removed in the future, + so make sure you are using either `Unicode` or `ByteArray` in your + interface definitions. + +So let's not ignore that and never use `String` in our webservice interface. + +The pyparsing library is used for parsing HGVS variant descriptions. Overall +it can deal with unicode input and also yields unicode output in that +case. However, there are some exceptions where we explicitly have to decode +to a unicode string (for example, omitted optional parts yield the empty byte +string). + + +Python 3 +-------- + +The situation in Python 3 is very different from Python 2.7. 
The two main +string types in Python 3 are: + +1. `str`, unicode strings +2. `bytes`, byte strings + +Unicode strings are the default string type in Python 3 and are for example +the type you get when writing a string literal:: + + >>> type('mutalyzer') + <class 'str'> + +Byte string literals can be written using the ``b`` prefix:: + + >>> type(b'mutalyzer') + <class 'bytes'> + +Many modules from the Python standard library and also third party libraries +consume and produce unicode strings by default and may or may not work +correctly with byte strings. + +What does this mean for Mutalyzer? Actually, our current approach takes us +quite a bit closer to how things are generally done in Python 3. However, +Mutalyzer is very much not Python 3 compatible, even the unicode handling +parts are only valid in Python 2.7 on some points. diff --git a/extras/log-tools/find-crashes.py b/extras/log-tools/find-crashes.py index 0e6d791ef19995d3708a982573b40861822ca71f..cf6ba98600a0a4d4afcfb87f2e4ae136e9254096 100755 --- a/extras/log-tools/find-crashes.py +++ b/extras/log-tools/find-crashes.py @@ -9,6 +9,8 @@ crashed. """ +from __future__ import unicode_literals + import os from mutalyzer import config diff --git a/extras/monitor/mutalyzer-monitor.py b/extras/monitor/mutalyzer-monitor.py index b5ea49fdbfac865afec348dd163759d70905bd98..43e49abc2f1e502e9a7805efabc3090d06134853 100755 --- a/extras/monitor/mutalyzer-monitor.py +++ b/extras/monitor/mutalyzer-monitor.py @@ -15,6 +15,8 @@ Currently implemented checks: """ +from __future__ import unicode_literals + import argparse import logging import sys diff --git a/extras/soap-tools/batchjob.py b/extras/soap-tools/batchjob.py index 7558b98d8e284d0c0de8e7267c406145153bd8b1..de11bc2ac7d64c64efb00158e158fb7e5a9e19a3 100755 --- a/extras/soap-tools/batchjob.py +++ b/extras/soap-tools/batchjob.py @@ -17,6 +17,8 @@ to standard output. 
""" +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/checkSyntax.py b/extras/soap-tools/checkSyntax.py index 78c63e5c902e25d0944b744dfc04691ef6053f40..a2bf32d780966a40f25fbc8846fbb41b61195bdf 100755 --- a/extras/soap-tools/checkSyntax.py +++ b/extras/soap-tools/checkSyntax.py @@ -12,6 +12,8 @@ and printed to standard output. """ +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/chromAccession.py b/extras/soap-tools/chromAccession.py index 4fb6e04f1b3baa844bcf50c31f6dd3f826c7ce73..457277d8e278093df25831c7e4de88f2b7d7cde6 100755 --- a/extras/soap-tools/chromAccession.py +++ b/extras/soap-tools/chromAccession.py @@ -13,6 +13,8 @@ printed to standard output. """ +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/descriptionExtract.py b/extras/soap-tools/descriptionExtract.py index 7ca3b2eceefa27fa53b1f41794a8d92dd36b1bba..3889ca414ee9f1054d6350e6f1b87f4b4e909fe4 100755 --- a/extras/soap-tools/descriptionExtract.py +++ b/extras/soap-tools/descriptionExtract.py @@ -14,6 +14,8 @@ service and printed to standard output. """ +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/getCache.py b/extras/soap-tools/getCache.py index 2f9c7df218b3c831964671a622f6f44d14f4d039..07a86818946b31a0de8e2555d3b62a85af05c8a2 100755 --- a/extras/soap-tools/getCache.py +++ b/extras/soap-tools/getCache.py @@ -12,6 +12,8 @@ printed to standard output. 
""" +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/getGeneAndTranscript.py b/extras/soap-tools/getGeneAndTranscript.py index 8946d59e71c8fb280b4e4e240acdd019f3fe24bd..e4ba939b0a335a34e43b85ff1135c3ada19d8aca 100755 --- a/extras/soap-tools/getGeneAndTranscript.py +++ b/extras/soap-tools/getGeneAndTranscript.py @@ -13,6 +13,8 @@ web service and printed to standard output. """ +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/getGeneName.py b/extras/soap-tools/getGeneName.py index e3b7dd01445c37602131ffa73e51f680255ee376..ad4ce8c4afe8ad25780a778f76a0d28eaa4f0990 100755 --- a/extras/soap-tools/getGeneName.py +++ b/extras/soap-tools/getGeneName.py @@ -13,6 +13,8 @@ printed to standard output. """ +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/getTranscripts.py b/extras/soap-tools/getTranscripts.py index 51052fca68208719de8002af8b44418120180eb3..82af32191ee18635a07ba55472be91f64a8d830d 100755 --- a/extras/soap-tools/getTranscripts.py +++ b/extras/soap-tools/getTranscripts.py @@ -13,6 +13,8 @@ web service and printed to standard output. """ +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/getTranscriptsAndInfo.py b/extras/soap-tools/getTranscriptsAndInfo.py index 86dc3ff446887e970cd6c521b998629848904943..12b94d86003fb96f3af035a9446a9788615c1bd7 100755 --- a/extras/soap-tools/getTranscriptsAndInfo.py +++ b/extras/soap-tools/getTranscriptsAndInfo.py @@ -14,6 +14,8 @@ and printed to standard output. 
""" +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/getTranscriptsByGeneName.py b/extras/soap-tools/getTranscriptsByGeneName.py index d7789a0acbe91b85aef602f9771f25dfd13068a6..f31ff6ba6e667794fdfe3cbaf95f76dcf222038f 100755 --- a/extras/soap-tools/getTranscriptsByGeneName.py +++ b/extras/soap-tools/getTranscriptsByGeneName.py @@ -12,6 +12,8 @@ web service and printed to standard output. """ +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/getTranscriptsMapping.py b/extras/soap-tools/getTranscriptsMapping.py index 79683369ed86b478aabc89c20c9195634a65a3f2..891dfa75a11100689d7b3f6d3948e8d0abd5ecf8 100755 --- a/extras/soap-tools/getTranscriptsMapping.py +++ b/extras/soap-tools/getTranscriptsMapping.py @@ -16,6 +16,8 @@ and printed to standard output. """ +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/getdbSNPDescriptions.py b/extras/soap-tools/getdbSNPDescriptions.py index f5745533067a6e675077d5b9756bd9b7fcd75160..5be99c735012d7cc176e24396af40ccd350c7b43 100755 --- a/extras/soap-tools/getdbSNPDescriptions.py +++ b/extras/soap-tools/getdbSNPDescriptions.py @@ -12,6 +12,8 @@ printed to standard output. """ +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/info.py b/extras/soap-tools/info.py index eb3cd058044621745a59d464bcfd70ca57602a19..1a4ea6e43335330798767d9aee73a880833848b6 100755 --- a/extras/soap-tools/info.py +++ b/extras/soap-tools/info.py @@ -10,6 +10,8 @@ printed to standard output. 
""" +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/mappingInfo.py b/extras/soap-tools/mappingInfo.py index 49fb4ac404df042d044ce9b6525e2084a8a992f0..7a473b1c9a6cfd86401e75bad22b55ca5f123f2e 100755 --- a/extras/soap-tools/mappingInfo.py +++ b/extras/soap-tools/mappingInfo.py @@ -14,6 +14,8 @@ printed to standard output. """ +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/numberConversion.py b/extras/soap-tools/numberConversion.py index 977bbc719ce83dd34b2047add81ff55cdd978fa8..bd5262f4bb19d75d4d852593ac0ebfd116d627e0 100755 --- a/extras/soap-tools/numberConversion.py +++ b/extras/soap-tools/numberConversion.py @@ -13,6 +13,8 @@ web service and printed to standard output. """ +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/runMutalyzer.py b/extras/soap-tools/runMutalyzer.py index 0a2d1e7593db0eed2963cdd80606a015f1ec7a11..475cc6c18c8f6aab61bdb5a952c5d448ec99af2b 100755 --- a/extras/soap-tools/runMutalyzer.py +++ b/extras/soap-tools/runMutalyzer.py @@ -13,6 +13,8 @@ printed to standard output. """ +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/sliceChromosomeByGene.py b/extras/soap-tools/sliceChromosomeByGene.py index 8e24c54d9b9a9a16cdebcb75fb836e7e6a9b66bc..c4e0e4183d002d53b7e620ddef8cad700691d7ca 100755 --- a/extras/soap-tools/sliceChromosomeByGene.py +++ b/extras/soap-tools/sliceChromosomeByGene.py @@ -13,6 +13,8 @@ printed to standard output. 
""" +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/extras/soap-tools/sp.py b/extras/soap-tools/sp.py index d395d1993195a7664d6daa3ac05c7e7f2c3476f6..a2fd0be498607c268b9ab31d5c8c60efbee6ef5c 100755 --- a/extras/soap-tools/sp.py +++ b/extras/soap-tools/sp.py @@ -11,6 +11,8 @@ # This code is in the public domain; it can be used for whatever purpose # with absolutely no restrictions. +from __future__ import unicode_literals + import sys from SOAPpy import WSDL diff --git a/extras/soap-tools/transcriptInfo.py b/extras/soap-tools/transcriptInfo.py index d25d361a94461572ebd600ac165b3513d8dea92e..bd9c14e8c5dcb0c3b3bca03e513b60f725d89566 100755 --- a/extras/soap-tools/transcriptInfo.py +++ b/extras/soap-tools/transcriptInfo.py @@ -12,6 +12,8 @@ and printed to standard output. """ +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() import sys diff --git a/migrations/script.py.mako b/migrations/script.py.mako index 95702017ea341e6455933b35f8ef5bf45f2df728..56af6fd8e141a90a81a3cf64d4f1af10eb291cf7 100644 --- a/migrations/script.py.mako +++ b/migrations/script.py.mako @@ -6,6 +6,8 @@ Create Date: ${create_date} """ +from __future__ import unicode_literals + # revision identifiers, used by Alembic. revision = ${repr(up_revision)} down_revision = ${repr(down_revision)} diff --git a/migrations/versions/402ff01b0d5d_fix_grcm38_chromosome_accession_number_.py b/migrations/versions/402ff01b0d5d_fix_grcm38_chromosome_accession_number_.py index ca664e5629e625ce136b92963c91a637fd790ed5..10ed1f8be249bd96d42fd7c398cbbc3c034d87fd 100644 --- a/migrations/versions/402ff01b0d5d_fix_grcm38_chromosome_accession_number_.py +++ b/migrations/versions/402ff01b0d5d_fix_grcm38_chromosome_accession_number_.py @@ -6,6 +6,8 @@ Create Date: 2014-10-08 15:10:21.522551 """ +from __future__ import unicode_literals + # revision identifiers, used by Alembic. 
revision = '402ff01b0d5d' down_revision = 'ea660b66f26' diff --git a/migrations/versions/ea660b66f26_initial_schema.py b/migrations/versions/ea660b66f26_initial_schema.py index d0d474ed4a532d1661b126aa3a83abc1170bcdd8..eec6ce6af5ee8767be03e99bda445305002394b1 100644 --- a/migrations/versions/ea660b66f26_initial_schema.py +++ b/migrations/versions/ea660b66f26_initial_schema.py @@ -6,6 +6,8 @@ Create Date: 2014-02-04 18:38:28.416032 """ +from __future__ import unicode_literals + # revision identifiers, used by Alembic. revision = 'ea660b66f26' down_revision = None diff --git a/mutalyzer/Crossmap.py b/mutalyzer/Crossmap.py index 0fb166dc9e2e0c42aef3473ba14015bf9624a726..0de7ce3aba863de574bc9d79e2278b084f417b8c 100644 --- a/mutalyzer/Crossmap.py +++ b/mutalyzer/Crossmap.py @@ -10,6 +10,8 @@ and stop and the orientation of a transcript. #Public classes: # - Crossmap ; Convert from g. to c. or n. notation or vice versa. +from __future__ import unicode_literals + class Crossmap() : """ Convert from I{g.} to I{c.} or I{n.} notation or vice versa. @@ -406,13 +408,13 @@ class Crossmap() : @type a: integer @return: The converted notation (may be unaltered) - @rtype: string + @rtype: unicode """ if a > self.__STOP : - return '*' + str(a - self.__STOP) + return '*' + unicode(a - self.__STOP) - return str(a) + return unicode(a) #int2main def main2int(self, s) : @@ -423,7 +425,7 @@ class Crossmap() : - __STOP ; CDS stop in I{c.} notation. @arg s: A string in '*' notation - @type s: string + @type s: unicode @return: The converted notation (may be unaltered) @rtype: integer @@ -447,20 +449,20 @@ class Crossmap() : @type fuzzy: bool @return: The offset in HGVS notation - @rtype: string + @rtype: unicode """ if t[1] > 0 : # The exon boundary is downstream. if fuzzy: return '+?' if t[0] >= self.__trans_end : # It is downstream of the last exon. 
- return "+d" + str(t[1]) - return '+' + str(t[1]) + return "+d" + unicode(t[1]) + return '+' + unicode(t[1]) #if if t[1] < 0 : # The exon boundary is uptream. if fuzzy: return '-?' if t[0] <= self.__trans_start : # It is upstream of the first exon. - return "-u" + str(-t[1]) - return str(t[1]) + return "-u" + unicode(-t[1]) + return unicode(t[1]) #if return '' # No offset was given. #int2offset @@ -472,7 +474,7 @@ class Crossmap() : sensible. @arg s: An offset in HGVS notation - @type s: string + @type s: unicode @return: The offset as an integer @rtype: integer @@ -505,12 +507,12 @@ class Crossmap() : @type fuzzy: bool @return: The position in HGVS notation - @rtype: string + @rtype: unicode """ if t[0] >= self.__trans_end or t[0] <= self.__trans_start: - return str(self.int2main(self.__minus(t[0], -t[1]))) - return str(self.int2main(t[0])) + str(self.int2offset(t, fuzzy)) + return unicode(self.int2main(self.__minus(t[0], -t[1]))) + return unicode(self.int2main(t[0])) + unicode(self.int2offset(t, fuzzy)) #tuple2string def g2c(self, a, fuzzy=False) : @@ -525,7 +527,7 @@ class Crossmap() : @type fuzzy: bool @return: The position in HGVS notation - @rtype: string + @rtype: unicode """ return self.tuple2string(self.g2x(a), fuzzy) #g2c diff --git a/mutalyzer/File.py b/mutalyzer/File.py index b95f03012205f4ec02832d610833f39797d43a15..1212d310de7b085e65a0fa25f5a5458e6e74137a 100644 --- a/mutalyzer/File.py +++ b/mutalyzer/File.py @@ -16,24 +16,82 @@ Module for parsing CSV files and spreadsheets. # - File ; Parse CSV files and spreadsheets. 
+from __future__ import unicode_literals + +import codecs +import re import magic # open(), MAGIC_MIME, MAGIC_NONE import csv # Sniffer(), reader(), Error import xlrd # open_workbook() import zipfile # ZipFile() import xml.dom.minidom # parseString() -import os # remove() -import tempfile -import types # UnicodeType -from cStringIO import StringIO +import cchardet as chardet -from mutalyzer import util from mutalyzer.config import settings -# Amount of bytes to be read for determining the file type. +# Amount of bytes to be read from a file at a time (this is also the amount +# read for determining the file type). BUFFER_SIZE = 32768 +class _UniversalNewlinesByteStreamIter(object): + """ + The codecs module doesn't provide universal newline support. This class is + used as a stream wrapper that provides this functionality. + + The wrapped stream must yield byte strings. We decode it using the given + encoding, normalise newlines, and yield UTF-8 encoded data (read method) + or lines (as iterator). 
+ + Adaptation from an old Cython version: + https://github.com/cython/cython/blob/076fac3/Cython/Utils.py + """ + normalise_newlines = re.compile('\r\n?|\n').sub + + def __init__(self, stream, encoding='utf-8', buffer_size=0x1000): + # let's assume .read() doesn't change + self.stream = codecs.getreader(encoding)(stream) + self._read = self.stream.read + self.buffer_size = buffer_size + + def _read_normalised(self, count=None): + count = count or self.buffer_size + data = self._read(count) + if '\r' not in data: + return data + if data.endswith('\r'): + # may be missing a '\n' + data += self._read(1) + return self.normalise_newlines('\n', data) + + def _readlines(self): + buffer = [] + data = self._read_normalised() + while data: + buffer.append(data) + lines = ''.join(buffer).splitlines(True) + for line in lines[:-1]: + yield line + buffer = [lines[-1]] + data = self._read_normalised() + + if buffer[0]: + yield buffer[0] + + def seek(self, pos): + if pos == 0: + self.stream.seek(0) + else: + raise NotImplementedError + + def read(self, count=-1): + return self._read_normalised(count).encode('utf-8') + + def __iter__(self): + return (line.encode('utf-8') for line in self._readlines()) + + class File() : """ Parse CSV files and spreadsheets. @@ -45,7 +103,6 @@ class File() : - __init__(config, output) ; Initialise the class. Private methods: - - __tempFileWrapper(func, handle) ; Call func() with a filename. - __parseCsvFile(handle) ; Parse a CSV file. - __parseXlsFile(handle) ; Parse an Excel file. - __parseOdsFile(handle) ; Parse an OpenDocument Spreadsheet file. @@ -71,56 +128,48 @@ class File() : self.__output = output #: The Output object #__init__ - def __tempFileWrapper(self, func, handle) : + def __parseCsvFile(self, handle) : """ - Make a temporary file, put the content of a stream in it and pass - the filename to a general function. Return whatever this function - returns. + Parse a CSV file. Does not reset the file handle to start. 
- @arg func: general function that needs a file name as argument - @type func: function - @arg handle: A stream - @type handle: stream - - @return: unknown; the output of func(). - @rtype: ? - """ - write_handle, filename = tempfile.mkstemp(text=True) - - # Dump the content of the stream pointed to by handle into the file. - handle.seek(0) - os.write(write_handle, handle.read()) - os.close(write_handle) - - # Open the file with func(). - ret = func(filename) - # Apperantly apache will remove this file even when opened by the - # function *func - os.remove(filename) - - return ret - #__tempFileWrapper - - def __parseCsvFile(self, handle_) : - """ - Parse a CSV file. - The stream is not rewinded after use. - - @arg handle: A handle to a stream - @type handle: stream + @arg handle: CSV file. Must be a seekable binary file object. + @type handle: file object @return: list of lists @rtype: list """ - # We wrap the file in a temporary file just to have universal newlines - # which is not always possible to have on incoming files (thinks web - # and rpc frontends). This transparently solves the problem of Unix - # versus Windows versus Mac style newlines. - handle = tempfile.TemporaryFile('rU+w') - for chunk in handle_: - handle.write(chunk) - + buf = handle.read(BUFFER_SIZE) + result = chardet.detect(buf) handle.seek(0) + + if result['confidence'] > 0.5: + encoding = result['encoding'] + else: + encoding = 'utf-8' + + # Python 2.7 makes it extraordinarily hard to do this correctly. We + # have a binary file object containing lines of text in a certain + # encoding with unknown style of line-endings. + # + # We want to correctly decode the file contents, accept any style of + # line-endings, parse the lines with the `csv` module, and return + # unicode strings. + # + # 1. `codecs.getreader` does not have a universal newlines mode. + # 2. 
`io.TextIOWrapper` cannot be wrapped around our file object, + # since it is required to be an `io.BufferedIOBase`, which it + # usually will not be. + # 3. The `csv` module cannot read unicode. + # + # Ugh. + # + # So, we use a stream wrapper that consumes byte strings, decodes to + # unicode, normalises newlines, and produces the result UTF-8 encoded. + # That's what we feed the `csv` module. We decode what it gives back + # to unicode strings. What a mess. + handle = _UniversalNewlinesByteStreamIter(handle, encoding=encoding, + buffer_size=BUFFER_SIZE) + buf = handle.read(BUFFER_SIZE) # Default dialect @@ -148,41 +197,38 @@ class File() : ret = [] for i in reader: - ret.append(i) + ret.append([c.decode('utf-8') for c in i]) - handle.close() return ret #__parseCsvFile def __parseXlsFile(self, handle) : """ - Parse an Excel file. - The stream is not rewinded after use. + Parse an Excel file. Does not reset the file handle to start. - @arg handle: A handle to a stream - @type handle: stream + @arg handle: Excel file. Must be a binary file object. + @type handle: file object @return: A list of lists @rtype: list """ - workBook = self.__tempFileWrapper(xlrd.open_workbook, handle) + try: + workBook = xlrd.open_workbook(file_contents=handle.read()) + except xlrd.XLRDError: + return None + sheet = workBook.sheet_by_index(0) ret = [] for i in range(sheet.nrows) : row = [] for j in sheet.row_values(i) : - if type(j) == types.UnicodeType : # Convert the data to strings. - row.append(j.encode("utf8")) - else : - row.append(str(j)) + row.append(j) #for ret.append(row) #for - del sheet, workBook - return ret #__parseXlsFile @@ -197,8 +243,8 @@ class File() : @return: A list of lists @rtype: list """ + # Todo: Use a library for this. 
- #zipFile = self.__tempFileWrapper(zipfile.ZipFile, handle) zipFile = zipfile.ZipFile(handle) doc = xml.dom.minidom.parseString(zipFile.read("content.xml")) zipFile.close() @@ -209,10 +255,11 @@ class File() : for j in i.getElementsByTagName("table:table-cell") : c = j.getElementsByTagName("text:p") if c : - row.append(c[0].lastChild.data.encode("utf8")) + row.append(c[0].lastChild.data) #if #for - ret.append(row) + if row: + ret.append(row) #for return ret @@ -343,23 +390,23 @@ class File() : Get the mime type of a stream by inspecting a fixed number of bytes. The stream is rewinded after use. - @arg handle: A handle to a stream - @type handle: stream + @arg handle: Stream to be inspected. Must be a seekable binary file + object. + @type handle: file object - @return: The mime type of a file - @rtype: string + @return: The mime type of a file and a textual description. + @rtype: unicode, unicode """ handle.seek(0) buf = handle.read(BUFFER_SIZE) MagicInstance = magic.open(magic.MAGIC_MIME) MagicInstance.load() - mimeType = MagicInstance.buffer(buf).split(';')[0] + mimeType = MagicInstance.buffer(buf).decode('utf-8').split(';')[0] MagicInstance.close() MagicInstance = magic.open(magic.MAGIC_NONE) MagicInstance.load() - description = MagicInstance.buffer(buf) - del MagicInstance + description = MagicInstance.buffer(buf).decode('utf-8') handle.seek(0) return mimeType, description @@ -368,22 +415,28 @@ class File() : def parseFileRaw(self, handle) : """ Check which format a stream has and parse it with the appropriate - parser if the stream is recognised. + parser if the stream is recognised. Does not reset the file handle to + start. - @arg handle: A handle to a stream - @type handle: stream + @arg handle: Input file to be parsed. Must be a seekable binary file + object. 
+ @type handle: file object @return: A list of lists, None if an error occured @rtype: list """ mimeType = self.getMimeType(handle) - if mimeType[0] == "text/plain" : + if mimeType[0] == "text/plain": return self.__parseCsvFile(handle) - if mimeType[0] == "application/vnd.ms-office" : + if mimeType[0] in ('application/vnd.ms-excel', + 'application/vnd.ms-office', + 'application/msword', + 'application/zip'): return self.__parseXlsFile(handle) - if mimeType == ("application/octet-stream", - "OpenDocument Spreadsheet") : + if (mimeType[0] == 'application/vnd.oasis.opendocument.spreadsheet' or + mimeType[1] in ('OpenDocument Spreadsheet', + 'OpenOffice.org 1.x Calc spreadsheet')): return self.__parseOdsFile(handle) return None @@ -392,10 +445,12 @@ class File() : def parseBatchFile(self, handle) : """ Check which format a stream has and parse it with the appropriate - parser if the stream is recognised. + parser if the stream is recognised. Does not reset the file handle to + start. - @arg handle: A handle to a stream - @type handle: stream + @arg handle: Batch job input file. Must be a seekable binary file + object. + @type handle: file object @return: A sanitised list of lists (without a header or empty lines) (or None if an error occured) and the number of columns. @@ -419,9 +474,9 @@ def makeList(l, maxlen=10): @arg maxlen: maximum length of the string you want to return @type maxlen: integer @return: a list converted to a string with comma's and spaces - @rtype: string + @rtype: unicode """ - ret = ", ".join(str(i) for i in l[:maxlen]) + ret = ", ".join(i for i in l[:maxlen]) if len(l)>maxlen: return ret+", ..." else: diff --git a/mutalyzer/GenRecord.py b/mutalyzer/GenRecord.py index b30ed80060bb3135f28650bb9bfae4bbf1f30b61..5a729f737270d0ef52d8acc08d9af16de5668589 100644 --- a/mutalyzer/GenRecord.py +++ b/mutalyzer/GenRecord.py @@ -15,7 +15,7 @@ search for them each time. # - GenRecord ; Convert a GenBank record to a nested dictionary. 
-import Bio +from __future__ import unicode_literals from mutalyzer import util from mutalyzer import Crossmap @@ -85,7 +85,7 @@ class Locus(object) : - CM ; A Crossmap object. @arg name: identifier of the locus - @type name: string + @type name: unicode """ self.name = name @@ -131,7 +131,7 @@ class Locus(object) : Expands the DNA description with a new raw variant. @arg rawVariant: description of a single mutation - @type rawVariant: string + @type rawVariant: unicode """ if self.description: # Don't change anything if we already have an unknown value. @@ -170,7 +170,7 @@ class Gene(object) : - __locusTag ; @arg name: gene name - @type name: string + @type name: unicode """ self.name = name @@ -199,14 +199,14 @@ class Gene(object) : Find a transcript, given its name. @arg name: transcript variant number - @type name: string + @type name: unicode @return: transcript @rtype: object """ for i in self.transcriptList : - if i.name == name or i.name == str("%03i" % int(name)): + if i.name == name or i.name == "%03i" % int(name): return i return None #findLocus @@ -230,7 +230,7 @@ class Gene(object) : Look in the list of transcripts for a given protein accession number. @arg protAcc: protein accession number - @type protAcc: string + @type protAcc: unicode @return: transcript @rtype: object @@ -300,7 +300,7 @@ class Record(object) : Returns a Gene object, given its name. @arg name: Gene name - @type name: string + @type name: unicode @return: Gene object @rtype: object @@ -332,7 +332,7 @@ class Record(object) : Expands the DNA description with a new raw variant. @arg rawVariant: description of a single mutation - @type rawVariant: string + @type rawVariant: unicode """ if self.description : @@ -469,18 +469,18 @@ class GenRecord() : @arg gene: Gene @type gene: object @arg string: DNA sequence - @type string: string + @type string: unicode @kwarg string_reverse: DNA sequence to use (if not None) for the reverse complement. 
@return: reverse-complement (if applicable), otherwise return the original. - @rtype: string + @rtype: unicode """ if gene.orientation == -1: if string_reverse: string = string_reverse - return Bio.Seq.reverse_complement(string) + return util.reverse_complement(string) return string #__maybeInvert @@ -639,15 +639,15 @@ class GenRecord() : @arg stop_g: stop position @type stop_g: integer @arg varType: variant type - @type varType: string + @type varType: unicode @arg arg1: argument 1 of a raw variant - @type arg1: string + @type arg1: unicode @arg arg2: argument 2 of a raw variant - @type arg2: string + @type arg2: unicode @arg roll: ??? @type roll: tuple (integer, integer) @kwarg arg1_reverse: argument 1 to be used on reverse strand - @type arg1_reverse: string + @type arg1_reverse: unicode @kwarg start_fuzzy: Indicates if start position of variant is fuzzy. @type start_fuzzy: bool @kwarg stop_fuzzy: Indicates if stop position of variant is fuzzy. @@ -666,8 +666,8 @@ class GenRecord() : else: chromStart = self.record.toChromPos(stop_g) chromStop = self.record.toChromPos(start_g) - chromArg1 = Bio.Seq.reverse_complement(arg1) - chromArg2 = Bio.Seq.reverse_complement(arg2) + chromArg1 = util.reverse_complement(arg1) + chromArg2 = util.reverse_complement(arg2) # Todo: Should we use arg1_reverse here? 
if roll : diff --git a/mutalyzer/Retriever.py b/mutalyzer/Retriever.py index 5fa91eeb1940dcb14fd5ce7087495281a91cdfe9..ac09bafa3a735b7af6cd445633baeb47dacaf8a2 100644 --- a/mutalyzer/Retriever.py +++ b/mutalyzer/Retriever.py @@ -10,13 +10,15 @@ Public classes: """ +from __future__ import unicode_literals + +import io import os # path.isfile(), link() path.isdir(), path.mkdir(), # walk(), path.getsize(), path.join(), stat(), remove() import time import bz2 # BZ2Compressor(), BZ2File() import hashlib # md5(), update(), hexdigest() import urllib2 # urlopen() -import StringIO # StringIO() from Bio import SeqIO # read() from Bio import Entrez # efetch(), read(), esearch(), esummary() from Bio.Seq import UnknownSeq @@ -25,6 +27,7 @@ from xml.dom import DOMException, minidom from xml.parsers import expat from httplib import HTTPException, IncompleteRead from sqlalchemy.orm.exc import NoResultFound +import cchardet as chardet from mutalyzer import util from mutalyzer.config import settings @@ -84,10 +87,10 @@ class Retriever(object) : Convert an accession number to a filename. @arg name: The accession number - @type name: string + @type name: unicode @return: A filename - @rtype: string + @rtype: unicode """ return os.path.join(settings.CACHE_DIR, name + "." + self.fileType + ".bz2") #_nametofile @@ -97,18 +100,27 @@ class Retriever(object) : Write raw data to a compressed file. @arg raw_data: The raw_data to be compressed and written - @type raw_data: string + @type raw_data: byte string @arg filename: The intended name of the outfile - @type filename: string + @type filename: unicode @return: outfile ; The full path and name of the file written - @rtype: string + @rtype: unicode """ + result = chardet.detect(raw_data) + if result['confidence'] > 0.5: + encoding = result['encoding'] + else: + encoding = 'utf-8' + + if not util.is_utf8_alias(encoding): + raw_data = raw_data.decode(encoding).encode('utf-8') + # Compress the data to save disk space. 
comp = bz2.BZ2Compressor() data = comp.compress(raw_data) data += comp.flush() - out_handle = open(self._nametofile(filename), "w") + out_handle = open(self._nametofile(filename), "wb") out_handle.write(data) out_handle.close() @@ -120,10 +132,10 @@ class Retriever(object) : Calculate the md5sum of a piece of text. @arg content: Arbitrary text - @type content: string + @type content: byte string @return: The md5sum of 'content' - @rtype: string + @rtype: unicode """ hashfunc = hashlib.md5() @@ -131,7 +143,7 @@ class Retriever(object) : md5sum = hashfunc.hexdigest() del hashfunc - return md5sum + return unicode(md5sum) #_calcHash def _newUD(self) : @@ -139,11 +151,11 @@ class Retriever(object) : Make a new UD number based on the current time (seconds since 1970). @return: A new UD number - @rtype: string + @rtype: unicode """ UD = util.generate_id() - return "UD_" + str(UD) + return "UD_" + unicode(UD) #_newUD def _updateDBmd5(self, raw_data, name, GI): @@ -159,7 +171,7 @@ class Retriever(object) : @type GI: @return: filename - @rtype: string + @rtype: unicode """ try: reference = Reference.query.filter_by(accession=name).one() @@ -191,10 +203,10 @@ class Retriever(object) : it. @arg rsId: The rsId of the SNP (example: 'rs9919552'). - @type rsId: string + @type rsId: unicode @return: A list of HGVS notations. - @rtype: list(string) + @rtype: list(unicode) """ # A simple input check. 
id = rs_id[2:] @@ -223,7 +235,7 @@ class Retriever(object) : self._output.addMessage(__file__, 4, 'EENTREZ', 'Error connecting to dbSNP.') self._output.addMessage(__file__, -1, 'INFO', - 'IOError: %s' % str(e)) + 'IOError: %s' % unicode(e)) return [] try: @@ -232,10 +244,10 @@ class Retriever(object) : self._output.addMessage(__file__, 4, 'EENTREZ', 'Error reading from dbSNP.') self._output.addMessage(__file__, -1, 'INFO', - 'IncompleteRead: %s' % str(e)) + 'IncompleteRead: %s' % unicode(e)) return [] - if response_text == '\n': + if response_text.strip() == b'\n': # This is apparently what dbSNP returns for non-existing dbSNP id self._output.addMessage(__file__, 4, 'EENTREZ', 'ID rs%s could not be found in dbSNP.' \ @@ -251,21 +263,21 @@ class Retriever(object) : self._output.addMessage(__file__, 4, 'EENTREZ', 'Unknown dbSNP ' \ 'error. Error parsing result XML.') self._output.addMessage(__file__, -1, 'INFO', - 'ExpatError: %s' % str(e)) + 'ExpatError: %s' % unicode(e)) self._output.addMessage(__file__, -1, 'INFO', - 'Result from dbSNP: %s' % response_text) + 'Result from dbSNP: %s' % unicode(response_text, 'utf-8')) return [] except IndexError: # The expected root element is not present. self._output.addMessage(__file__, 4, 'EENTREZ', 'Unknown dbSNP ' \ 'error. Result XML was not as expected.') self._output.addMessage(__file__, -1, 'INFO', - 'Result from dbSNP: %s' % response_text) + 'Result from dbSNP: %s' % unicode(response_text, 'utf-8')) return [] snps = [] for i in rs.getElementsByTagName('hgvs'): - snps.append(i.lastChild.data.encode('utf8')) + snps.append(i.lastChild.data) return snps #snpConvert @@ -298,9 +310,9 @@ class GenBankRetriever(Retriever): database). @arg raw_data: The data - @type raw_data: string + @type raw_data: byte string @arg filename: The intended name of the file. 
- @type filename: string + @type filename: unicode @arg extract: Flag that indicates whether to extract the record ID and GI number: - 0 ; Do not extract, use 'filename' @@ -310,29 +322,27 @@ class GenBankRetriever(Retriever): @return: tuple ; Depending on the value of 'extract': - 0 ; ('filename', None) - 1 ; (id, GI) - @rtype: tuple (string, string) + @rtype: tuple (unicode, unicode) """ - if raw_data == "\nNothing has been found\n" : + if raw_data.strip() == b'Nothing has been found': self._output.addMessage(__file__, 4, "ENORECORD", "The record could not be retrieved.") return None #if - fakehandle = StringIO.StringIO() # Unfortunately, BioPython needs a - fakehandle.write(raw_data) # file handle. + fakehandle = io.BytesIO() # Unfortunately, BioPython needs a + fakehandle.write(raw_data) # file handle. fakehandle.seek(0) try : record = SeqIO.read(fakehandle, "genbank") except (ValueError, AttributeError): # An error occured while parsing. self._output.addMessage(__file__, 4, "ENOPARSE", "The file could not be parsed.") - fakehandle.close() return None #except if type(record.seq) == UnknownSeq : - fakehandle.close() self._output.addMessage(__file__, 4, "ENOSEQ", "This record contains no sequence. Chromosomal or contig " \ "records should be uploaded with the GenBank uploader.") @@ -342,12 +352,12 @@ class GenBankRetriever(Retriever): outfile = filename GI = None if extract : - outfile = record.id - GI = record.annotations["gi"] + outfile = unicode(record.id) + GI = unicode(record.annotations["gi"]) if outfile != filename : # Add the reference (incl version) to the reference output # This differs if the original reference lacks a version - self._output.addOutput("reference", record.id) + self._output.addOutput("reference", unicode(record.id)) self._output.addOutput( "BatchFlags", ("A1",( filename, @@ -355,9 +365,8 @@ class GenBankRetriever(Retriever): filename+"." ))) self._output.addMessage(__file__, 2, "WNOVER", "No version number is given, using %s. 
Please use this " \ - "number to reduce downloading overhead." % record.id) + "number to reduce downloading overhead." % unicode(record.id)) #if - fakehandle.close() self._write(raw_data, outfile) @@ -378,12 +387,12 @@ class GenBankRetriever(Retriever): net_handle.close() except (IOError, urllib2.HTTPError, HTTPException) as e: self._output.addMessage(__file__, -1, 'INFO', - 'Error connecting to Entrez nuccore database: %s' % str(e)) + 'Error connecting to Entrez nuccore database: %s' % unicode(e)) self._output.addMessage(__file__, 4, 'ERETR', 'Could not retrieve %s.' % name) return None - if raw_data == '\n' : # Check if the file is empty or not. + if raw_data.strip() == b'': # Check if the file is empty or not. self._output.addMessage(__file__, 4, 'ERETR', 'Could not retrieve %s.' % name) return None @@ -391,10 +400,10 @@ class GenBankRetriever(Retriever): # This is a hack to detect constructed references, the proper way to # do this would be to check the data_file_division attribute of the # parsed GenBank file (it would be 'CON'). - if '\nCONTIG' in raw_data: + if b'\nCONTIG' in raw_data: try: # Get the length in base pairs - length = int(raw_data[:raw_data.index(' bp', 0, 500)].split()[-1]) + length = int(raw_data[:raw_data.index(b' bp', 0, 500)].split()[-1]) except ValueError, IndexError: self._output.addMessage(__file__, 4, 'ERETR', 'Could not retrieve %s.' % name) @@ -409,7 +418,7 @@ class GenBankRetriever(Retriever): net_handle.close() except (IOError, urllib2.HTTPError, HTTPException) as e: self._output.addMessage(__file__, -1, 'INFO', - 'Error connecting to Entrez nuccore database: %s' % str(e)) + 'Error connecting to Entrez nuccore database: %s' % unicode(e)) self._output.addMessage(__file__, 4, 'ERETR', 'Could not retrieve %s.' % name) return None @@ -438,7 +447,7 @@ class GenBankRetriever(Retriever): as filename. 
@arg accno: The accession number of the chromosome - @type accno: string + @type accno: unicode @arg start: Start position of the slice @type start: integer @arg stop: End position of the slice. @@ -450,7 +459,7 @@ class GenBankRetriever(Retriever): @type orientation: integer @return: An UD number - @rtype: string + @rtype: unicode """ # Not a valid slice. @@ -483,7 +492,7 @@ class GenBankRetriever(Retriever): handle.close() except (IOError, urllib2.HTTPError, HTTPException) as e: self._output.addMessage(__file__, -1, 'INFO', - 'Error connecting to Entrez nuccore database: %s' % str(e)) + 'Error connecting to Entrez nuccore database: %s' % unicode(e)) self._output.addMessage(__file__, 4, 'ERETR', 'Could not retrieve slice.') return None @@ -512,7 +521,7 @@ class GenBankRetriever(Retriever): #else if self.write(raw_data, reference.accession, 0): - return str(reference.accession) + return reference.accession #retrieveslice def retrievegene(self, gene, organism, upstream, downstream) : @@ -521,9 +530,9 @@ class GenBankRetriever(Retriever): slice if the gene can be found. @arg gene: Name of the gene - @type gene: string + @type gene: unicode @arg organism: The organism in which we search. - @type organism: string + @type organism: unicode @arg upstream: Number of upstream nucleotides for the slice. @type upstream: integer @arg downstream: Number of downstream nucleotides for the slice. @@ -549,7 +558,7 @@ class GenBankRetriever(Retriever): handle.close() except (IOError, urllib2.HTTPError, HTTPException) as e: self._output.addMessage(__file__, -1, 'INFO', - 'Error connecting to Entrez esearch: %s' % str(e)) + 'Error connecting to Entrez esearch: %s' % unicode(e)) self._output.addMessage(__file__, 4, 'ERETR', 'Could not search for gene %s.' 
% gene) return None @@ -571,29 +580,29 @@ class GenBankRetriever(Retriever): handle.close() except (IOError, urllib2.HTTPError, HTTPException) as e: self._output.addMessage(__file__, -1, 'INFO', - 'Error connecting to Entrez esummary: %s' % str(e)) + 'Error connecting to Entrez esummary: %s' % unicode(e)) self._output.addMessage(__file__, 4, 'ERETR', 'Could not get mapping information for gene %s.' % gene) return None - if summary[0]["NomenclatureSymbol"].lower() == gene.lower() : # Found it. + if unicode(summary[0]["NomenclatureSymbol"]).lower() == gene.lower() : # Found it. if not summary[0]["GenomicInfo"] : self._output.addMessage(__file__, 4, "ENOMAPPING", "No mapping information found for gene %s." % gene) return None #if - ChrAccVer = summary[0]["GenomicInfo"][0]["ChrAccVer"] - ChrLoc = summary[0]["GenomicInfo"][0]["ChrLoc"] - ChrStart = summary[0]["GenomicInfo"][0]["ChrStart"] - ChrStop = summary[0]["GenomicInfo"][0]["ChrStop"] - break; + ChrAccVer = unicode(summary[0]["GenomicInfo"][0]["ChrAccVer"]) + ChrLoc = unicode(summary[0]["GenomicInfo"][0]["ChrLoc"]) + ChrStart = unicode(summary[0]["GenomicInfo"][0]["ChrStart"]) + ChrStop = unicode(summary[0]["GenomicInfo"][0]["ChrStop"]) + break #if # Collect official symbols that has this gene as alias in case we # can not find anything. - if gene in summary[0]["OtherAliases"] and \ + if gene in [unicode(a) for a in summary[0]["OtherAliases"]] and \ summary[0]["NomenclatureSymbol"] : - aliases.append(summary[0]["NomenclatureSymbol"]); + aliases.append(unicode(summary[0]["NomenclatureSymbol"])) #for if not ChrAccVer : # We did not find any genes. @@ -631,11 +640,18 @@ class GenBankRetriever(Retriever): is used. 
@arg url: Location of a GenBank record - @type url: string + @type url: unicode @return: UD or None - @rtype: string + @rtype: unicode """ + if not (url.startswith('http://') or + url.startswith('https://') or + url.startswith('ftp://')): + self._output.addMessage(__file__, 4, "ERECPARSE", + "Only HTTP(S) or FTP locations are allowed.") + return None + handle = urllib2.urlopen(url) info = handle.info() if info["Content-Type"] == "text/plain" : @@ -651,14 +667,14 @@ class GenBankRetriever(Retriever): except NoResultFound: UD = self._newUD() if not os.path.isfile(self._nametofile(UD)): - UD = self.write(raw_data, UD, 0) and str(UD) + UD = self.write(raw_data, UD, 0) and UD if UD: #Parsing went OK, add to DB reference = Reference(UD, md5sum, download_url=url) session.add(reference) session.commit() else: if not os.path.isfile(self._nametofile(reference.accession)): - UD = self.write(raw_data, reference.accession, 0) and str(reference.accession) + UD = self.write(raw_data, reference.accession, 0) and reference.accession return UD #Returns the UD or None #if @@ -681,11 +697,11 @@ class GenBankRetriever(Retriever): If the downloaded file is recognised by its hash, the old UD number is used. - @arg raw_data: A GenBank record - @type raw_data: string + @arg raw_data: A GenBank record. + @type raw_data: byte string - @return: - @rtype: string????? + @return: Accession number for the uploaded file. + @rtype: unicode """ md5sum = self._calcHash(raw_data) @@ -702,7 +718,7 @@ class GenBankRetriever(Retriever): if os.path.isfile(self._nametofile(reference.accession)): return reference.accession else: - return self.write(raw_data, reference.accession, 0) and str(reference.accession) + return self.write(raw_data, reference.accession, 0) and reference.accession #uploadrecord def loadrecord(self, identifier): @@ -718,7 +734,7 @@ class GenBankRetriever(Retriever): 3. Fetched from the NCBI. :arg identifier: A RefSeq accession number or geninfo identifier (GI). 
- :type identifier: string + :type identifier: unicode :return: A parsed RefSeq record or `None` if no record could be found for the given identifier. @@ -830,7 +846,7 @@ class LRGRetriever(Retriever): Load and parse a LRG file based on the identifier @arg identifier: The name of the LRG file to read - @type identifier: string + @type identifier: unicode @return: record ; GenRecord.Record of LRG file None ; in case of failure @@ -870,10 +886,10 @@ class LRGRetriever(Retriever): from the pending section. @arg name: The name of the LRG file to fetch - @type name: string + @type name: unicode @return: the full path to the file; None in case of an error - @rtype: string + @rtype: unicode """ prefix = settings.LRG_PREFIX_URL @@ -901,12 +917,12 @@ class LRGRetriever(Retriever): Download an LRG record from an URL. @arg url: Location of the LRG record - @type url: string + @type url: unicode @return: - filename ; The full path to the file - None ; in case of failure - @rtype: string + @rtype: unicode """ lrgID = name or os.path.splitext(os.path.split(url)[1])[0] @@ -914,6 +930,8 @@ class LRGRetriever(Retriever): # return None filename = self._nametofile(lrgID) + # Todo: Properly read the file contents to a unicode string and write + # it utf-8 encoded. handle = urllib2.urlopen(url) info = handle.info() if info["Content-Type"] == "application/xml" and info.has_key("Content-length"): @@ -968,14 +986,14 @@ class LRGRetriever(Retriever): if a parse error occurs None is returned. @arg raw_data: The data - @type raw_data: string + @type raw_data: byte string @arg filename: The intended name of the file - @type filename: string + @type filename: unicode @return: - filename ; The full path and name of the file written - None ; In case of an error - @rtype: string + @rtype: unicode """ # Dirty way to test if a file is valid, # Parse the file to see if it's a real LRG file. 
diff --git a/mutalyzer/Scheduler.py b/mutalyzer/Scheduler.py index e6f102d3e2b23dcd8b60bc7c17a6c7f03a196ad0..400c81c54c3d0530d139ce7ae7a96f7ae4c7542d 100644 --- a/mutalyzer/Scheduler.py +++ b/mutalyzer/Scheduler.py @@ -15,13 +15,15 @@ Module used to add and manage the Batch Jobs. # - Batch Syntax Checker # - Batch Position Converter +from __future__ import unicode_literals + +import io import os # os.path.exists import smtplib # smtplib.STMP from email.mime.text import MIMEText # MIMEText from sqlalchemy import func from sqlalchemy.orm.exc import NoResultFound -import mutalyzer from mutalyzer.config import settings from mutalyzer.db import queries, session from mutalyzer.db.models import Assembly, BatchJob, BatchQueueItem @@ -88,9 +90,9 @@ class Scheduler() : @todo: Handle Connection errors in a try, except clause @arg mailTo: The batch job submitter - @type mailTo: string + @type mailTo: unicode @arg url: The url containing the results - @type url: string + @type url: unicode """ if settings.TESTING: return @@ -410,7 +412,7 @@ Mutalyzer batch scheduler""" % url) O.addMessage(__file__, 4, "EBATCHU", "Unexpected error occurred, dev-team notified") import traceback - O.addMessage(__file__, 4, "DEBUG", repr(traceback.format_exc())) + O.addMessage(__file__, 4, "DEBUG", unicode(repr(traceback.format_exc()))) #except finally : #check if we need to update the database @@ -448,11 +450,11 @@ Mutalyzer batch scheduler""" % url) 'Affected Proteins', 'Restriction Sites Created', 'Restriction Sites Deleted'] - handle = open(filename, 'a') + handle = io.open(filename, mode='a', encoding='utf-8') handle.write("%s\n" % "\t".join(header)) #if else : - handle = open(filename, 'a') + handle = io.open(filename, mode='a', encoding='utf-8') if flags and 'C' in flags: separator = '\t' @@ -507,11 +509,11 @@ Mutalyzer batch scheduler""" % url) # header above it. The header is read from the config file as # a list. We need a tab delimited string. 
header = ['Input', 'Status'] - handle = open(filename, 'a') + handle = io.open(filename, mode='a', encoding='utf-8') handle.write("%s\n" % "\t".join(header)) #if else : - handle = open(filename, 'a') + handle = io.open(filename, mode='a', encoding='utf-8') if flags and 'C' in flags: separator = '\t' @@ -535,11 +537,11 @@ Mutalyzer batch scheduler""" % url) - Output written to outputfile. @arg cmd: The Syntax Checker input - @type cmd: string + @type cmd: unicode @arg i: The JobID @type i: integer @arg build: The build to use for the converter - @type build: string + @type build: unicode @arg flags: Flags of the current entry @type flags: """ @@ -562,7 +564,7 @@ Mutalyzer batch scheduler""" % url) assembly = Assembly.by_name_or_alias(batch_job.argument) except NoResultFound: O.addMessage(__file__, 3, 'ENOASSEMBLY', - 'Not a valid assembly: ' + str(batch_job.argument)) + 'Not a valid assembly: ' + batch_job.argument) raise converter = Converter(assembly, O) @@ -619,11 +621,11 @@ Mutalyzer batch scheduler""" % url) 'Errors', 'Chromosomal Variant', 'Coding Variant(s)'] - handle = open(filename, 'a') + handle = io.open(filename, mode='a', encoding='utf-8') handle.write("%s\n" % "\t".join(header)) #if else : - handle = open(filename, 'a') + handle = io.open(filename, mode='a', encoding='utf-8') if flags and 'C' in flags: separator = '\t' @@ -681,11 +683,11 @@ Mutalyzer batch scheduler""" % url) header = ['Input Variant', 'HGVS description(s)', 'Errors and warnings'] - handle = open(filename, 'a') + handle = io.open(filename, mode='a', encoding='utf-8') handle.write("%s\n" % "\t".join(header)) #if else : - handle = open(filename, 'a') + handle = io.open(filename, mode='a', encoding='utf-8') if flags and 'C' in flags: separator = '\t' @@ -704,7 +706,7 @@ Mutalyzer batch scheduler""" % url) Add a job to the Database and start the BatchChecker. 
@arg email: e-mail address of batch supplier - @type email: string + @type email: unicode @arg queue: A list of jobs @type queue: list @arg columns: The number of columns. diff --git a/mutalyzer/__init__.py b/mutalyzer/__init__.py index e3c80aa36a8691de7128fb8a7c482cf58699bb48..6968d5ff84fa0b23b07b8e49adbd449fd6cc61e5 100644 --- a/mutalyzer/__init__.py +++ b/mutalyzer/__init__.py @@ -3,6 +3,9 @@ HGVS variant nomenclature checker. """ +from __future__ import unicode_literals + + # We follow a versioning scheme compatible with setuptools [1] where the # package version is always that of the upcoming release (and not that of the # previous release), post-fixed with ``.dev``. Only in a release commit, the diff --git a/mutalyzer/announce.py b/mutalyzer/announce.py index d8acbe4de84757bde62b0d326b5a4c0a3fc7ee4d..9adbf79109eeb06e8894b74cfdbad7d929261502 100644 --- a/mutalyzer/announce.py +++ b/mutalyzer/announce.py @@ -7,6 +7,8 @@ fast, it can be done on every website pageview without problems. """ +from __future__ import unicode_literals + from mutalyzer.redisclient import client diff --git a/mutalyzer/config/__init__.py b/mutalyzer/config/__init__.py index def4630bc53ad26234896ab56165afa73bdc3c88..462a490e1bf21d18d7bca310e732ec95bd1f2e62 100644 --- a/mutalyzer/config/__init__.py +++ b/mutalyzer/config/__init__.py @@ -12,6 +12,8 @@ be used. """ +from __future__ import unicode_literals + import collections import os diff --git a/mutalyzer/config/default_settings.py b/mutalyzer/config/default_settings.py index 43009e09e1e0142fed8f99bc67a00076c0ab9327..00dc9b2e8070f55b18bd47d9a384bcc7e9fa98cf 100644 --- a/mutalyzer/config/default_settings.py +++ b/mutalyzer/config/default_settings.py @@ -4,6 +4,9 @@ pointed-to by the `MUTALYZER_SETTINGS` environment variable. """ +from __future__ import unicode_literals + + # Use Mutalyzer in debug mode. 
DEBUG = False diff --git a/mutalyzer/db/__init__.py b/mutalyzer/db/__init__.py index b2192186773b542c68d1dae3884124f012ccfff4..71e8eaf5cd4eeea706873fcb4b179168e34187d7 100644 --- a/mutalyzer/db/__init__.py +++ b/mutalyzer/db/__init__.py @@ -4,6 +4,8 @@ using SQLAlchemy. """ +from __future__ import unicode_literals + import sqlalchemy from sqlalchemy.engine.url import make_url from sqlalchemy.ext.declarative import declarative_base diff --git a/mutalyzer/db/models.py b/mutalyzer/db/models.py index 4119fa99e178b7dfbdaaa91e3cbb1352836dd3c0..faa0754c519549f71d78fbadcceb4c10586d43bf 100644 --- a/mutalyzer/db/models.py +++ b/mutalyzer/db/models.py @@ -3,6 +3,8 @@ Models backed by SQL using SQLAlchemy. """ +from __future__ import unicode_literals + from datetime import datetime import sqlite3 import uuid @@ -50,7 +52,7 @@ class Positions(TypeDecorator): def process_bind_param(self, value, dialect): if value is not None: - value = ','.join(str(i) for i in value) + value = ','.join(unicode(i) for i in value) return value def process_result_value(self, value, dialect): @@ -98,7 +100,7 @@ class BatchJob(db.Base): self.email = email self.download_url = download_url self.argument = argument - self.result_id = str(uuid.uuid4()) + self.result_id = unicode(uuid.uuid4()) self.added = datetime.now() def __repr__(self): diff --git a/mutalyzer/db/queries.py b/mutalyzer/db/queries.py index afdd2a44152e105976edc94db793c4ce12b764d1..7c54d137fa19e5ff0b8459a3df305ec4241c9d2e 100644 --- a/mutalyzer/db/queries.py +++ b/mutalyzer/db/queries.py @@ -7,6 +7,8 @@ Queries on database models. # the models they work with. +from __future__ import unicode_literals + from datetime import datetime, timedelta from sqlalchemy import and_, or_ diff --git a/mutalyzer/describe.py b/mutalyzer/describe.py index 37fb60c238990b3cd12bcdbd9098c48ecfb6724d..d81254c39aeed1febbb7b5545ae48b82e3dfc7cb 100644 --- a/mutalyzer/describe.py +++ b/mutalyzer/describe.py @@ -7,13 +7,14 @@ leading from one sequence to an other. 
@requires: Bio.Seq """ +from __future__ import unicode_literals + import collections -from Bio import Seq from Bio.SeqUtils import seq3 from Bio.Data import CodonTable from mutalyzer.util import longest_common_prefix, longest_common_suffix -from mutalyzer.util import palinsnoop, roll +from mutalyzer.util import palinsnoop, roll, reverse_complement from mutalyzer import models @@ -34,9 +35,9 @@ class LCS(object): Initialise the class. @arg s1: A string. - @type s1: str + @type s1: unicode @arg s2: A string. - @type s2: str + @type s2: unicode @arg lcp: The length of the longest common prefix of {s1} and {s2}. @type lcp: int @arg s1_end: End of the substring in {s1}. @@ -55,21 +56,21 @@ class LCS(object): self.__s2_rc = None self.__matrix_rc = None if DNA: - self.__s2_rc = Seq.reverse_complement(s2[self.__lcp:s2_end]) + self.__s2_rc = reverse_complement(s2[self.__lcp:s2_end]) self.__matrix_rc = self.LCSMatrix(self.__s1, self.__s2_rc) #if #__init__ - def __str__(self): + def __unicode__(self): """ Return a graphical representation of the LCS matrix, mainly for debugging. @returns: A graphical representation of the LCS matrix. - @rtype: str + @rtype: unicode """ return self.visMatrix((0, len(self.__s1)), (0, len(self.__s2))) - #__str__ + #__unicode__ def visMatrix(self, r1, r2, rc=False): """ @@ -77,7 +78,7 @@ class LCS(object): debugging. @returns: A graphical representation of the LCS matrix. - @rtype: str + @rtype: unicode """ nr1 = r1[0] - self.__lcp, r1[1] - self.__lcp nr2 = r2[0] - self.__lcp, r2[1] - self.__lcp @@ -91,7 +92,7 @@ class LCS(object): out = self.__delim.join(self.__delim + '-' + s2[nr2[0]:nr2[1]]) + '\n' for i in range(nr1[0], nr1[1] + 1): out += (('-' + self.__s1)[i] + self.__delim + - self.__delim.join(map(lambda x: str(M[i][x]), + self.__delim.join(map(lambda x: unicode(M[i][x]), range(nr2[0], nr2[1] + 1))) + '\n') return out @@ -102,9 +103,9 @@ class LCS(object): Calculate the Longest Common Substring matrix. @arg s1: A string. 
- @type s1: str + @type s1: unicode @arg s2: A string. - @type s2: str + @type s2: unicode @returns: A matrix with the LCS of {s1}[i], {s2}[j] at position i, j. @rval: list[list[int]] @@ -201,9 +202,9 @@ def __makeOverlaps(peptide): Make a list of overlapping 2-mers of {peptide} in order of appearance. @arg peptide: A peptide sequence. - @type peptide: str + @type peptide: unicode @returns: All 2-mers of {peptide} in order of appearance. - @rtype: list(str) + @rtype: list(unicode) """ return map(lambda x: peptide[x:x+2], range(len(peptide) - 1)) #__makeOverlaps @@ -213,13 +214,13 @@ def __options(pList, peptidePrefix, FS, output): Enumerate all peptides that could result from a frame shift. @arg pList: List of overlapping 2-mers of a peptide. - @type pList: list(str) + @type pList: list(unicode) @arg peptidePrefix: Prefix of a peptide in the alternative reading frame. - @type peptidePrefix: str + @type peptidePrefix: unicode @arg FS: Frame shift table. @type FS: dict @arg output: List of peptides, should be empty initially. - @type output: list(str) + @type output: list(unicode) """ if not pList: output.append(peptidePrefix) @@ -234,7 +235,7 @@ def enumFS(peptide, FS): Enumerate all peptides that could result from a frame shift. @arg peptide: Original peptide sequence. - @type peptide: str + @type peptide: unicode @arg FS: Frame shift table. @type FS: dict """ @@ -250,9 +251,9 @@ def fitFS(peptide, altPeptide, FS): {peptide}. @arg peptide: Original peptide sequence. - @type peptide: str + @type peptide: unicode @arg altPeptide: Observed peptide sequence. - @type altPeptide: str + @type altPeptide: unicode @arg FS: Frame shift table. @type FS: dict """ @@ -302,11 +303,11 @@ class DescribeRawVar(models.RawVar): @arg end_offset: @type end_offset: int @arg type: Variant type. - @type type: str + @type type: unicode @arg deleted: Deleted part of the reference sequence. - @type deleted: str + @type deleted: unicode @arg inserted: Inserted part. 
- @type inserted: str + @type inserted: unicode @arg shift: Amount of freedom. @type shift: int """ @@ -336,7 +337,7 @@ class DescribeRawVar(models.RawVar): correct description. Also see the comment in the class definition. @returns: The HGVS description of the raw variant stored in this class. - @rtype: str + @rtype: unicode """ if not self.start: return "=" @@ -365,7 +366,7 @@ class DescribeRawVar(models.RawVar): correct description. Also see the comment in the class definition. @returns: The HGVS description of the raw variant stored in this class. - @rtype: str + @rtype: unicode """ if self.type == "unknown": return "?" @@ -491,7 +492,7 @@ def alleleDescription(allele): @type allele: list(DescribeRawVar) @returns: The HGVS description of {allele}. - @rval: str + @rval: unicode """ if len(allele) > 1: return "[%s]" % ';'.join(map(lambda x : x.hgvs, allele)) @@ -530,9 +531,9 @@ def DNA_description(M, s1, s2, s1_start, s1_end, s2_start, s2_end): {s1_start}..{s1_end} on {s1} and {s2_start}..{s2_end} on {s2}. arg s1: Sequence 1. - type s1: str + type s1: unicode arg s2: Sequence 2. - type s2: str + type s2: unicode arg s1_start: Start of the range on {s1}. type s1_start: int arg s1_end: End of the range on {s1}. @@ -682,9 +683,9 @@ def protein_description(M, s1, s2, s1_start, s1_end, s2_start, s2_end): {s1_start}..{s1_end} on {s1} and {s2_start}..{s2_end} on {s2}. arg s1: Sequence 1. - type s1: str + type s1: unicode arg s2: Sequence 2. - type s2: str + type s2: unicode arg s1_start: Start of the range on {s1}. type s1_start: int arg s1_end: End of the range on {s1}. @@ -810,15 +811,15 @@ def describe(original, mutated, DNA=True): Convenience function for DNA_description(). @arg original: - @type original: str + @type original: unicode @arg mutated: - @type mutated: str + @type mutated: unicode @returns: A list of DescribeRawVar objects, representing the allele. 
@rval: list(DescribeRawVar) """ - s1 = str(original) - s2 = str(mutated) + s1 = original + s2 = mutated lcp = len(longest_common_prefix(s1, s2)) lcs = len(longest_common_suffix(s1[lcp:], s2[lcp:])) s1_end = len(s1) - lcs diff --git a/mutalyzer/describe_c.py b/mutalyzer/describe_c.py deleted file mode 100755 index 1da86f77293e015ba2a0f53a5a3f61a3fcaeca4d..0000000000000000000000000000000000000000 --- a/mutalyzer/describe_c.py +++ /dev/null @@ -1,587 +0,0 @@ -#!/usr/bin/python - -""" -Prototype of a module that can generate a HGVS description of the variant(s) -leading from one sequence to an other. - -@requires: Bio.Seq -""" -import collections -from Bio import Seq -from Bio.SeqUtils import seq3 -from Bio.Data import CodonTable - -from mutalyzer.util import longest_common_prefix, longest_common_suffix -from mutalyzer.util import palinsnoop, roll -from mutalyzer import models - -from extractor import extractor - -def makeFSTables(table_id): - """ - For every pair of amino acids, calculate the set of possible amino acids in - a different reading frame. Do this for both alternative reading frames (+1 - and +2). - - @arg table_id: Coding table ID. - @type table_id: int - @returns: Two dictionaries for the two alternative reading frames. - @rtype: tuple(dict, dict) - """ - # Make the forward translation table. - table = dict(CodonTable.unambiguous_dna_by_id[table_id].forward_table) - for i in CodonTable.unambiguous_dna_by_id[table_id].stop_codons: - table[i] = '*' - - # Make the reverse translation table. - reverse_table = collections.defaultdict(list) - for i in table: - reverse_table[table[i]].append(i) - - # Make the frame shift tables. - FS1 = collections.defaultdict(set) - FS2 = collections.defaultdict(set) - for AA_i in reverse_table: - for AA_j in reverse_table: - for codon_i in reverse_table[AA_i]: - for codon_j in reverse_table[AA_j]: - FS1[AA_i + AA_j].add(table[(codon_i + codon_j)[1:4]]) # +1. - FS2[AA_i + AA_j].add(table[(codon_i + codon_j)[2:5]]) # +2. 
- #for - return FS1, FS2 -#makeFSTables - -def __makeOverlaps(peptide): - """ - Make a list of overlapping 2-mers of {peptide} in order of appearance. - - @arg peptide: A peptide sequence. - @type peptide: str - @returns: All 2-mers of {peptide} in order of appearance. - @rtype: list(str) - """ - return map(lambda x: peptide[x:x+2], range(len(peptide) - 1)) -#__makeOverlaps - -def __options(pList, peptidePrefix, FS, output): - """ - Enumerate all peptides that could result from a frame shift. - - @arg pList: List of overlapping 2-mers of a peptide. - @type pList: list(str) - @arg peptidePrefix: Prefix of a peptide in the alternative reading frame. - @type peptidePrefix: str - @arg FS: Frame shift table. - @type FS: dict - @arg output: List of peptides, should be empty initially. - @type output: list(str) - """ - if not pList: - output.append(peptidePrefix) - return - #if - for i in FS[pList[0]]: - __options(pList[1:], peptidePrefix + i, FS, output) -#__options - -def enumFS(peptide, FS): - """ - Enumerate all peptides that could result from a frame shift. - - @arg peptide: Original peptide sequence. - @type peptide: str - @arg FS: Frame shift table. - @type FS: dict - """ - output = [] - - __options(__makeOverlaps(peptide), "", FS, output) - return output -#enumFS - -def fitFS(peptide, altPeptide, FS): - """ - Check whether peptide {altPeptide} is a possible frame shift of peptide - {peptide}. - - @arg peptide: Original peptide sequence. - @type peptide: str - @arg altPeptide: Observed peptide sequence. - @type altPeptide: str - @arg FS: Frame shift table. - @type FS: dict - """ - # Todo: This is a temporary fix to prevent crashing on frameshift - # detection (I think bug #124). 
- return False - - if len(peptide) < len(altPeptide): - return False - - pList = __makeOverlaps(peptide) - - for i in range(len(altPeptide)): - if not altPeptide[i] in FS[pList[i]]: - return False - return True -#fitFS - -def findFS(peptide, altPeptide, FS): - """ - Find the longest part of {altPeptide} that fits in {peptide} in a certain - frame given by {FS}. - - @arg peptide: Original peptide sequence. - @type peptide: str - @arg altPeptide: Observed peptide sequence. - @type altPeptide: str - @arg FS: Frame shift table. - @type FS: dict - - @returns: The length and the offset in {peptide} of the largest frameshift. - @rtype: tuple(int, int) - """ - pList = __makeOverlaps(peptide) - maxFS = 0 - fsStart = 0 - - for i in range(len(pList))[::-1]: - for j in range(min(i + 1, len(altPeptide))): - if not altPeptide[::-1][j] in FS[pList[i - j]]: - break - if j >= maxFS: - maxFS = j - fsStart = i - j + 2 - #if - #for - - return maxFS - 1, fsStart -#findFS - -class RawVar(models.RawVar): - """ - Container for a raw variant. - - To use this class correctly, do not supply more than the minimum amount of - data. The {description()} function may not work properly if too much - information is given. - - Example: if {end} is initialised for a substitution, a range will be - retuned, resulting in a description like: 100_100A>T - """ - - def __init__(self, DNA=True, start=0, start_offset=0, end=0, end_offset=0, - type="none", deleted="", inserted="", shift=0, startAA="", endAA="", - term=0): - """ - Initialise the class with the appropriate values. - - @arg start: Start position. - @type start: int - @arg start_offset: - @type start_offset: int - @arg end: End position. - @type end: int - @arg end_offset: - @type end_offset: int - @arg type: Variant type. - @type type: str - @arg deleted: Deleted part of the reference sequence. - @type deleted: str - @arg inserted: Inserted part. - @type inserted: str - @arg shift: Amount of freedom. 
- @type shift: int - """ - # TODO: Will this container be used for all variants, or only genomic? - # start_offset and end_offset may be never used. - self.DNA = DNA - self.start = start - self.start_offset = start_offset - self.end = end - self.end_offset = end_offset - self.type = type - self.deleted = deleted - self.inserted = inserted - self.shift = shift - self.startAA = startAA - self.endAA = endAA - self.term = term - self.update() - #self.hgvs = self.description() - #self.hgvsLength = self.descriptionLength() - #__init__ - - def __DNADescription(self): - """ - Give the HGVS description of the raw variant stored in this class. - - Note that this function relies on the absence of values to make the - correct description. Also see the comment in the class definition. - - @returns: The HGVS description of the raw variant stored in this class. - @rtype: str - """ - if not self.start: - return "=" - - descr = "%i" % self.start - - if self.end: - descr += "_%i" % self.end - - if self.type != "subst": - descr += "%s" % self.type - - if self.inserted: - return descr + "%s" % self.inserted - return descr - #if - - return descr + "%s>%s" % (self.deleted, self.inserted) - #__DNADescription - - def __proteinDescription(self): - """ - Give the HGVS description of the raw variant stored in this class. - - Note that this function relies on the absence of values to make the - correct description. Also see the comment in the class definition. - - @returns: The HGVS description of the raw variant stored in this class. - @rtype: str - """ - if self.type == "unknown": - return "?" 
- if not self.start: - return "=" - - descr = "" - if not self.deleted: - if self.type == "ext": - descr += '*' - else: - descr += "%s" % seq3(self.startAA) - #if - else: - descr += "%s" % seq3(self.deleted) - descr += "%i" % self.start - if self.end: - descr += "_%s%i" % (seq3(self.endAA), self.end) - if self.type not in ["subst", "stop", "ext", "fs"]: # fs is not a type - descr += self.type - if self.inserted: - descr += "%s" % seq3(self.inserted) - - if self.type == "stop": - return descr + '*' - if self.term: - return descr + "fs*%i" % self.term - return descr - #__proteinDescription - - def __DNADescriptionLength(self): - """ - Give the standardised length of the HGVS description of the raw variant - stored in this class. - - Note that this function relies on the absence of values to make the - correct description. Also see the comment in the class definition. - - @returns: The standardised length of the HGVS description of the raw - variant stored in this class. - @rtype: int - """ - if not self.start: # `=' or `?' - return 1 - - descrLen = 1 # Start position. - - if self.end: # '_' and end position. - descrLen += 2 - - if self.type != "subst": - descrLen += len(self.type) - - if self.inserted: - return descrLen + len(self.inserted) - return descrLen - #if - - return 4 # Start position, '>' and end position. - #__DNAdescriptionLength - - def __proteinDescriptionLength(self): - """ - Give the standardised length of the HGVS description of the raw variant - stored in this class. - - Note that this function relies on the absence of values to make the - correct description. Also see the comment in the class definition. - - @returns: The standardised length of the HGVS description of the raw - variant stored in this class. - @rtype: int - """ - if not self.start: # = - return 1 - - descrLen = 1 # Start position. - if not self.deleted and self.type == "ext": - descrLen += 1 # * - else: - descrLen += 3 # One amino acid. 
- if self.end: - descrLen += 5 # `_' + one amino acid + end position. - if self.type not in ["subst", "stop", "ext", "fs"]: - descrLen += len(self.type) - if self.inserted: - descrLen += 3 * len(self.inserted) - if self.type == "stop": - return descrLen + 1 # * - if self.term: - return descrLen + len(self.type) + 2 # `*' + length until stop. - return descrLen - #__proteinDescriptionLength - - def update(self): - """ - """ - self.hgvs = self.description() - self.hgvsLength = self.descriptionLength() - #update - - def description(self): - """ - """ - if self.DNA: - return self.__DNADescription() - return self.__proteinDescription() - #description - - def descriptionLength(self): - """ - Give the standardised length of the HGVS description of the raw variant - stored in this class. - - @returns: The standardised length of the HGVS description of the raw - variant stored in this class. - @rtype: int - """ - if self.DNA: - return self.__DNADescriptionLength() - return self.__proteinDescriptionLength() - #descriptionLength -#RawVar - -def alleleDescription(allele): - """ - Convert a list of raw variants to an HGVS allele description. - - @arg allele: A list of raw variants representing an allele description. - @type allele: list(RawVar) - - @returns: The HGVS description of {allele}. - @rval: str - """ - if len(allele) > 1: - return "[%s]" % ';'.join(map(lambda x: x.hgvs, allele)) - return allele[0].hgvs -#alleleDescription - -def alleleDescriptionLength(allele): - """ - Calculate the standardised length of an HGVS allele description. - - @arg allele: A list of raw variants representing an allele description. - @type allele: list(RawVar) - - @returns: The standardised length of the HGVS description of {allele}. - @rval: int - """ - # NOTE: Do we need to count the ; and [] ? - return sum(map(lambda x: x.hgvsLength, allele)) -#alleleDescriptionLength - -def printpos(s, start, end, fill=0): - """ - For debugging purposes. 
- """ - # TODO: See if this can partially replace or be merged with the - # visualisation in the __mutate() function of mutator.py - fs = 10 # Flank size. - - return "%s %s%s %s" % (s[start - fs:start], s[start:end], '-' * fill, - s[end:end + fs]) -#printpos - -def var2RawVar(s1, s2, var, DNA=True): - """ - """ - # Unknown. - if s1 == '?' or s2 == '?': - return [RawVar(DNA=DNA, type="unknown")] - - # Insertion / Duplication. - if var.reference_start == var.reference_end: - ins_length = var.sample_end - var.sample_start - shift5, shift3 = roll(s2, var.sample_start + 1, var.sample_end) - shift = shift5 + shift3 - - var.reference_start += shift3 - var.reference_end += shift3 - var.sample_start += shift3 - var.sample_end += shift3 - - if (var.sample_start - ins_length >= 0 and - s1[var.reference_start - ins_length:var.reference_start] == - s2[var.sample_start:var.sample_end]): - - if ins_length == 1: - return RawVar(DNA=DNA, start=var.reference_start, type="dup", - shift=shift) - return RawVar(DNA=DNA, start=var.reference_start - ins_length + 1, - end=var.reference_end, type="dup", shift=shift) - #if - return RawVar(DNA=DNA, start=var.reference_start, - end=var.reference_start + 1, - inserted=s2[var.sample_start:var.sample_end], type="ins", - shift=shift) - #if - - # Deletion. - if var.sample_start == var.sample_end: - shift5, shift3 = roll(s1, var.reference_start + 1, var.reference_end) - shift = shift5 + shift3 - - var.reference_start += shift3 + 1 - var.reference_end += shift3 - - if var.reference_start == var.reference_end: - return RawVar(DNA=DNA, start=var.reference_start, type="del", - shift=shift) - return RawVar(DNA=DNA, start=var.reference_start, - end=var.reference_end, type="del", shift=shift) - #if - - # Substitution. 
- if (var.reference_start + 1 == var.reference_end and - var.sample_start + 1 == var.sample_end): - - return RawVar(DNA=DNA, start=var.reference_start + 1, - deleted=s1[var.reference_start], inserted=s2[var.sample_start], - type="subst") - #if - - # Simple InDel. - if var.reference_start + 1 == var.reference_end: - return RawVar(DNA=DNA, start=var.reference_start + 1, - inserted=s2[var.sample_start:var.sample_end], type="delins") - - # Inversion. - if var.type == extractor.VARIANT_REVERSE_COMPLEMENT: - trim = palinsnoop(s1[var.reference_start:var.reference_end]) - - if trim > 0: # Partial palindrome. - var.reference_end -= trim - var.sample_end -= trim - #if - - return RawVar(DNA=DNA, start=var.reference_start + 1, - end=var.reference_end, type="inv") - #if - - # InDel. - return RawVar(DNA=DNA, start=var.reference_start + 1, - end=var.reference_end, inserted=s2[var.sample_start:var.sample_end], - type="delins") -#var2RawVar - -def description(s1, s2, DNA=True): - """ - Give an allele description of the change from {s1} to {s2}. - - arg s1: Sequence 1. - type s1: str - arg s2: Sequence 2. - type s2: str - - @returns: A list of RawVar objects, representing the allele. 
- @rval: list(RawVar) - """ - description = [] - - if not DNA: - FS1, FS2 = makeFSTables(1) - longestFSf = max(findFS(s1, s2, FS1), findFS(s1, s2, FS2)) - longestFSr = max(findFS(s2, s1, FS1), findFS(s2, s1, FS2)) - - if longestFSf > longestFSr: - print s1[:longestFSf[1]], s1[longestFSf[1]:] - print s2[:len(s2) - longestFSf[0]], s2[len(s2) - longestFSf[0]:] - s1_part = s1[:longestFSf[1]] - s2_part = s2[:len(s2) - longestFSf[0]] - term = longestFSf[0] - #if - else: - print s1[:len(s1) - longestFSr[0]], s1[len(s1) - longestFSr[0]:] - print s2[:longestFSr[1]], s2[longestFSr[1]:] - s1_part = s1[:len(s1) - longestFSr[0]] - s2_part = s2[:longestFSr[1]] - term = len(s2) - longestFSr[1] - #else - - s1_part = s1 - s2_part = s2 - for variant in extractor.extract(str(s1_part), len(s1_part), - str(s2_part), len(s2_part), 1): - description.append(var2RawVar(s1, s2, variant, DNA=DNA)) - - if description: - description[-1].term = term + 2 - description[-1].update() - #if - #if - else: - for variant in extractor.extract(str(s1), len(s1), str(s2), len(s2), - 0): - if variant.type != extractor.VARIANT_IDENTITY: - description.append(var2RawVar(s1, s2, variant, DNA=DNA)) - - # Nothing happened. 
- if not description: - return [RawVar(DNA=DNA)] - - return description -#description - -if __name__ == "__main__": - a = "ATAGATGATAGATAGATAGAT" - b = "ATAGATGATTGATAGATAGAT" - print alleleDescription(description(a, b, DNA=True)) - - a = "MAVLWRLSAVCGALGGRALLLRTPVVRPAH" - b = "MAVLWRLSAGCGALGGRALLLRTPVVRAH" - print alleleDescription(description(a, b, DNA=False)) - - a = "MDYSLAAALTLHGHWGLGQVVTDYVHGDALQKAAKAGLLALSALTFAGLCYFNYHDVGICKAVAMLWKL" - b = "MDYSLAAALTFMVTGALDKLLLTMFMGMPCRKLPRQGFWHFQL" - #print alleleDescription(description(a, b, DNA=False)) - #print alleleDescription(description(b, a, DNA=False)) - print "1" - extractor.extract(a, len(a), b, len(b), 1) - print "2" - extractor.extract(b, len(b), a, len(a), 1) - print "3" - - - a = "VVSVLLLGLLPAAYLNPCSAMYYSLAAALTLHGHWGLGQV" - b = "VVSVLLLGLLPAAYLNPCSAMDYSLAAALTLHGHWGLGQV" - print alleleDescription(description(a, b, DNA=False)) - print alleleDescription(description(b, a, DNA=False)) - - a = "ACGCTCGATCGCTTATAGCATGGGGGGGGGATCTAGCTCTCTCTATAAGATA" - b = "ACGCTCGATCGCTTATACCCCCCCCATGCGATCTAGCTCTCTCTATAAGATA" - print alleleDescription(description(a, b, DNA=True)) - -#if diff --git a/mutalyzer/entrypoints/__init__.py b/mutalyzer/entrypoints/__init__.py index 36b5ad16a25f2e75f11765e052dd8099697ddb13..7d95d01efa1575d1a4896bbc15e3a4f2e972b4cf 100644 --- a/mutalyzer/entrypoints/__init__.py +++ b/mutalyzer/entrypoints/__init__.py @@ -3,6 +3,12 @@ Entry points to Mutalyzer. """ +from __future__ import unicode_literals + +import locale +import sys + + class _ReverseProxied(object): """ Wrap the application in this middleware and configure the front-end server @@ -36,3 +42,15 @@ class _ReverseProxied(object): if scheme: environ['wsgi.url_scheme'] = scheme return self.app(environ, *args, **kwargs) + + +def _cli_string(argument): + """ + Decode a command line argument byte string to unicode using our best + guess for the encoding (noop on unicode strings). 
+ """ + encoding = sys.stdin.encoding or locale.getpreferredencoding() + + if isinstance(argument, unicode): + return argument + return unicode(argument, encoding=encoding) diff --git a/mutalyzer/entrypoints/admin.py b/mutalyzer/entrypoints/admin.py index 42929e6bb31c974149f11b12a7ba39680eb9c5ae..e7c74178ea0560a133ce39e4d04c8ee1e647d58d 100644 --- a/mutalyzer/entrypoints/admin.py +++ b/mutalyzer/entrypoints/admin.py @@ -3,16 +3,20 @@ Command line interface to Mutalyzer administrative tools. """ +from __future__ import unicode_literals + import argparse +import codecs import json +import locale import os import alembic.command import alembic.config from alembic.migration import MigrationContext -from sqlalchemy.exc import IntegrityError from sqlalchemy.orm.exc import NoResultFound +from . import _cli_string from .. import announce from .. import db from ..db import session @@ -26,10 +30,12 @@ class UserError(Exception): pass -def add_assembly(assembly_file): +def add_assembly(assembly_file, encoding): """ Add genome assembly definition from a JSON file. """ + assembly_file = codecs.getreader(encoding)(assembly_file) + try: definition = json.load(assembly_file) except ValueError: @@ -84,10 +90,13 @@ def list_assemblies(): assembly.taxonomy_id) -def import_mapview(assembly_name_or_alias, mapview_file, group_label): +def import_mapview(assembly_name_or_alias, mapview_file, encoding, + group_label): """ Import transcript mappings from an NCBI mapview file. 
""" + mapview_file = codecs.getreader(encoding)(mapview_file) + try: assembly = Assembly.by_name_or_alias(assembly_name_or_alias) except NoResultFound: @@ -96,7 +105,7 @@ def import_mapview(assembly_name_or_alias, mapview_file, group_label): try: mapping.import_from_mapview_file(assembly, mapview_file, group_label) except mapping.MapviewSortError as e: - raise UserError(str(e)) + raise UserError(unicode(e)) def import_gene(assembly_name_or_alias, gene): @@ -182,10 +191,13 @@ def main(): """ Command-line interface to Mutalyzer administrative tools. """ + default_encoding = locale.getpreferredencoding() + assembly_parser = argparse.ArgumentParser(add_help=False) assembly_parser.add_argument( - '-a', '--assembly', metavar='ASSEMBLY', dest='assembly_name_or_alias', - default='hg19', help='assembly to import to (default: hg19)') + '-a', '--assembly', metavar='ASSEMBLY', type=_cli_string, + dest='assembly_name_or_alias', default='hg19', + help='assembly to import to (default: hg19)') parser = argparse.ArgumentParser( description='Mutalyzer administrative tools.') @@ -210,9 +222,13 @@ def main(): description=add_assembly.__doc__.split('\n\n')[0]) p.set_defaults(func=add_assembly) p.add_argument( - 'assembly_file', metavar='FILE', type=argparse.FileType('r'), + 'assembly_file', metavar='FILE', type=argparse.FileType('rb'), help='genome assembly definition JSON file (example: ' 'extras/assemblies/GRCh37.json)') + p.add_argument( + '--encoding', metavar='ENCODING', type=_cli_string, + default=default_encoding, + help='input file encoding (default: %s)' % default_encoding) # Subparser 'assemblies import-mapview'. 
p = s.add_parser( @@ -224,10 +240,14 @@ def main(): '`sort -t $\'\\t\' -k 11,11 -k 2,2` command.') p.set_defaults(func=import_mapview) p.add_argument( - 'mapview_file', metavar='FILE', type=argparse.FileType('r'), + 'mapview_file', metavar='FILE', type=argparse.FileType('rb'), help='file from NCBI mapview (example: seq_gene.md), see note below') p.add_argument( - 'group_label', metavar='GROUP_LABEL', + '--encoding', metavar='ENCODING', type=_cli_string, + default=default_encoding, + help='input file encoding (default: %s)' % default_encoding) + p.add_argument( + 'group_label', metavar='GROUP_LABEL', type=_cli_string, help='use only entries with this group label (example: ' 'GRCh37.p2-Primary Assembly)') @@ -241,7 +261,7 @@ def main(): ' (i.e., NCBI mapview).') p.set_defaults(func=import_gene) p.add_argument( - 'gene', metavar='GENE_SYMBOL', + 'gene', metavar='GENE_SYMBOL', type=_cli_string, help='gene to import all transcript mappings for from the UCSC ' 'database (example: TTN)') @@ -255,7 +275,7 @@ def main(): 'usual source (i.e., NCBI mapview).') p.set_defaults(func=import_reference) p.add_argument( - 'reference', metavar='ACCESSION', + 'reference', metavar='ACCESSION', type=_cli_string, help='genomic reference to import all genes from (example: ' 'NC_012920.1)') @@ -272,10 +292,10 @@ def main(): description=set_announcement.__doc__.split('\n\n')[0]) p.set_defaults(func=set_announcement) p.add_argument( - 'body', metavar='ANNOUNCEMENT', + 'body', metavar='ANNOUNCEMENT', type=_cli_string, help='announcement text to show to the user') p.add_argument( - '--url', metavar='URL', dest='url', + '--url', metavar='URL', dest='url', type=_cli_string, help='URL to more information on the announcement') # Subparser 'announcement unset'. 
@@ -290,10 +310,10 @@ def main(): description=sync_cache.__doc__.split('\n\n')[0], epilog='Intended use is to run daily from cron.') p.add_argument( - 'wsdl_url', metavar='WSDL_URL', + 'wsdl_url', metavar='WSDL_URL', type=_cli_string, help='location of the remote WSDL description') p.add_argument( - 'url_template', metavar='URL_TEMPLATE', + 'url_template', metavar='URL_TEMPLATE', type=_cli_string, help='URL for remote downloads, in which the filename is to be ' 'substituted for {file}') p.add_argument( @@ -313,7 +333,7 @@ def main(): '--destructive', dest='destructive', action='store_true', help='delete any existing tables and data') p.add_argument( - '-c', '--alembic-config', metavar='ALEMBIC_CONFIG', + '-c', '--alembic-config', metavar='ALEMBIC_CONFIG', type=_cli_string, dest='alembic_config_path', help='path to Alembic configuration file') p.set_defaults(func=setup_database) @@ -323,7 +343,7 @@ def main(): args.func(**{k: v for k, v in vars(args).items() if k not in ('func', 'subcommand')}) except UserError as e: - parser.error(str(e)) + parser.error(unicode(e)) if __name__ == '__main__': diff --git a/mutalyzer/entrypoints/batch_processor.py b/mutalyzer/entrypoints/batch_processor.py index 286c411609642515e8ff6e3308e759b4234b0b92..ae3c2945748db1a3b286690e8df52dedbe603c13 100644 --- a/mutalyzer/entrypoints/batch_processor.py +++ b/mutalyzer/entrypoints/batch_processor.py @@ -6,12 +6,13 @@ Mutalyzer batch processor. """ +from __future__ import unicode_literals + import argparse import signal import sys import time -from .. import config from .. import db from .. import Scheduler diff --git a/mutalyzer/entrypoints/mutalyzer.py b/mutalyzer/entrypoints/mutalyzer.py index d123482fbe92d6ffa0f0277dcfd2847d877ecbab..6717161d1d4795c923f70cfa6846358ace2972c8 100644 --- a/mutalyzer/entrypoints/mutalyzer.py +++ b/mutalyzer/entrypoints/mutalyzer.py @@ -5,8 +5,12 @@ Mutalyzer command-line name checker. 
""" +from __future__ import unicode_literals + import argparse +import sys +from . import _cli_string from .. import describe from .. import output from .. import variantchecker @@ -114,7 +118,7 @@ def main(): parser = argparse.ArgumentParser( description='Mutalyzer command-line name checker.') parser.add_argument( - 'description', metavar='DESCRIPTION', + 'description', metavar='DESCRIPTION', type=_cli_string, help='variant description to run the name checker on') args = parser.parse_args() diff --git a/mutalyzer/entrypoints/service_json.py b/mutalyzer/entrypoints/service_json.py index 25ff8bbfc1d7d3a01a49a375a6caba90b846aed3..5e5d93d01a2e7d48d3acbf4e0014ac2f4ddde60f 100644 --- a/mutalyzer/entrypoints/service_json.py +++ b/mutalyzer/entrypoints/service_json.py @@ -18,6 +18,8 @@ You can also use the built-in HTTP server by running this file directly. """ +from __future__ import unicode_literals + import argparse import logging import sys @@ -25,7 +27,7 @@ import sys from wsgiref.simple_server import make_server from spyne.server.wsgi import WsgiApplication -from . import _ReverseProxied +from . 
import _cli_string, _ReverseProxied from ..config import settings from ..services import json @@ -57,9 +59,9 @@ def main(): parser = argparse.ArgumentParser( description='Mutalyzer HTTP/RPC+JSON webservice.') parser.add_argument( - '-H', '--host', metavar='HOSTNAME', dest='host', default='127.0.0.1', - help='hostname to listen on (default: 127.0.0.1; specify 0.0.0.0 to ' - 'listen on all hostnames)') + '-H', '--host', metavar='HOSTNAME', type=_cli_string, dest='host', + default='127.0.0.1', help='hostname to listen on (default: ' + '127.0.0.1; specify 0.0.0.0 to listen on all hostnames)') parser.add_argument( '-p', '--port', metavar='PORT', dest='port', type=int, default=8082, help='port to listen on (default: 8082)') diff --git a/mutalyzer/entrypoints/service_soap.py b/mutalyzer/entrypoints/service_soap.py index 6b630ad6aa1bc885995e099bca91553260854b0d..8179faa358e7e109442cef799b1ffc2f8e4c0128 100644 --- a/mutalyzer/entrypoints/service_soap.py +++ b/mutalyzer/entrypoints/service_soap.py @@ -18,6 +18,8 @@ You can also use the built-in HTTP server by running this file directly. """ +from __future__ import unicode_literals + import argparse import logging import sys @@ -25,7 +27,7 @@ import sys from wsgiref.simple_server import make_server from spyne.server.wsgi import WsgiApplication -from . import _ReverseProxied +from . 
import _cli_string, _ReverseProxied from ..config import settings from ..services import soap @@ -58,9 +60,9 @@ def main(): parser = argparse.ArgumentParser( description='Mutalyzer SOAP webservice.') parser.add_argument( - '-H', '--host', metavar='HOSTNAME', dest='host', default='127.0.0.1', - help='hostname to listen on (default: 127.0.0.1; specify 0.0.0.0 to ' - 'listen on all hostnames)') + '-H', '--host', metavar='HOSTNAME', type=_cli_string, dest='host', + default='127.0.0.1', help='hostname to listen on (default: ' + '127.0.0.1; specify 0.0.0.0 to listen on all hostnames)') parser.add_argument( '-p', '--port', metavar='PORT', dest='port', type=int, default=8081, help='port to listen on (default: 8081)') diff --git a/mutalyzer/entrypoints/website.py b/mutalyzer/entrypoints/website.py index a62e3bb332322312191d4f8eff800d711608037b..f387b70ff4cb5f4a315ca4de9a87c9e0d0033b5a 100644 --- a/mutalyzer/entrypoints/website.py +++ b/mutalyzer/entrypoints/website.py @@ -39,9 +39,12 @@ also serve the static files. """ +from __future__ import unicode_literals + import argparse +import sys -from . import _ReverseProxied +from . import _cli_string, _ReverseProxied from ..config import settings from .. 
import website @@ -66,9 +69,9 @@ def main(): parser = argparse.ArgumentParser( description='Mutalyzer website.') parser.add_argument( - '-H', '--host', metavar='HOSTNAME', dest='host', default='127.0.0.1', - help='hostname to listen on (default: 127.0.0.1; specify 0.0.0.0 to ' - 'listen on all hostnames)') + '-H', '--host', metavar='HOSTNAME', type=_cli_string, dest='host', + default='127.0.0.1', help='hostname to listen on (default: ' + '127.0.0.1; specify 0.0.0.0 to listen on all hostnames)') parser.add_argument( '-p', '--port', metavar='PORT', dest='port', type=int, default=8089, help='port to listen on (default: 8080)') diff --git a/mutalyzer/grammar.py b/mutalyzer/grammar.py index 0e65ec574822f0182ee4d1eb0abe6accb548161d..8f231bf57cee26ed032a21dbfeb4cb3f7d83f1ce 100644 --- a/mutalyzer/grammar.py +++ b/mutalyzer/grammar.py @@ -19,6 +19,8 @@ The grammar is described in [3]. """ +from __future__ import unicode_literals + from pyparsing import * @@ -48,7 +50,7 @@ class Grammar(): ########################################################################## # BNF: Name -> ([a-z] | [a-Z] | [0-9])+ - Name = Word(alphanums, min=1) + Name = Word(unicode(alphanums), min=1) # BNF: Nt -> `a' | `c' | `g' | `u' | `A' | `C' | `G' | `T' | `U' #Nt = Word('acgtuACGTU', exact=1) @@ -66,7 +68,7 @@ class Grammar(): NtString = Combine(OneOrMore(Nt)) # BNF: Number -> [0-9]+ - Number = Word(nums) + Number = Word(unicode(nums)) ########################################################################## # Reference sequences @@ -79,7 +81,7 @@ class Grammar(): ProtIso = Suppress('_i') + Number('ProtIso') # BNF: GeneName -> ([a-Z] | [0-9] | `-')+ - GeneName = Word(alphanums + '-', min=1) + GeneName = Word(unicode(alphanums) + '-', min=1) # BNF: GeneSymbol -> `(' Name (TransVar | ProtIso)? `)' GeneSymbol = Suppress('(') + Group(GeneName('GeneSymbol') + \ @@ -94,11 +96,11 @@ class Grammar(): # BNF: AccNo -> ([a-Z] Number `_')+ Version? 
AccNo = NotAny('LRG_') + \ - Combine(Word(alphas + '_') + Number)('RefSeqAcc') + \ + Combine(Word(unicode(alphas) + '_') + Number)('RefSeqAcc') + \ Optional(Version) # BNF: UD -> `UD_' [a-Z]+ (`_' Number)+ - UD = Combine('UD_' + Word(alphas) + OneOrMore('_' + Number))('RefSeqAcc') + UD = Combine('UD_' + Word(unicode(alphas)) + OneOrMore('_' + Number))('RefSeqAcc') # BNF: LRGTranscriptID -> `t' [0-9]+ LRGTranscriptID = Suppress('t') + Number('LRGTranscriptID') @@ -467,7 +469,7 @@ class Grammar(): the input where the error occurred (and return None). @arg variant: The input string that needs to be parsed. - @type variant: string + @type variant: unicode @return: The parse tree containing the parse results, or None in case of a parsing error. @@ -480,12 +482,12 @@ class Grammar(): return self.Var.parseString(variant, parseAll=True) # Todo: check .dump() except ParseException as err: - print err.line - print " "*(err.column-1) + "^" - print err + #print err.line + #print " "*(err.column-1) + "^" + #print err # Log parse error and the position where it occurred. - self._output.addMessage(__file__, 4, 'EPARSE', str(err)) - pos = int(str(err).split(':')[-1][:-1]) - 1 + self._output.addMessage(__file__, 4, 'EPARSE', unicode(err)) + pos = int(unicode(err).split(':')[-1][:-1]) - 1 self._output.addOutput('parseError', variant) self._output.addOutput('parseError', pos * ' ' + '^') return None diff --git a/mutalyzer/mapping.py b/mutalyzer/mapping.py index 693294d31b5a2a06319c24566d3e98259657882d..11e058997182252b01d75ea1b0586cb555347d18 100644 --- a/mutalyzer/mapping.py +++ b/mutalyzer/mapping.py @@ -10,11 +10,12 @@ update the database with this information. 
""" +from __future__ import unicode_literals + from collections import defaultdict from itertools import groupby from operator import attrgetter, itemgetter -from Bio.Seq import reverse_complement import MySQLdb from mutalyzer.db import session @@ -24,6 +25,7 @@ from mutalyzer.models import SoapMessage, Mapping, Transcript from mutalyzer.output import Output from mutalyzer import Crossmap from mutalyzer import Retriever +from mutalyzer import util class MapviewSortError(Exception): @@ -40,28 +42,29 @@ def _construct_change(var, reverse=False): @type reverse: bool @return: Description of mutation (without reference and positions). - @rtype: string + @rtype: unicode """ + # Note that the pyparsing parse tree yields `str('')` for nonexisting + # attributes, so we wrap the optional attributes in `unicode()`. if reverse: - # todo: if var.Arg1 is unicode, this crashes try: - arg1 = str(int(var.Arg1)) + arg1 = unicode(int(var.Arg1)) except ValueError: - arg1 = reverse_complement(str(var.Arg1) or '') + arg1 = util.reverse_complement(unicode(var.Arg1)) try: - arg2 = str(int(var.Arg2)) + arg2 = unicode(int(var.Arg2)) except ValueError: - arg2 = reverse_complement(str(var.Arg2) or '') + arg2 = util.reverse_complement(unicode(var.Arg2)) else: - arg1 = var.Arg1 - arg2 = var.Arg2 + arg1 = unicode(var.Arg1) + arg2 = unicode(var.Arg2) def parse_sequence(seq): if not seq.Sequence: raise NotImplementedError('Only explicit sequences are supported ' 'for insertions.') if reverse: - return reverse_complement(str(seq.Sequence)) + return util.reverse_complement(seq.Sequence) return seq.Sequence if var.MutationType == 'subst': @@ -72,7 +75,7 @@ def _construct_change(var, reverse=False): seqs = reversed(var.SeqList) else: seqs = var.SeqList - insertion = '[' + ';'.join(str(parse_sequence(seq)) + insertion = '[' + ';'.join(parse_sequence(seq) for seq in seqs) + ']' else: insertion = parse_sequence(var.Seq) @@ -161,11 +164,11 @@ class Converter(object) : Get data from database. 
@arg acc: NM_ accession number (without version) - @type acc: string + @type acc: unicode @arg version: version number @type version: integer @kwarg selector: Optional gene symbol selector. - @type selector: str + @type selector: unicode @kwarg selector_version: Optional transcript version selector. @type selector_version: int """ @@ -269,7 +272,7 @@ class Converter(object) : @arg Loc: A location in either I{g.} or I{c.} notation @type Loc: object @arg Type: The reference type - @type Type: string + @type Type: unicode @returns: triple: 0. Main coordinate in I{c.} notation 1. Offset coordinate in I{c.} notation @@ -359,7 +362,7 @@ class Converter(object) : available. @arg accNo: transcript (NM_) accession number (with or without version) - @type accNo: string + @type accNo: unicode @return: transcription start, transcription end and CDS stop @rtype: triple @@ -381,7 +384,7 @@ class Converter(object) : One of the entry points (called by the HTML publisher). @arg accNo: The full NM accession number (including version) - @type accNo: string + @type accNo: unicode @return: T ; ClassSerializer object with the types trans_start, trans_stop and CDS_stop @@ -404,9 +407,9 @@ class Converter(object) : One of the entry points (called by the HTML publisher). @arg accNo: transcript (NM_) accession number (with version?) - @type accNo: string + @type accNo: unicode @arg mutation: the 'mutation' (e.g. c.123C>T) - @type mutation: string + @type mutation: unicode @return: ClassSerializer object @rtype: object @@ -493,10 +496,10 @@ class Converter(object) : Converts a complete HGVS I{c.} notation into a chromosomal notation. 
@arg variant: The variant in HGVS I{c.} notation - @type variant: string + @type variant: unicode @return: var_in_g ; The variant in HGVS I{g.} notation - @rtype: string + @rtype: unicode """ if self._parseInput(variant): acc = self.parseTree.RefSeqAcc @@ -528,7 +531,7 @@ class Converter(object) : r_change = _construct_change(variant, reverse=True) except NotImplementedError as e: self.__output.addMessage(__file__, 3, 'ENOTIMPLEMENTED', - str(e)) + unicode(e)) return None if self.mapping.orientation == 'forward': @@ -568,14 +571,14 @@ class Converter(object) : @arg positions: Positions in c. notation to convert. @type positions: list @arg reference: Transcript reference. - @type reference: string + @type reference: unicode @kwarg version: Transcript reference version. If omitted, '0' is assumed. - @type version: string + @type version: unicode @return: Chromosome name, orientation (+ or -), and converted positions. - @rtype: tuple(string, string, list) + @rtype: tuple(unicode, unicode, list) This only works for positions on transcript references in c. notation. """ @@ -617,10 +620,10 @@ class Converter(object) : def correctChrVariant(self, variant) : """ @arg variant: - @type variant: string + @type variant: unicode @return: variant ; - @rtype: string + @rtype: unicode """ #Pre split check @@ -651,12 +654,12 @@ class Converter(object) : def chrom2c(self, variant, rt, gene=None): """ @arg variant: a variant description - @type variant: string + @type variant: unicode @arg rt: the return type - @type rt: string + @type rt: unicode @kwarg gene: Optional gene name. If given, return variant descriptions on all transcripts for this gene. 
- @type gene: string + @type gene: unicode @return: HGVS_notatations ; @rtype: dictionary or list @@ -751,7 +754,7 @@ class Converter(object) : r_change = _construct_change(variant, reverse=True) except NotImplementedError as e: self.__output.addMessage(__file__, 4, - "ENOTIMPLEMENTEDERROR", str(e)) + "ENOTIMPLEMENTEDERROR", unicode(e)) return None startp = self.crossmap.tuple2string((cmap.startmain, cmap.startoffset)) @@ -786,6 +789,8 @@ class Converter(object) : #Converter +# Todo: This seems broken at the moment. +# Todo: Correct handling of string encodings. def import_from_ucsc_by_gene(assembly, gene): """ Import transcript mappings for a gene from the UCSC. diff --git a/mutalyzer/models.py b/mutalyzer/models.py index f7356dda60a68c840c455d28c65eaec14efd0a44..bc9bf5a0780a382af267b3973b17b017c6a8ff77 100644 --- a/mutalyzer/models.py +++ b/mutalyzer/models.py @@ -1,26 +1,16 @@ """ Collection of serilizable objects used by the SOAP web service. They extend -from the spyne ClassModel. +from the Spyne model classes. -Default attributes for the spyne ClassModel: -- nillable = True -- min_occurs = 0 -- max_occurs = 1 - -Additional attributes values for the spyne String model: -- min_len = 0 -- max_len = 'unbounded' -- pattern = None - -@todo: Use Mandatory.* models in the ClassModel extensions? -@todo: See if it improves client code if we use Array(_, nillable=False). @todo: Move all these models to the mutalyzer.services package and refactor all uses of them in other places. The SOAP_NAMESPACE variable can then also be moved there. 
""" -from spyne.model.primitive import String, Integer, Boolean, DateTime +from __future__ import unicode_literals + +from spyne.model.primitive import Integer, Boolean, DateTime, Unicode from spyne.model.binary import ByteArray from spyne.model.complex import ComplexModel, Array @@ -30,9 +20,9 @@ from mutalyzer import SOAP_NAMESPACE class Mandatory(object): """ This is spyne.model.primitive.Mandatory, but without min_length=1 for - the String model. + the Unicode model. """ - String = String(type_name='mandatory_string', min_occurs=1, nillable=False) + Unicode = Unicode(type_name='mandatory_unicode', min_occurs=1, nillable=False) Integer = Integer(type_name='mandatory_integer', min_occurs=1, nillable=False) Boolean = Boolean(type_name='mandatory_boolean', min_occurs=1, nillable=False) DateTime = DateTime(type_name='mandatory_date_time', min_occurs=1, nillable=False) @@ -46,8 +36,8 @@ class SoapMessage(ComplexModel): """ __namespace__ = SOAP_NAMESPACE - errorcode = Mandatory.String - message = Mandatory.String + errorcode = Mandatory.Unicode + message = Mandatory.Unicode #SoapMessage @@ -63,7 +53,7 @@ class Mapping(ComplexModel): endoffset = Integer start_g = Integer end_g = Integer - mutationType = String + mutationType = Unicode errorcode = Integer messages = Array(SoapMessage) #Mapping @@ -87,8 +77,8 @@ class RawVariant(ComplexModel): """ __namespace__ = SOAP_NAMESPACE - description = Mandatory.String - visualisation = Mandatory.String + description = Mandatory.Unicode + visualisation = Mandatory.Unicode #RawVariant @@ -103,14 +93,14 @@ class RawVar(ComplexModel): start_offset = Mandatory.Integer end = Mandatory.Integer end_offset = Mandatory.Integer - type = Mandatory.String - deleted = Mandatory.String - inserted = Mandatory.String + type = Mandatory.Unicode + deleted = Mandatory.Unicode + inserted = Mandatory.Unicode shift = Mandatory.Integer - startAA = Mandatory.String - endAA = Mandatory.String + startAA = Mandatory.Unicode + endAA = Mandatory.Unicode term 
= Mandatory.Integer - hgvs = Mandatory.String + hgvs = Mandatory.Unicode hgvsLength = Mandatory.Integer #RawVar @@ -121,7 +111,7 @@ class Allele(ComplexModel): """ __namespace__ = SOAP_NAMESPACE - description = Mandatory.String + description = Mandatory.Unicode allele = Array(RawVar) #Allele @@ -132,10 +122,10 @@ class ExonInfo(ComplexModel): """ __namespace__ = SOAP_NAMESPACE - cStart = Mandatory.String + cStart = Mandatory.Unicode gStart = Mandatory.Integer chromStart = Integer - cStop = Mandatory.String + cStop = Mandatory.Unicode gStop = Mandatory.Integer chromStop = Integer #ExonInfo @@ -147,34 +137,34 @@ class MutalyzerOutput(ComplexModel): """ __namespace__ = SOAP_NAMESPACE - referenceId = Mandatory.String - sourceId = Mandatory.String - sourceAccession = String - sourceVersion = String - sourceGi = String - molecule = Mandatory.String + referenceId = Mandatory.Unicode + sourceId = Mandatory.Unicode + sourceAccession = Unicode + sourceVersion = Unicode + sourceGi = Unicode + molecule = Mandatory.Unicode - original = String - mutated = String + original = Unicode + mutated = Unicode - origMRNA = String - mutatedMRNA= String + origMRNA = Unicode + mutatedMRNA= Unicode - origCDS = String - newCDS= String + origCDS = Unicode + newCDS= Unicode - origProtein = String - newProtein = String - altProtein = String + origProtein = Unicode + newProtein = Unicode + altProtein = Unicode errors = Integer warnings = Integer - summary = String + summary = Unicode - chromDescription = String - genomicDescription = String - transcriptDescriptions = Array(String) - proteinDescriptions = Array(String) + chromDescription = Unicode + genomicDescription = Unicode + transcriptDescriptions = Array(Unicode) + proteinDescriptions = Array(Unicode) exons = Array(ExonInfo) @@ -190,8 +180,8 @@ class TranscriptNameInfo(ComplexModel): """ __namespace__ = SOAP_NAMESPACE - transcriptName = Mandatory.String - productName = Mandatory.String + transcriptName = Mandatory.Unicode + productName = 
Mandatory.Unicode #TranscriptNameInfo @@ -201,9 +191,9 @@ class ProteinTranscript(ComplexModel): """ __namespace__ = SOAP_NAMESPACE - name = Mandatory.String - id = Mandatory.String - product = Mandatory.String + name = Mandatory.Unicode + id = Mandatory.Unicode + product = Mandatory.Unicode #ProteinTranscript @@ -219,27 +209,27 @@ class TranscriptInfo(ComplexModel): """ __namespace__ = SOAP_NAMESPACE - name = Mandatory.String - id = Mandatory.String - product = Mandatory.String + name = Mandatory.Unicode + id = Mandatory.Unicode + product = Mandatory.Unicode - cTransStart = Mandatory.String + cTransStart = Mandatory.Unicode gTransStart = Mandatory.Integer chromTransStart = Integer - cTransEnd = Mandatory.String + cTransEnd = Mandatory.Unicode gTransEnd = Mandatory.Integer chromTransEnd = Integer sortableTransEnd = Mandatory.Integer - cCDSStart = Mandatory.String + cCDSStart = Mandatory.Unicode gCDSStart = Mandatory.Integer chromCDSStart = Integer - cCDSStop = Mandatory.String + cCDSStop = Mandatory.Unicode gCDSStop = Mandatory.Integer chromCDSStop = Integer - locusTag = Mandatory.String - linkMethod = Mandatory.String + locusTag = Mandatory.Unicode + linkMethod = Mandatory.Unicode exons = Array(ExonInfo) @@ -253,10 +243,10 @@ class TranscriptMappingInfo(ComplexModel): """ __namespace__ = SOAP_NAMESPACE - name = Mandatory.String + name = Mandatory.Unicode version = Mandatory.Integer - gene = Mandatory.String - orientation = Mandatory.String + gene = Mandatory.Unicode + orientation = Mandatory.Unicode start = Mandatory.Integer stop = Mandatory.Integer @@ -283,15 +273,15 @@ class InfoOutput(ComplexModel): """ __namespace__ = SOAP_NAMESPACE - version = String - versionParts = Array(String) - releaseDate = String - nomenclatureVersion = String - nomenclatureVersionParts = Array(String) - serverName = String - contactEmail = String - announcement = String - announcementUrl = String + version = Unicode + versionParts = Array(Unicode) + releaseDate = Unicode + 
nomenclatureVersion = Unicode + nomenclatureVersionParts = Array(Unicode) + serverName = Unicode + contactEmail = Unicode + announcement = Unicode + announcementUrl = Unicode #InfoOutput @@ -301,14 +291,14 @@ class CacheEntry(ComplexModel): """ __namespace__ = SOAP_NAMESPACE - name = Mandatory.String - gi = String - hash = Mandatory.String - chromosomeName = String + name = Mandatory.Unicode + gi = Unicode + hash = Mandatory.Unicode + chromosomeName = Unicode chromosomeStart = Integer chromosomeStop = Integer chromosomeOrientation = Integer - url = String + url = Unicode created = Mandatory.DateTime - cached = String + cached = Unicode #CacheEntry diff --git a/mutalyzer/mutator.py b/mutalyzer/mutator.py index 8047d932d4bab1ca4fa66b2020e2d69428d97853..4a4b0a2d157460e9ebc5cebbde89f0111090492f 100644 --- a/mutalyzer/mutator.py +++ b/mutalyzer/mutator.py @@ -12,12 +12,11 @@ The original as well as the mutated string are stored here. """ +from __future__ import unicode_literals + from collections import defaultdict from Bio import Restriction -from Bio.Seq import Seq -from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA -from Bio.Seq import reverse_complement from mutalyzer import util @@ -46,7 +45,7 @@ class Mutator(): Initialise the instance with the original sequence. @arg orig: The original sequence before mutation. - @type orig: str + @type orig: Bio.Seq.Seq @arg output: The output object. @type output: mutalyzer.Output.Output """ @@ -57,6 +56,8 @@ class Mutator(): self._output = output self.orig = orig + # Note that we don't need to create a copy here, since mutation + # operations are not in place (`self._mutate`). 
self.mutated = orig #__init__ @@ -72,7 +73,7 @@ class Mutator(): @rtype: dict """ analysis = Restriction.Analysis(self._restriction_batch, sequence) - return dict((str(k), len(v)) for k, v in analysis.with_sites().items()) + return dict((unicode(k), len(v)) for k, v in analysis.with_sites().items()) #_restriction_count def _counts_diff(self, counts1, counts2): @@ -109,10 +110,10 @@ class Mutator(): @arg pos2: Second interbase position of the deleted sequence. @type pos2: int @arg ins: Inserted sequence. - @type ins: str + @type ins: unicode @return: Visualisation. - @rtype: str + @rtype: unicode """ loflank = self.orig[max(pos1 - VIS_FLANK_LENGTH, 0):pos1] roflank = self.orig[pos2:pos2 + VIS_FLANK_LENGTH] @@ -338,7 +339,7 @@ class Mutator(): @arg pos2: Second interbase position of the deleted sequence. @type pos2: int @arg ins: Inserted sequence. - @type ins: str + @type ins: unicode """ correct = 1 if pos1 == pos2 else 0 self.mutated = (self.mutated[:self.shift(pos1 + 1) - 1] + @@ -375,7 +376,7 @@ class Mutator(): @arg pos: Interbase position where the insertion should take place. @type pos: int @arg ins: Inserted sequence. - @type ins: str + @type ins: unicode """ visualisation = ['insertion between %i and %i' % (pos, pos + 1)] visualisation.extend(self._visualise(pos, pos, ins)) @@ -394,7 +395,7 @@ class Mutator(): @arg pos2: Last nucleotide of the deleted sequence. @type pos2: int @arg ins: Inserted sequence. - @type ins: str + @type ins: unicode """ visualisation = ['delins from %i to %i' % (pos1, pos2)] visualisation.extend(self._visualise(pos1 - 1, pos2, ins)) @@ -410,7 +411,7 @@ class Mutator(): @arg pos: Position of the substitution. @type pos: int @arg nuc: Substituted nucleotide. - @type nuc: str + @type nuc: unicode """ visualisation = ['substitution at %i' % pos] visualisation.extend(self._visualise(pos - 1, pos, nuc)) @@ -428,14 +429,13 @@ class Mutator(): @arg pos2: Last nucleotide of the inverted sequence. 
@type pos2: int """ + sequence = util.reverse_complement(unicode(self.orig[pos1 - 1:pos2])) + visualisation = ['inversion between %i and %i' % (pos1, pos2)] - visualisation.extend( - self._visualise(pos1 - 1, pos2, - reverse_complement(self.orig[pos1 - 1:pos2]))) + visualisation.extend(self._visualise(pos1 - 1, pos2, sequence)) self._output.addOutput('visualisation', visualisation) - self._mutate(pos1 - 1, pos2, - reverse_complement(self.orig[pos1 - 1:pos2])) + self._mutate(pos1 - 1, pos2, sequence) #inversion def duplication(self, pos1, pos2): @@ -447,11 +447,12 @@ class Mutator(): @arg pos2: Last nucleotide of the duplicated sequence. @type pos2: int """ + sequence = unicode(self.orig[pos1 - 1:pos2]) + visualisation = ['duplication from %i to %i' % (pos1, pos2)] - visualisation.extend( - self._visualise(pos2, pos2, self.orig[pos1 - 1:pos2])) + visualisation.extend(self._visualise(pos2, pos2, sequence)) self._output.addOutput('visualisation', visualisation) - self._mutate(pos1 - 1, pos1 - 1, self.orig[pos1 - 1:pos2]) + self._mutate(pos1 - 1, pos1 - 1, sequence) #duplication #Mutator diff --git a/mutalyzer/output.py b/mutalyzer/output.py index 3ca1c8a71d8a998463262074fd0fae17f8a5c84c..fbec8418274798c367ba9153a438096284f974ae 100644 --- a/mutalyzer/output.py +++ b/mutalyzer/output.py @@ -23,6 +23,9 @@ Public classes: """ +from __future__ import unicode_literals + +import io import time from mutalyzer import util @@ -71,12 +74,13 @@ class Output() : - _warnings ; Initialised to 0. @arg instance: The filename of the module that created this object - @type instance: string + @type instance: unicode """ self._outputData = {} self._messages = [] self._instance = util.nice_filename(instance) - self._loghandle = open(settings.LOG_FILE, "a+") + self._loghandle = io.open(settings.LOG_FILE, mode='a+', + encoding='utf-8') self._errors = 0 self._warnings = 0 #__init__ @@ -147,7 +151,7 @@ class Output() : - _messages ; The messages list. 
@arg errorcode: The error code to filter on - @type errorcode: string + @type errorcode: unicode @return: A filtered list @rtype: list @@ -194,7 +198,7 @@ class Output() : - _outputData ; The output dictionary. @arg name: Name of a node in the output dictionary - @type name: string + @type name: unicode @arg data: The data to be stored at this node @type data: object """ @@ -258,7 +262,7 @@ class Output() : - Number of errors - Number of warnings - Summary - @rtype: integer, integer, string + @rtype: integer, integer, unicode """ e_s = 's' w_s = 's' @@ -297,13 +301,13 @@ class Message() : - description ; A description of the message. @arg origin: Name of the module creating this object - @type origin: string + @type origin: unicode @arg level: Importance of the message @type level: integer @arg code: The error code of the message - @type code: string + @type code: unicode @arg description: A description of the message - @type description: string + @type description: unicode """ self.origin = origin self.level = level @@ -316,17 +320,17 @@ class Message() : (self.origin, self.level, self.code, self.description) #__repr__ - def __str__(self): + def __unicode__(self): return '%s (%s): %s' % \ (self.named_level(), self.origin, self.description) - #__str__ + #__unicode__ def named_level(self): """ Get message log level as readable string. @return: A readable description of the log level. - @rtype: string + @rtype: unicode """ if self.level == 0: return "Debug" diff --git a/mutalyzer/parsers/__init__.py b/mutalyzer/parsers/__init__.py index 3e1bd90dd08aa288d05a8c342e2bbae9218a730c..6b3f43347bc55d1518e6aaeb0279b5fa3bac9871 100644 --- a/mutalyzer/parsers/__init__.py +++ b/mutalyzer/parsers/__init__.py @@ -1,3 +1,6 @@ """ Parsers for GenRecord objects. 
""" + + +from __future__ import unicode_literals diff --git a/mutalyzer/parsers/genbank.py b/mutalyzer/parsers/genbank.py index 867fa78f7b2d838d9076fc460eb5fd02282aee58..247545989e105702211e0c796a88b256edd40f3d 100644 --- a/mutalyzer/parsers/genbank.py +++ b/mutalyzer/parsers/genbank.py @@ -4,6 +4,9 @@ mutalyzer GenRecord. Record populated with data from a GenBank file. """ +from __future__ import unicode_literals + +import codecs import re import bz2 from itertools import izip_longest @@ -41,7 +44,7 @@ class tempGene(): - cdsList ; CDS list (including internal splice sites). @arg name: Gene name - @type name: string + @type name: unicode """ self.name = name @@ -75,8 +78,8 @@ class GBparser(): ret = [] - if not str(location.start).isdigit() or \ - not str(location.end).isdigit() : + if not unicode(location.start).isdigit() or \ + not unicode(location.end).isdigit() : return None #if @@ -99,8 +102,8 @@ class GBparser(): ret = [] - if not str(locationList.location.start).isdigit() or \ - not str(locationList.location.end).isdigit() : + if not unicode(locationList.location.start).isdigit() or \ + not unicode(locationList.location.end).isdigit() : return None #if @@ -128,10 +131,10 @@ class GBparser(): @arg transcriptAcc: Accession number of the transcript for which we want to find the protein - @type transcriptAcc: string + @type transcriptAcc: unicode @return: Accession number of a protein or None if nothing can be found - @rtype: string + @rtype: unicode """ link = queries.get_transcript_protein_link(transcriptAcc) if link is not None: @@ -146,7 +149,7 @@ class GBparser(): finally: handle.close() - transcriptGI = result["IdList"][0] + transcriptGI = unicode(result["IdList"][0]) handle = Entrez.elink(dbfrom = "nucleotide", db = "protein", id = transcriptGI) @@ -162,11 +165,11 @@ class GBparser(): queries.update_transcript_protein_link(transcriptAcc) return None - proteinGI = result[0]["LinkSetDb"][0]["Link"][0]["Id"] + proteinGI = 
unicode(result[0]["LinkSetDb"][0]["Link"][0]["Id"]) handle = Entrez.efetch(db='protein', id=proteinGI, rettype='acc', retmode='text') - proteinAcc = handle.read().split('.')[0] + proteinAcc = unicode(handle.read()).split('.')[0] handle.close() queries.update_transcript_protein_link(transcriptAcc, proteinAcc) @@ -179,7 +182,7 @@ class GBparser(): sentence from another. The index of the last word is counted backwards. @arg sentences: A list of sentences. - @type sentences: list of strings + @type sentences: list of unicode strings @return: The indices of the words where sentences start to differ, both are -1 when no mismatches are found. @@ -217,7 +220,7 @@ class GBparser(): [-1:1] yields the empty list. """ # Create lists of words - lists = map(str.split, sentences) + lists = [s.split() for s in sentences] try: forward, reverse = [next(i for i, v in @@ -239,7 +242,7 @@ class GBparser(): @arg locus: The locus object on which the transfer should be performed @type locus: locus object @arg key: The name of the variable that should be transferred - @type key: string + @type key: unicode """ if locus.qualifiers.has_key(key) : @@ -315,7 +318,7 @@ class GBparser(): @arg locusList: A list of loci @type locusList: list @arg tagName: Name of the tag to be checked - @type tagName: string + @type tagName: unicode """ tags = [] @@ -476,13 +479,14 @@ class GBparser(): Create a GenRecord.Record from a GenBank file @arg filename: The full path to the compressed GenBank file - @type filename: string + @type filename: unicode @return: A GenRecord.Record instance @rtype: object (record) """ # first create an intermediate genbank record with BioPython file_handle = bz2.BZ2File(filename, "r") + file_handle = codecs.getreader('utf-8')(file_handle) biorecord = SeqIO.read(file_handle, "genbank") file_handle.close() diff --git a/mutalyzer/parsers/lrg.py b/mutalyzer/parsers/lrg.py index d3624360291b5035fc3f5e6a323de4a59a08bfdc..b22b7ce69f3e7bb1b58e0e72783432fe4d11c4a3 100644 --- 
a/mutalyzer/parsers/lrg.py +++ b/mutalyzer/parsers/lrg.py @@ -21,6 +21,8 @@ added in python2.5. Its main strengths are speed and readability [pythonesque]. """ +from __future__ import unicode_literals + import xml.dom.minidom from Bio.Seq import Seq from Bio.Alphabet import IUPAC @@ -54,14 +56,14 @@ def _get_content(data, refname): @arg data: a minidom object @type data: object @arg refname: the name of a member of the minidom object - @type refname: string + @type refname: unicode - @return: The UTF-8 content of the textnode or an emtpy string + @return: The content of the textnode or an emtpy string @rtype: string """ temp = data.getElementsByTagName(refname) if temp: - return temp[0].lastChild.data.encode("utf8") + return temp[0].lastChild.data else: return "" #_get_content @@ -75,14 +77,14 @@ def _attr2dict(attr): @type attr: object @return: A dictionary with pairing of node-attribute names and values. - Integer string values are converted to integers. String values are converted - to UTF-8 + Integer string values are converted to integers. @rtype: dictionary """ ret = {} for key, value in attr.items(): - value = value.isdigit() and int(value) or value.encode("utf-8") - ret[key.encode("utf-8")] = value + if value.isdigit(): + value = int(value) + ret[key] = value return ret #_attr2dict @@ -110,7 +112,7 @@ def create_record(data): Create a GenRecord.Record of a LRG <xml> formatted string. @arg data: Content of LRG file - @type data: string + @type data: byte string @return: GenRecord.Record instance @rtype: object @@ -166,7 +168,7 @@ def create_record(data): for tData in fixed.getElementsByTagName("transcript"): # iterate over the transcripts in the fixed section. 
# get the transcript from the updatable section and combine results - transcriptName = tData.getAttribute("name").encode("utf8")[1:] + transcriptName = tData.getAttribute("name")[1:] transcription = [t for t in gene.transcriptList if t.name == transcriptName][0] #TODO?: swap with gene.findLocus diff --git a/mutalyzer/redisclient.py b/mutalyzer/redisclient.py index ec9e6050548a85d04dced7489fbd8de195a5c6fc..58acd7cacdda8818dcf902150f361ce8d7342313 100644 --- a/mutalyzer/redisclient.py +++ b/mutalyzer/redisclient.py @@ -18,6 +18,8 @@ simple and just use one global connection pool as created by `StrictRedis`. """ +from __future__ import unicode_literals + import redis from mutalyzer.config import settings @@ -37,7 +39,9 @@ class LazyClient(util.LazyObject): import mockredis self._wrapped = mockredis.MockRedis(strict=True) else: - self._wrapped = redis.StrictRedis.from_url(settings.REDIS_URI) + self._wrapped = redis.StrictRedis.from_url(settings.REDIS_URI, + decode_responses=True, + charset='utf-8') #: Global :class:`LazyClient` instance. Use this for all communication with diff --git a/mutalyzer/services/__init__.py b/mutalyzer/services/__init__.py index 05b3d031865b91b2a3ebd2ead081592a52a119e2..81887d7c05baaf74a3ef836f34fdb9dbe9c25336 100644 --- a/mutalyzer/services/__init__.py +++ b/mutalyzer/services/__init__.py @@ -1,3 +1,6 @@ """ Services (RPC) for Mutalyzer. """ + + +from __future__ import unicode_literals diff --git a/mutalyzer/services/json.py b/mutalyzer/services/json.py index c35b79293c1a790209185a9efa37772155acf07e..89c6a26e11cca3e0f2c64f2c96621cbaeffb236b 100644 --- a/mutalyzer/services/json.py +++ b/mutalyzer/services/json.py @@ -3,6 +3,8 @@ Mutalyzer web service HTTP/RPC with JSON response payloads. 
""" +from __future__ import unicode_literals + from spyne.application import Application from spyne.protocol.http import HttpRpc from spyne.protocol.json import JsonDocument diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py index d681b94c27c3e2398db5884bad99ad3532c2f346..c65053587d4e3b7639df78bf6152100cbf39114a 100644 --- a/mutalyzer/services/rpc.py +++ b/mutalyzer/services/rpc.py @@ -9,23 +9,24 @@ Mutalyzer RPC services. """ +from __future__ import unicode_literals + from spyne.decorator import srpc from spyne.service import ServiceBase -from spyne.model.primitive import String, Integer, Boolean, DateTime +from spyne.model.primitive import Integer, Boolean, DateTime, Unicode from spyne.model.complex import Array from spyne.model.fault import Fault +import io import os import socket -from cStringIO import StringIO -import tempfile -from operator import itemgetter, attrgetter +from operator import attrgetter from sqlalchemy.orm.exc import NoResultFound import mutalyzer from mutalyzer.config import settings from mutalyzer.db import session -from mutalyzer.db.models import (Assembly, Chromosome, BatchJob, - BatchQueueItem, TranscriptMapping) +from mutalyzer.db.models import (Assembly, BatchJob, BatchQueueItem, + TranscriptMapping) from mutalyzer.output import Output from mutalyzer.grammar import Grammar from mutalyzer.sync import CacheSync @@ -51,7 +52,7 @@ class MutalyzerService(ServiceBase): super(MutalyzerService, self).__init__(environ) #__init__ - @srpc(Mandatory.ByteArray, String, String, _returns=String) + @srpc(Mandatory.ByteArray, Unicode, Unicode, _returns=Unicode) def submitBatchJob(data, process='NameChecker', argument=''): """ Submit a batch job. @@ -90,6 +91,12 @@ class MutalyzerService(ServiceBase): 'The process argument must be one of %s.' % ', '.join(batch_types)) + # The Python type for `data` should be a sequence of `str` objects, + # but it seems we sometimes just get one `str` object. 
Perhaps only in + # the unit tests, but let's fix that anyway. + if isinstance(data, str): + data = [data] + # Note that the max file size check below might be bogus, since Spyne # first checks the total request size, which by default has a maximum # of 2 megabytes. @@ -103,7 +110,9 @@ class MutalyzerService(ServiceBase): 'Only files up to %d megabytes are accepted.' % (settings.MAX_FILE_SIZE // 1048576)) - batch_file = StringIO(''.join(data)) + batch_file = io.BytesIO() + for d in data: + batch_file.write(d) job, columns = file_instance.parseBatchFile(batch_file) batch_file.close() @@ -115,7 +124,7 @@ class MutalyzerService(ServiceBase): batch_types[process], argument) return result_id - @srpc(Mandatory.String, _returns=Integer) + @srpc(Mandatory.Unicode, _returns=Integer) def monitorBatchJob(job_id): """ Get the number of entries left for a batch job. @@ -129,7 +138,7 @@ class MutalyzerService(ServiceBase): """ return BatchQueueItem.query.join(BatchJob).filter_by(result_id=job_id).count() - @srpc(Mandatory.String, _returns=ByteArray) + @srpc(Mandatory.Unicode, _returns=ByteArray) def getBatchJob(job_id): """ Get the result of a batch job. @@ -144,7 +153,7 @@ class MutalyzerService(ServiceBase): @arg job_id: Batch job identifier. - @return: Batch job result file. + @return: Batch job result file (UTF-8, base64 encoded). 
""" left = BatchQueueItem.query.join(BatchJob).filter_by(result_id=job_id).count() @@ -152,11 +161,11 @@ class MutalyzerService(ServiceBase): raise Fault('EBATCHNOTREADY', 'Batch job result is not yet ready.') filename = 'batch-job-%s.txt' % job_id - handle = open(os.path.join(settings.CACHE_DIR, filename)) + handle = open(os.path.join(settings.CACHE_DIR, filename), 'rb') return handle - @srpc(Mandatory.String, Mandatory.String, Mandatory.Integer, Boolean, - _returns=Array(Mandatory.String)) + @srpc(Mandatory.Unicode, Mandatory.Unicode, Mandatory.Integer, Boolean, + _returns=Array(Mandatory.Unicode)) def getTranscripts(build, chrom, pos, versions=False) : """ Get all the transcripts that overlap with a chromosomal position. @@ -215,7 +224,7 @@ class MutalyzerService(ServiceBase): return [m.accession for m in mappings] #getTranscripts - @srpc(Mandatory.String, Mandatory.String, _returns=Array(Mandatory.String)) + @srpc(Mandatory.Unicode, Mandatory.Unicode, _returns=Array(Mandatory.Unicode)) def getTranscriptsByGeneName(build, name): """ Todo: documentation. @@ -243,8 +252,8 @@ class MutalyzerService(ServiceBase): return ['%s.%s' % (m.accession, m.version) for m in mappings] #getTranscriptsByGene - @srpc(Mandatory.String, Mandatory.String, Mandatory.Integer, - Mandatory.Integer, Mandatory.Integer, _returns=Array(Mandatory.String)) + @srpc(Mandatory.Unicode, Mandatory.Unicode, Mandatory.Integer, + Mandatory.Integer, Mandatory.Integer, _returns=Array(Mandatory.Unicode)) def getTranscriptsRange(build, chrom, pos1, pos2, method) : """ Get all the transcripts that overlap with a range on a chromosome. 
@@ -302,7 +311,7 @@ class MutalyzerService(ServiceBase): return [m.accession for m in mappings] #getTranscriptsRange - @srpc(Mandatory.String, Mandatory.String, Mandatory.Integer, + @srpc(Mandatory.Unicode, Mandatory.Unicode, Mandatory.Integer, Mandatory.Integer, Mandatory.Integer, _returns=Array(TranscriptMappingInfo)) def getTranscriptsMapping(build, chrom, pos1, pos2, method): @@ -387,7 +396,7 @@ class MutalyzerService(ServiceBase): return transcripts #getTranscriptsMapping - @srpc(Mandatory.String, Mandatory.String, _returns=Mandatory.String) + @srpc(Mandatory.Unicode, Mandatory.Unicode, _returns=Mandatory.Unicode) def getGeneName(build, accno) : """ Find the gene name associated with a transcript. @@ -424,8 +433,8 @@ class MutalyzerService(ServiceBase): return mapping.gene #getGeneName - @srpc(Mandatory.String, Mandatory.String, Mandatory.String, - Mandatory.String, _returns=Mapping) + @srpc(Mandatory.Unicode, Mandatory.Unicode, Mandatory.Unicode, + Mandatory.Unicode, _returns=Mapping) def mappingInfo(LOVD_ver, build, accNo, variant) : """ Search for an NM number in the MySQL database, if the version @@ -492,7 +501,7 @@ class MutalyzerService(ServiceBase): return result #mappingInfo - @srpc(Mandatory.String, Mandatory.String, Mandatory.String, + @srpc(Mandatory.Unicode, Mandatory.Unicode, Mandatory.Unicode, _returns=Transcript) def transcriptInfo(LOVD_ver, build, accNo) : """ @@ -536,7 +545,7 @@ class MutalyzerService(ServiceBase): return T #transcriptInfo - @srpc(Mandatory.String, Mandatory.String, _returns=Mandatory.String) + @srpc(Mandatory.Unicode, Mandatory.Unicode, _returns=Mandatory.Unicode) def chromAccession(build, name) : """ Get the accession number of a chromosome, given a name. 
@@ -574,7 +583,7 @@ class MutalyzerService(ServiceBase): return chromosome.accession #chromAccession - @srpc(Mandatory.String, Mandatory.String, _returns=Mandatory.String) + @srpc(Mandatory.Unicode, Mandatory.Unicode, _returns=Mandatory.Unicode) def chromosomeName(build, accNo) : """ Get the name of a chromosome, given a chromosome accession number. @@ -612,7 +621,7 @@ class MutalyzerService(ServiceBase): return chromosome.name #chromosomeName - @srpc(Mandatory.String, Mandatory.String, _returns=Mandatory.String) + @srpc(Mandatory.Unicode, Mandatory.Unicode, _returns=Mandatory.Unicode) def getchromName(build, acc) : """ Get the chromosome name, given a transcript identifier (NM number). @@ -649,8 +658,8 @@ class MutalyzerService(ServiceBase): return mapping.chromosome.name #chromosomeName - @srpc(Mandatory.String, Mandatory.String, String, - _returns=Array(Mandatory.String)) + @srpc(Mandatory.Unicode, Mandatory.Unicode, Unicode, + _returns=Array(Mandatory.Unicode)) def numberConversion(build, variant, gene=None): """ Converts I{c.} to I{g.} notation or vice versa @@ -696,7 +705,7 @@ class MutalyzerService(ServiceBase): return result #numberConversion - @srpc(Mandatory.String, _returns=CheckSyntaxOutput) + @srpc(Mandatory.Unicode, _returns=CheckSyntaxOutput) def checkSyntax(variant): """ Checks the syntax of a variant. @@ -739,7 +748,7 @@ class MutalyzerService(ServiceBase): return result #checkSyntax - @srpc(Mandatory.String, _returns=MutalyzerOutput) + @srpc(Mandatory.Unicode, _returns=MutalyzerOutput) def runMutalyzer(variant) : """ Run the Mutalyzer name checker. @@ -804,23 +813,18 @@ class MutalyzerService(ServiceBase): result.sourceGi = O.getIndexedOutput('source_gi', 0) result.molecule = O.getIndexedOutput('molecule', 0) - # We force the results to strings here, because some results - # may be of type Bio.Seq.Seq which spyne doesn't like. - # - # todo: We might have to also do this elsewhere. 
+ result.original = O.getIndexedOutput("original", 0) + result.mutated = O.getIndexedOutput("mutated", 0) - result.original = str(O.getIndexedOutput("original", 0)) - result.mutated = str(O.getIndexedOutput("mutated", 0)) + result.origMRNA = O.getIndexedOutput("origMRNA", 0) + result.mutatedMRNA = O.getIndexedOutput("mutatedMRNA", 0) - result.origMRNA = str(O.getIndexedOutput("origMRNA", 0)) - result.mutatedMRNA = str(O.getIndexedOutput("mutatedMRNA", 0)) + result.origCDS = O.getIndexedOutput("origCDS", 0) + result.newCDS = O.getIndexedOutput("newCDS", 0) - result.origCDS = str(O.getIndexedOutput("origCDS", 0)) - result.newCDS = str(O.getIndexedOutput("newCDS", 0)) - - result.origProtein = str(O.getIndexedOutput("oldprotein", 0)) - result.newProtein = str(O.getIndexedOutput("newprotein", 0)) - result.altProtein = str(O.getIndexedOutput("altProtein", 0)) + result.origProtein = O.getIndexedOutput("oldprotein", 0) + result.newProtein = O.getIndexedOutput("newprotein", 0) + result.altProtein = O.getIndexedOutput("altProtein", 0) result.chromDescription = \ O.getIndexedOutput("genomicChromDescription", 0) @@ -860,7 +864,7 @@ class MutalyzerService(ServiceBase): return result #runMutalyzer - @srpc(Mandatory.String, Mandatory.String, _returns=TranscriptNameInfo) + @srpc(Mandatory.Unicode, Mandatory.Unicode, _returns=TranscriptNameInfo) def getGeneAndTranscript(genomicReference, transcriptReference) : """ Todo: documentation. 
@@ -892,7 +896,7 @@ class MutalyzerService(ServiceBase): return ret #getGeneAndTranscript - @srpc(Mandatory.String, String, _returns=Array(TranscriptInfo)) + @srpc(Mandatory.Unicode, Unicode, _returns=Array(TranscriptInfo)) def getTranscriptsAndInfo(genomicReference, geneName=None): """ Given a genomic reference, return all its transcripts with their @@ -995,7 +999,7 @@ class MutalyzerService(ServiceBase): transcript.CM.info() cds_start = 1 - t.cTransEnd = str(t.exons[-1].cStop) + t.cTransEnd = unicode(t.exons[-1].cStop) t.gTransEnd = t.exons[-1].gStop t.chromTransEnd = GenRecordInstance.record.toChromPos( t.gTransEnd) @@ -1009,15 +1013,15 @@ class MutalyzerService(ServiceBase): t.name = '%s_v%s' % (gene.name, transcript.name) t.id = transcript.transcriptID t.product = transcript.transcriptProduct - t.cTransStart = str(trans_start) + t.cTransStart = unicode(trans_start) t.gTransStart = transcript.CM.x2g(trans_start, 0) t.chromTransStart = GenRecordInstance.record.toChromPos( t.gTransStart) - t.cCDSStart = str(cds_start) + t.cCDSStart = unicode(cds_start) t.gCDSStart = transcript.CM.x2g(cds_start, 0) t.chromCDSStart = GenRecordInstance.record.toChromPos( t.gCDSStart) - t.cCDSStop = str(cds_stop) + t.cCDSStop = unicode(cds_stop) t.gCDSStop = transcript.CM.x2g(cds_stop, 0) t.chromCDSStop = GenRecordInstance.record.toChromPos(t.gCDSStop) t.locusTag = transcript.locusTag @@ -1040,12 +1044,12 @@ class MutalyzerService(ServiceBase): return transcripts #getTranscriptsAndInfo - @srpc(Mandatory.ByteArray, _returns=Mandatory.String) + @srpc(Mandatory.ByteArray, _returns=Mandatory.Unicode) def uploadGenBankLocalFile(data): """ Upload a genbank file. - @arg data: Genbank file (base64 encoded). + @arg data: Genbank file (UTF-8, base64 encoded). @return: UD accession number for the uploaded genbank file. 
""" output = Output(__file__) @@ -1054,6 +1058,12 @@ class MutalyzerService(ServiceBase): output.addMessage(__file__, -1, 'INFO', 'Received request uploadGenBankLocalFile()') + # The Python type for `data` should be a sequence of `str` objects, + # but it seems we sometimes just get one `str` object. Perhaps only in + # the unit tests, but let's fix that anyway. + if isinstance(data, str): + data = [data] + # Note that the max file size check below might be bogus, since Spyne # first checks the total request size, which by default has a maximum # of 2 megabytes. @@ -1067,7 +1077,7 @@ class MutalyzerService(ServiceBase): 'Only files up to %d megabytes are accepted.' % (settings.MAX_FILE_SIZE // 1048576)) - ud = retriever.uploadrecord(''.join(data)) + ud = retriever.uploadrecord(b''.join(data)) output.addMessage(__file__, -1, 'INFO', 'Finished processing uploadGenBankLocalFile()') @@ -1075,13 +1085,13 @@ class MutalyzerService(ServiceBase): # Todo: use SOAP Fault object here (see Trac issue #41). if not ud: error = 'The request could not be completed\n' \ - + '\n'.join(map(lambda m: str(m), output.getMessages())) + + '\n'.join(map(lambda m: unicode(m), output.getMessages())) raise Exception(error) return ud #upLoadGenBankLocalFile - @srpc(Mandatory.String, _returns=Mandatory.String) + @srpc(Mandatory.Unicode, _returns=Mandatory.Unicode) def uploadGenBankRemoteFile(url) : """ Not implemented yet. @@ -1089,8 +1099,8 @@ class MutalyzerService(ServiceBase): raise Fault('ENOTIMPLEMENTED', 'Not implemented yet') #upLoadGenBankRemoteFile - @srpc(Mandatory.String, Mandatory.String, Mandatory.Integer, - Mandatory.Integer, _returns=Mandatory.String) + @srpc(Mandatory.Unicode, Mandatory.Unicode, Mandatory.Integer, + Mandatory.Integer, _returns=Mandatory.Unicode) def sliceChromosomeByGene(geneSymbol, organism, upStream, downStream) : """ @@ -1112,14 +1122,14 @@ class MutalyzerService(ServiceBase): # Todo: use SOAP Fault object here (see Trac issue #41). 
if not UD: error = 'The request could not be completed\n' \ - + '\n'.join(map(lambda m: str(m), O.getMessages())) + + '\n'.join(map(lambda m: unicode(m), O.getMessages())) raise Exception(error) return UD #sliceChromosomeByGene - @srpc(Mandatory.String, Mandatory.Integer, Mandatory.Integer, - Mandatory.Integer, _returns=Mandatory.String) + @srpc(Mandatory.Unicode, Mandatory.Integer, Mandatory.Integer, + Mandatory.Integer, _returns=Mandatory.Unicode) def sliceChromosome(chromAccNo, start, end, orientation) : """ Todo: documentation, error handling, argument checking, tests. @@ -1190,7 +1200,7 @@ class MutalyzerService(ServiceBase): return result #info - @srpc(_returns=Mandatory.String) + @srpc(_returns=Mandatory.Unicode) def ping(): """ Simple function to test the interface. @@ -1201,7 +1211,7 @@ class MutalyzerService(ServiceBase): return 'pong' #ping - @srpc(Mandatory.String, Mandatory.String, _returns=Allele) + @srpc(Mandatory.Unicode, Mandatory.Unicode, _returns=Allele) def descriptionExtract(reference, observed): """ Extract the HGVS variant description from a reference sequence and an @@ -1253,7 +1263,7 @@ class MutalyzerService(ServiceBase): return map(cache_entry_to_soap, cache) #getCache - @srpc(Mandatory.String, _returns=Array(Mandatory.String)) + @srpc(Mandatory.Unicode, _returns=Array(Mandatory.Unicode)) def getdbSNPDescriptions(rs_id): """ Lookup HGVS descriptions for a dbSNP rs identifier. 
@@ -1281,7 +1291,7 @@ class MutalyzerService(ServiceBase): messages = output.getMessages() if messages: error = 'The request could not be completed\n' + \ - '\n'.join(map(lambda m: str(m), output.getMessages())) + '\n'.join(map(lambda m: unicode(m), output.getMessages())) raise Exception(error) return descriptions diff --git a/mutalyzer/services/soap.py b/mutalyzer/services/soap.py index a7d7b001868705b65807f27edba653c1050e6fda..d8f28407bd29afbedc37be52020122a96c2c8490 100644 --- a/mutalyzer/services/soap.py +++ b/mutalyzer/services/soap.py @@ -3,6 +3,8 @@ Mutalyzer SOAP/1.1 web service. """ +from __future__ import unicode_literals + from spyne.application import Application from spyne.protocol.soap import Soap11 diff --git a/mutalyzer/stats.py b/mutalyzer/stats.py index bb1dec573161b469af85f52f8862d5883b45f4a7..e7228cdfb4e8dbb34a1a59ebcc07654f42679a8c 100644 --- a/mutalyzer/stats.py +++ b/mutalyzer/stats.py @@ -17,6 +17,8 @@ module much more. """ +from __future__ import unicode_literals + import time from mutalyzer.redisclient import client @@ -36,7 +38,8 @@ def increment_counter(counter): pipe.incr('counter:%s:total' % counter) for label, bucket, expire in INTERVALS: - key = 'counter:%s:%s:%s' % (counter, label, time.strftime(bucket)) + key = 'counter:%s:%s:%s' % (counter, label, + unicode(time.strftime(bucket))) pipe.incr(key) # It's safe to just keep on expiring the counter, even if it already diff --git a/mutalyzer/sync.py b/mutalyzer/sync.py index e5465e1e35e6f5a1cbc1556b8f5f817520947a2a..a1a1b7f90a3e687ef17aa833f3613b7895054d9c 100644 --- a/mutalyzer/sync.py +++ b/mutalyzer/sync.py @@ -3,6 +3,8 @@ Synchronizing the reference file cache with other Mutalyzer instances. """ +from __future__ import unicode_literals + from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() from datetime import datetime, timedelta @@ -86,7 +88,7 @@ class CacheSync(object): or later. :arg remote_wsdl: The url of the remote SOAP WSDL description. 
- :type remote_wsdl: str + :type remote_wsdl: unicode :arg created_since: Only entries with this creation date or later are returned. :type created_since: datatime.datetime @@ -111,11 +113,11 @@ class CacheSync(object): 1: 'forward', 2: 'reverse'} - entry_dict = {'name': str(entry.name), - 'hash': str(entry.hash), + entry_dict = {'name': entry.name, + 'hash': entry.hash, 'created': entry.created} for attribute in ('gi', 'chromosomeName', 'url', 'cached'): - entry_dict[attribute] = str(entry[attribute]) \ + entry_dict[attribute] = entry[attribute] \ if attribute in entry else None for attribute in ('chromosomeStart', 'chromosomeStop'): entry_dict[attribute] = int(entry[attribute]) \ @@ -131,9 +133,9 @@ class CacheSync(object): Download a remote file located at `url` and store it as `name`. :arg name: Name to store the file under. - :type name: str + :type name: unicode :arg url: Url to the remote file. - :type url: str + :type url: unicode """ if not re.match('^[\da-zA-Z\._-]+$', name): return @@ -160,10 +162,10 @@ class CacheSync(object): (14, 3) :arg remote_wsdl: The url of the remote SOAP WSDL description. - :type remote_wsdl: str + :type remote_wsdl: unicode :arg url_template: Formatting string containing a ``{file}`` occurence, see example usage above. - :string url_template: str + :string url_template: unicode :arg days: Only remote entries added this number of days ago or later are considered. :type days: int diff --git a/mutalyzer/util.py b/mutalyzer/util.py index 936f0812b6abb077cb17dcb252a146cb3a5285f5..6b7987b31c8f9a7bed62507572f0c417589d6c4a 100644 --- a/mutalyzer/util.py +++ b/mutalyzer/util.py @@ -19,20 +19,88 @@ General utility functions. """ +from __future__ import unicode_literals + from functools import wraps import inspect from itertools import izip_longest import math import operator -import os import sys import time -from Bio.Alphabet import IUPAC -import Bio.Seq from Bio.SeqUtils import seq3 +# Taken from BioPython. 
+AMBIGUOUS_DNA_COMPLEMENT = { + 'A': 'T', + 'C': 'G', + 'G': 'C', + 'T': 'A', + 'M': 'K', + 'R': 'Y', + 'W': 'W', + 'S': 'S', + 'Y': 'R', + 'K': 'M', + 'V': 'B', + 'H': 'D', + 'D': 'H', + 'B': 'V', + 'X': 'X', + 'N': 'N'} +AMBIGUOUS_RNA_COMPLEMENT = { + 'A': 'U', + 'C': 'G', + 'G': 'C', + 'U': 'A', + 'M': 'K', + 'R': 'Y', + 'W': 'W', + 'S': 'S', + 'Y': 'R', + 'K': 'M', + 'V': 'B', + 'H': 'D', + 'D': 'H', + 'B': 'V', + 'X': 'X', + 'N': 'N'} + + +def _make_translation_table(complement_mapping): + before = complement_mapping.keys() + before += [b.lower() for b in before] + after = complement_mapping.values() + after += [b.lower() for b in after] + return {ord(k): v for k, v in zip(before, after)} + + +_dna_complement_table = _make_translation_table(AMBIGUOUS_DNA_COMPLEMENT) +_rna_complement_table = _make_translation_table(AMBIGUOUS_RNA_COMPLEMENT) + + +def reverse_complement(sequence): + """ + Reverse complement of a sequence represented as unicode string. + """ + if 'U' in sequence or 'u' in sequence: + table = _rna_complement_table + else: + table = _dna_complement_table + + return ''.join(reversed(sequence.translate(table))) + + +def is_utf8_alias(encoding): + """ + Returns `True` if the given encoding is recognized as UTF-8. + """ + aliases = ('utf_8', 'u8', 'utf', 'utf8') + return encoding.lower().replace('-', '_') in aliases + + def grouper(iterable, n=2, fillvalue=None): """ Make an iterator that takes {n} elements at a time from {iterable}, using @@ -115,17 +183,17 @@ def splice(s, splice_sites): 'bcdghijklmnoptuvw' @arg s: A DNA sequence. - @type s: string + @type s: any sequence type @arg splice_sites: A list of even length of integers. @type splice_sites: list @return: The concatenation of slices from the sequence that is present in the GenBank record. - @rtype: string + @rtype: type(s) @todo: Assert length of splice_sites is even. 
""" - transcript = '' + transcript = s[:0] for acceptor, donor in grouper(splice_sites): transcript += s[acceptor - 1:donor] @@ -146,7 +214,7 @@ def __nsplice(string, splice_sites, CDS, orientation) : @todo: documentation """ - transcript = "" + transcript = string[:0] if orientation == 1 : for i in range(0, len(splice_sites), 2) : if CDS[0] >= splice_sites[i] and CDS[0] <= splice_sites[i + 1] : @@ -212,14 +280,15 @@ def format_range(first, last): @type last: integer @return: {first}_{last} in case of a real range, {first} otherwise. - @rtype: string + @rtype: unicode """ if first == last: - return str(first) + return unicode(first) return '%i_%i' % (first, last) #format_range + def roll_(s, start, end) : """ Different (and easier) way of finding the variability of a substring. @@ -239,6 +308,7 @@ def roll_(s, start, end) : return j, i #roll + def roll(s, first, last): """ Determine the variability of a variant by looking at cyclic @@ -254,7 +324,7 @@ def roll(s, first, last): (1, 3) @arg s: A reference sequence. - @type s: string + @type s: any sequence type @arg first: First position of the pattern in the reference sequence. @type first: int @arg last: Last position of the pattern in the reference sequence. @@ -302,13 +372,13 @@ def palinsnoop(s): 0 @arg s: A nucleotide sequence. - @type s: string + @type s: unicode @return: The number of elements that are palindromic or -1 if the string is a 'palindrome'. - @rtype: string + @rtype: int """ - s_revcomp = Bio.Seq.reverse_complement(s) + s_revcomp = reverse_complement(s) for i in range(int(math.ceil(len(s) / 2.0))): if s[i] != s_revcomp[i]: @@ -330,12 +400,12 @@ def longest_common_prefix(s1, s2): 'abcdefg' @arg s1: The first string. - @type s1: string + @type s1: unicode @arg s2: The second string. - @type s2: string + @type s2: unicode @return: The longest common prefix of s1 and s2. 
- @rtype: string + @rtype: unicode @todo: This is mostly used just for the length of the returned string, and we could also return that directly. @@ -359,9 +429,9 @@ def longest_common_suffix(s1, s2): 'efg' @arg s1: The first string. - @type s1: string + @type s1: unicode @arg s2: The second string. - @type s2: string + @type s2: unicode @return: The longest common suffix of s1 and s2. @rtype: string @@ -380,15 +450,15 @@ def trim_common(s1, s2): ('xyzef', 'abc', 3, 1) @arg s1: A string. - @type s1: string + @type s1: unicode @arg s2: Another string. - @type s2: string + @type s2: unicode @return: A tuple of: - - string: Trimmed version of s1. - - string: Trimmed version of s2. - - int: Length of longest common prefix. - - int: Length of longest common suffix. + - unicode: Trimmed version of s1. + - unicode: Trimmed version of s2. + - int: Length of longest common prefix. + - int: Length of longest common suffix. @todo: More intelligently handle longest_common_prefix(). """ @@ -407,14 +477,14 @@ def is_dna(s): >>> is_dna('TACUGT') False - @arg s: Any string or Bio.Seq.Seq instance. - @type s: string + @arg s: Any string. + @type s: unicode @return: True if the string is a DNA string, False otherwise. @rtype: boolean """ - for i in str(s): - if not i in IUPAC.unambiguous_dna.letters: + for i in s: + if i not in 'ATCG': return False return True @@ -435,16 +505,16 @@ def in_frame_description(s1, s2) : ('p.(Pro4_Gln6delinsGlnGlnMet)', 3, 6, 6) @arg s1: The original protein. - @type s1: string + @type s1: unicode @arg s2: The mutated protein. - @type s2: string + @type s2: unicode @return: A tuple of: - - string ; Protein description of the change. - - int ; First position of the change. - - int ; Last position of the change in the first protein. - - int ; Last position of the change in the second protein. - @rtype: tuple(string, int, int, int) + - unicode ; Protein description of the change. + - int ; First position of the change. 
+ - int ; Last position of the change in the first protein. + - int ; Last position of the change in the second protein. + @rtype: tuple(unicode, int, int, int) @todo: More intelligently handle longest_common_prefix(). @todo: Refactor this code (too many return statements). @@ -528,16 +598,16 @@ def out_of_frame_description(s1, s2): ('p.(Pro4Glnfs*5)', 3, 7, 7) @arg s1: The original protein. - @type s1: string + @type s1: unicode @arg s2: The mutated protein. - @type s2: string + @type s2: unicode @return: A tuple of: - - string ; Protein description of the change. - - int ; First position of the change. - - int ; Last position of the first protein. - - int ; Last position of the second protein. - @rtype: tuple(string, int, int, int) + - unicode ; Protein description of the change. + - int ; First position of the change. + - int ; Last position of the first protein. + - int ; Last position of the second protein. + @rtype: tuple(unicode, int, int, int) @todo: More intelligently handle longest_common_prefix(). """ @@ -573,23 +643,23 @@ def protein_description(cds_stop, s1, s2) : @arg cds_stop: Position of the stop codon in c. notation (CDS length). @type cds_stop: int @arg s1: The original protein. - @type s1: string + @type s1: unicode @arg s2: The mutated protein. - @type s2: string + @type s2: unicode @return: A tuple of: - - string ; Protein description of the change. - - int ; First position of the change. - - int ; Last position of the change in the first protein. - - int ; Last position of the change in the second protein. - @rtype: tuple(string, int, int, int) + - unicode ; Protein description of the change. + - int ; First position of the change. + - int ; Last position of the change in the first protein. + - int ; Last position of the change in the second protein. 
+ @rtype: tuple(unicode, int, int, int) """ if cds_stop % 3: - description = out_of_frame_description(str(s1), str(s2)) + description = out_of_frame_description(s1, s2) else: - description = in_frame_description(str(s1), str(s2)) + description = in_frame_description(s1, s2) - if not s2 or str(s1[0]) != str(s2[0]): + if not s2 or s1[0] != s2[0]: # Mutation in start codon. return 'p.?', description[1], description[2], description[3] @@ -603,7 +673,7 @@ def visualise_sequence(sequence, max_length=25, flank_size=6): string is clipped; otherwise the string is just returned. @arg sequence: DNA sequence. - @type sequence: str + @type sequence: unicode @arg max_length: Maximum length of visualised sequence. @type max_length: int @arg flank_size: Length of the flanks in clipped visualised sequence. @@ -629,19 +699,19 @@ def _insert_tag(s, pos1, pos2, tag1, tag2): anything either. @arg s: A sequence. - @type s: + @type s: unicode @arg pos1: Position of tag1. @type pos1: int @arg pos2: Position of tag2. @type pos2: int @arg tag1: Content of tag1. - @type tag1: string + @type tag1: unicode @arg tag2: Content of tag2. - @type tag2: string + @type tag2: unicode @return: The original sequence, or a sequence with eiter tag1, tag2 or both tags inserted. - @rtype: string + @rtype: unicode @todo: Cleanup (note: only used in print_protein_html). """ @@ -670,7 +740,7 @@ def print_protein_html(s, first, last, O, where, text=False): and is suitable for viewing in a monospaced font. @arg s: A protein sequence. - @type s: string + @type s: unicode @arg first: First position to highlight. @type first: int @arg last: Last position to highlight. @@ -678,7 +748,7 @@ def print_protein_html(s, first, last, O, where, text=False): @arg O: The Output object. @type O: Modules.Output.Output @arg where: Location in the {O} object to store the representation. - @type where: string + @type where: unicode @todo: Cleanup. 
""" @@ -701,7 +771,7 @@ def print_protein_html(s, first, last, O, where, text=False): o = 1 # Add the first position. - output = '%s ' % str(o).rjust(m) + output = '%s ' % unicode(o).rjust(m) for i in range(0, len(s), block): # Add the blocks. @@ -714,13 +784,13 @@ def print_protein_html(s, first, last, O, where, text=False): # Add the position (while escaping any potential highlighting). if text: if first < o < last: - output = '%s%s%s ' % (tag2, str(o).rjust(m), tag1) + output = '%s%s%s ' % (tag2, unicode(o).rjust(m), tag1) else: - output = '%s ' % str(o).rjust(m) + output = '%s ' % unicode(o).rjust(m) else: output = \ '<tt style="color:000000;font-weight:normal">%s</tt> ' % \ - str(o).rjust(m) + unicode(o).rjust(m) # Add last line. O.addOutput(where, output) @@ -748,10 +818,10 @@ def nice_filename(filename): Strip the path and the extention from a filename. @arg filename: A complete path plus extention. - @type filename: string + @type filename: unicode @return: The bare filename without a path and extention. - @rtype: string + @rtype: unicode """ return filename.split('/')[-1].split('.')[0] #nice_filename @@ -788,16 +858,16 @@ def format_usage(usage=None, keywords={}): @kwarg usage: The string to format. If omitted, the calling module's docstring is used. - @type usage: string + @type usage: unicode @kwarg keywords: A dictionary of (keyword, value) pairs used to format the usage string. If it does not contain the key 'command', it is added with the value of sys.argv[0]. - @type keywords: dictionary(string, string) + @type keywords: dictionary(unicode, unicode) @return: Formatted usage string. This is {usage} with any entries from {keywords} replaced and cut-off at the first occurence of two consecutive empty lines. 
- @rtype: string + @rtype: unicode """ if not usage: caller = inspect.stack()[1] diff --git a/mutalyzer/variantchecker.py b/mutalyzer/variantchecker.py index 65dd70564a727e88eb38ddac39baf38e5befb286..3f0ee4220d8d38451ab7cf4067f287c31ae29383 100644 --- a/mutalyzer/variantchecker.py +++ b/mutalyzer/variantchecker.py @@ -9,17 +9,22 @@ Notes about naming positions: * translation -> begin/end * any range of bases -> first/last * interbase position (if two numbers are used) -> before/after + +Notes about string representations: +* All variant descriptions and their parts are unicode strings +* All reference sequences (and their mutated version) are Bio.Seq.Seq objects """ -from operator import itemgetter, attrgetter +from __future__ import unicode_literals + +from operator import attrgetter -import Bio -import Bio.Seq -from Bio.Seq import Seq +from Bio.Data import CodonTable from Bio.Alphabet import IUPAC from Bio.Alphabet import DNAAlphabet from Bio.Alphabet import ProteinAlphabet +from Bio.Alphabet import _verify_alphabet from mutalyzer import util from mutalyzer.db.models import Assembly @@ -126,14 +131,14 @@ def _check_argument(argument, reference, first, last, output): Do several checks for the optional argument of a variant. Raise a _RawVariantError exception if the checks fail. + @arg argument: The optional argument. + @type argument: unicode @arg reference: The reference sequence. - @type reference: string + @type reference: Bio.Seq.Seq @arg first: Start position of the variant. @type first: int @arg last: End position of the variant. @type last: int - @arg argument: The optional argument. - @type argument: string @arg output: The Output object. @type output: mutalyzer.Output.Output @@ -164,8 +169,8 @@ def _check_argument(argument, reference, first, last, output): 'Invalid letters in argument.') raise _NotDNAError() # And the DNA must match the reference sequence. 
- reference_slice = str(reference[first - 1:last]) - if reference_slice != str(argument): + reference_slice = unicode(reference[first - 1:last]) + if reference_slice != argument: # Todo: Be more informative. output.addMessage(__file__, 3, 'EREF', '%s not found at position %s, found %s ' \ @@ -286,9 +291,9 @@ def apply_substitution(position, original, substitute, mutator, record, O): @arg position: Genomic location of the substitution. @type position: int @arg original: Nucleotide in the reference sequence. - @type original: string + @type original: unicode @arg substitute: Nucleotide in the mutated sequence. - @type substitute: string + @type substitute: unicode @arg mutator: A Mutator instance. @type mutator: mutalyzer.mutator.Mutator @arg record: A GenRecord object. @@ -310,7 +315,7 @@ def apply_substitution(position, original, substitute, mutator, record, O): mutator.substitution(position, substitute) - record.name(position, position, 'subst', mutator.orig[position - 1], + record.name(position, position, 'subst', unicode(mutator.orig[position - 1]), substitute, None) #apply_substitution @@ -326,7 +331,7 @@ def apply_deletion_duplication(first, last, type, mutator, record, O, @arg last: Genomic end position of the del/dup. @type last: int @arg type: The variant type (del or dup). - @type type: string + @type type: unicode @arg mutator: A Mutator instance. @type mutator: mutalyzer.mutator.Mutator @arg record: A GenRecord object. @@ -376,9 +381,9 @@ def apply_deletion_duplication(first, last, type, mutator, record, O, 'Sequence "%s" at position %s was given, however, ' \ 'the HGVS notation prescribes that on the forward strand ' \ 'it should be "%s" at position %s.' 
% ( - util.visualise_sequence(str(mutator.orig[first - 1:last])), + util.visualise_sequence(unicode(mutator.orig[first - 1:last])), util.format_range(first, last), - util.visualise_sequence(str(mutator.orig[new_first - 1:new_stop])), + util.visualise_sequence(unicode(mutator.orig[new_first - 1:new_stop])), util.format_range(new_first, new_stop))) if forward_roll != original_forward_roll and not reverse_strand: @@ -388,9 +393,9 @@ def apply_deletion_duplication(first, last, type, mutator, record, O, O.addMessage(__file__, 1, 'IROLLBACK', 'Sequence "%s" at position %s was not corrected to "%s" at ' \ 'position %s, since they reside in different exons.' % ( - util.visualise_sequence(str(mutator.orig[first - 1:last])), + util.visualise_sequence(unicode(mutator.orig[first - 1:last])), util.format_range(first, last), - util.visualise_sequence(str(mutator.orig[incorrect_first - 1:incorrect_stop])), + util.visualise_sequence(unicode(mutator.orig[incorrect_first - 1:incorrect_stop])), util.format_range(incorrect_first, incorrect_stop))) if reverse_roll and reverse_strand: @@ -400,9 +405,9 @@ def apply_deletion_duplication(first, last, type, mutator, record, O, 'Sequence "%s" at position %s was given, however, ' \ 'the HGVS notation prescribes that on the reverse strand ' \ 'it should be "%s" at position %s.' % ( - util.visualise_sequence(str(mutator.orig[first - 1:last])), + util.visualise_sequence(unicode(mutator.orig[first - 1:last])), util.format_range(first, last), - util.visualise_sequence(str(mutator.orig[new_first - 1:new_stop])), + util.visualise_sequence(unicode(mutator.orig[new_first - 1:new_stop])), util.format_range(new_first, new_stop))) # We don't go through the trouble of visualising the *corrected* variant @@ -434,7 +439,7 @@ def apply_inversion(first, last, mutator, record, O): @arg O: The Output object. 
 @type O: Modules.Output.Output """ - snoop = util.palinsnoop(mutator.orig[first - 1:last]) + snoop = util.palinsnoop(unicode(mutator.orig[first - 1:last])) if snoop: # We have a reverse-complement-palindromic prefix. @@ -444,7 +449,7 @@ def apply_inversion(first, last, mutator, record, O): O.addMessage(__file__, 2, 'WNOCHANGE', 'Sequence "%s" at position %i_%i is a palindrome ' \ '(its own reverse complement).' % ( - util.visualise_sequence(str(mutator.orig[first - 1:last])), + util.visualise_sequence(unicode(mutator.orig[first - 1:last])), first, last)) return else: @@ -453,10 +458,10 @@ 'palindrome (the first %i nucleotide(s) are the reverse ' \ 'complement of the last one(s)), the HGVS notation ' \ 'prescribes that it should be "%s" at position %i_%i.' % ( - util.visualise_sequence(str(mutator.orig[first - 1:last])), + util.visualise_sequence(unicode(mutator.orig[first - 1:last])), first, last, snoop, util.visualise_sequence( - str(mutator.orig[first + snoop - 1: last - snoop])), + unicode(mutator.orig[first + snoop - 1: last - snoop])), first + snoop, last - snoop)) first += snoop last -= snoop @@ -466,8 +471,8 @@ if first == last: O.addMessage(__file__, 2, 'WWRONGTYPE', 'Inversion at position ' \ '%i is actually a substitution.' % first) - record.name(first, first, 'subst', mutator.orig[first - 1], - Bio.Seq.reverse_complement(mutator.orig[first - 1]), None) + record.name(first, first, 'subst', unicode(mutator.orig[first - 1]), + util.reverse_complement(unicode(mutator.orig[first - 1])), None) else : record.name(first, last, 'inv', '', '', None) #apply_inversion @@ -483,7 +488,7 @@ def apply_insertion(before, after, s, mutator, record, O): @arg before: Genomic position before the insertion. @type before: int @arg after: Genomic position after the insertion. @type after: int @arg s: Nucleotides to be inserted. - @type s: string + @type s: unicode @arg mutator: A Mutator instance. 
@type mutator: mutalyzer.mutator.Mutator @arg record: A GenRecord object. @@ -547,7 +552,7 @@ def apply_insertion(before, after, s, mutator, record, O): 'however, the HGVS notation prescribes that it should be a ' \ 'duplication of %s at position %i_%i.' % ( s, before, before + 1, - mutator.mutated[new_before + forward_roll:new_stop + forward_roll], + unicode(mutator.mutated[new_before + forward_roll:new_stop + forward_roll]), before + forward_roll, before + forward_roll + insertion_length - 1)) after += forward_roll - 1 @@ -566,7 +571,7 @@ def apply_insertion(before, after, s, mutator, record, O): 'that on the forward strand it should be an insertion of %s ' \ 'at position %i_%i.' % ( s, before, before + 1, - mutator.mutated[new_before + forward_roll:new_stop + forward_roll], + unicode(mutator.mutated[new_before + forward_roll:new_stop + forward_roll]), new_before + forward_roll, new_before + forward_roll + 1)) if forward_roll != original_forward_roll and not reverse_strand: @@ -576,7 +581,7 @@ def apply_insertion(before, after, s, mutator, record, O): 'insertion of %s at position %i_%i, since they reside in ' \ 'different exons.' % ( s, before, before + 1, - mutator.mutated[new_before + original_forward_roll:new_stop + original_forward_roll], + unicode(mutator.mutated[new_before + original_forward_roll:new_stop + original_forward_roll]), new_before + original_forward_roll, new_before + original_forward_roll + 1)) if reverse_roll and reverse_strand: @@ -585,13 +590,13 @@ def apply_insertion(before, after, s, mutator, record, O): 'that on the reverse strand it should be an insertion of %s ' \ 'at position %i_%i.' 
% ( s, before, before + 1, - mutator.mutated[new_before - reverse_roll:new_stop - reverse_roll], + unicode(mutator.mutated[new_before - reverse_roll:new_stop - reverse_roll]), new_before - reverse_roll, (new_before - reverse_roll) + 1)) record.name(before, before + 1, 'ins', - mutator.mutated[new_before + forward_roll:new_stop + forward_roll], + unicode(mutator.mutated[new_before + forward_roll:new_stop + forward_roll]), '', (reverse_roll, forward_roll), - mutator.mutated[new_before - reverse_roll:new_stop - reverse_roll]) + unicode(mutator.mutated[new_before - reverse_roll:new_stop - reverse_roll])) #apply_insertion @@ -605,7 +610,7 @@ def apply_delins(first, last, insert, mutator, record, output): @arg last: Genomic end position of the delins. @type last: int @arg insert: Sequence to insert. - @type insert: string + @type insert: unicode @arg mutator: A Mutator instance. @type mutator: mutalyzer.mutator.Mutator @arg record: A GenRecord object. @@ -613,14 +618,13 @@ def apply_delins(first, last, insert, mutator, record, output): @arg output: The Output object. @type output: Modules.Output.Output """ - delete = mutator.orig[first - 1:last] + delete = unicode(mutator.orig[first - 1:last]) - if str(delete) == str(insert): + if delete == insert: output.addMessage(__file__, 2, 'WNOCHANGE', 'Sequence "%s" at position %i_%i is identical to ' \ 'the variant.' 
% ( - util.visualise_sequence(str(mutator.orig[first - 1:last])), - first, last)) + util.visualise_sequence(delete), first, last)) return delete_trimmed, insert_trimmed, lcp, lcs = util.trim_common(delete, insert) @@ -646,7 +650,7 @@ def apply_delins(first, last, insert, mutator, record, output): mutator, record, output) return - if str(Bio.Seq.reverse_complement(delete_trimmed)) == insert_trimmed: + if util.reverse_complement(delete_trimmed) == insert_trimmed: output.addMessage(__file__, 2, 'WWRONGTYPE', 'The given DelIns ' \ 'is actually an inversion.') apply_inversion(first + lcp, last - lcs, mutator, @@ -658,7 +662,7 @@ def apply_delins(first, last, insert, mutator, record, output): 'Sequence "%s" at position %i_%i has the same prefix or ' \ 'suffix as the inserted sequence "%s". The HGVS notation ' \ 'prescribes that it should be "%s" at position %i_%i.' % ( - util.visualise_sequence(str(mutator.orig[first - 1:last])), + util.visualise_sequence(unicode(mutator.orig[first - 1:last])), first, last, insert, insert_trimmed, first + lcp, last - lcs)) mutator.delins(first + lcp, last - lcs, insert_trimmed) @@ -952,17 +956,19 @@ def process_raw_variant(mutator, variant, record, transcript, output): """ variant, original_description = variant.RawVar, variant[-1] - # {argument} may be a number, or a subsequence of the reference. - # {sequence} is the variant subsequence. - argument = variant.Arg1 - sequence = variant.Arg2 + # `argument` may be a number, or a subsequence of the reference. + # `sequence` is the variant subsequence. + # Note that pyparsing will return `str('')` if the attribute does not + # exist, so we explicitly convert the result to unicode. + argument = unicode(variant.Arg1) + sequence = unicode(variant.Arg2) # If we are on the reverse strand, subsequences must be in reverse # complement. 
if transcript and transcript.CM.orientation == -1: - sequence = Bio.Seq.reverse_complement(sequence) + sequence = util.reverse_complement(sequence) if util.is_dna(argument): - argument = Bio.Seq.reverse_complement(argument) + argument = util.reverse_complement(argument) # Get genomic first and last positions for this variant. Below we handle # the different ways of describing these positions. @@ -1189,7 +1195,7 @@ def process_raw_variant(mutator, variant, record, transcript, output): def parse_sequence(seq): if seq.Sequence: if transcript and transcript.CM.orientation == -1: - return Bio.Seq.reverse_complement(str(seq.Sequence)) + return util.reverse_complement(seq.Sequence) return seq.Sequence if seq.StartLoc and seq.EndLoc: @@ -1228,9 +1234,9 @@ def process_raw_variant(mutator, variant, record, transcript, output): 'Position %s is out of range.' % range_last) raise _RawVariantError() - insertion = mutator.orig[range_first - 1:range_last] + insertion = unicode(mutator.orig[range_first - 1:range_last]) if seq.Inv: - insertion = Bio.Seq.reverse_complement(str(insertion)) + insertion = util.reverse_complement(insertion) return insertion @@ -1245,7 +1251,7 @@ def process_raw_variant(mutator, variant, record, transcript, output): seqs = reversed(variant.SeqList) else: seqs = variant.SeqList - insertion = ''.join(str(parse_sequence(seq)) + insertion = ''.join(parse_sequence(seq) for seq in seqs) else: insertion = parse_sequence(variant.Seq) @@ -1316,32 +1322,33 @@ def _add_transcript_info(mutator, transcript, output): if transcript.transcribe: output.addOutput('myTranscriptDescription', transcript.description or '=') output.addOutput('origMRNA', - str(util.splice(mutator.orig, transcript.mRNA.positionList))) + unicode(util.splice(mutator.orig, transcript.mRNA.positionList))) output.addOutput('mutatedMRNA', - str(util.splice(mutator.mutated, + unicode(util.splice(mutator.mutated, mutator.shift_sites(transcript.mRNA.positionList)))) # Add protein prediction to output. 
if transcript.translate: - cds_original = Seq(str(util.splice(mutator.orig, transcript.CDS.positionList)), - IUPAC.unambiguous_dna) - cds_variant = Seq(str(util.__nsplice(mutator.mutated, - mutator.shift_sites(transcript.mRNA.positionList), - mutator.shift_sites(transcript.CDS.location), - transcript.CM.orientation)), - IUPAC.unambiguous_dna) + cds_original = util.splice(mutator.orig, transcript.CDS.positionList) + cds_original.alphabet = IUPAC.unambiguous_dna - #output.addOutput('origCDS', cds_original) - - if transcript.CM.orientation == -1: - cds_original = Bio.Seq.reverse_complement(cds_original) - cds_variant = Bio.Seq.reverse_complement(cds_variant) - - if not util.is_dna(cds_original): + if not _verify_alphabet(cds_original): output.addMessage(__file__, 4, 'ENODNA', 'Invalid letters in reference sequence.') return + cds_variant = util.__nsplice(mutator.mutated, + mutator.shift_sites(transcript.mRNA.positionList), + mutator.shift_sites(transcript.CDS.location), + transcript.CM.orientation) + cds_variant.alphabet = IUPAC.unambiguous_dna + + #output.addOutput('origCDS', cds_original) + + if transcript.CM.orientation == -1: + cds_original = cds_original.reverse_complement() + cds_variant = cds_variant.reverse_complement() + if '*' in cds_original.translate(table=transcript.txTable)[:-1]: output.addMessage(__file__, 3, 'ESTOP', 'In frame stop codon found.') @@ -1354,36 +1361,35 @@ def _add_transcript_info(mutator, transcript, output): # Note: addOutput('origCDS', ...) was first before the possible # reverse complement operation above. 
- output.addOutput('origCDS', cds_original) - output.addOutput("newCDS", cds_variant[:(len(str(protein_variant)) + 1) * 3]) + output.addOutput('origCDS', unicode(cds_original)) + output.addOutput("newCDS", unicode(cds_variant[:(len(protein_variant) + 1) * 3])) - output.addOutput('oldprotein', protein_original + '*') + output.addOutput('oldprotein', unicode(protein_original) + '*') # Todo: Don't generate the fancy HTML protein views here, do this in # website.py. # I think it would also be nice to include the mutated list of splice # sites. - if not protein_variant or protein_variant[0] != 'M': + if not protein_variant or unicode(protein_variant[0]) != 'M': # Todo: Protein differences are not color-coded, # use something like below in protein_description(). - util.print_protein_html(protein_original + '*', 0, 0, output, - 'oldProteinFancy') - util.print_protein_html(protein_original + '*', 0, 0, output, - 'oldProteinFancyText', text=True) - if str(cds_variant[0:3]) in \ - Bio.Data.CodonTable.unambiguous_dna_by_id \ - [transcript.txTable].start_codons: + util.print_protein_html(unicode(protein_original) + '*', 0, 0, + output, 'oldProteinFancy') + util.print_protein_html(unicode(protein_original) + '*', 0, 0, + output, 'oldProteinFancyText', text=True) + if unicode(cds_variant[0:3]) in \ + CodonTable.unambiguous_dna_by_id[transcript.txTable].start_codons: output.addOutput('newprotein', '?') util.print_protein_html('?', 0, 0, output, 'newProteinFancy') util.print_protein_html('?', 0, 0, output, 'newProteinFancyText', text=True) - output.addOutput('altStart', str(cds_variant[0:3])) - if str(protein_original[1:]) != str(protein_variant[1:]): + output.addOutput('altStart', unicode(cds_variant[0:3])) + if unicode(protein_original[1:]) != unicode(protein_variant[1:]): output.addOutput('altProtein', - 'M' + protein_variant[1:] + '*') - util.print_protein_html('M' + protein_variant[1:] + '*', 0, + 'M' + unicode(protein_variant[1:]) + '*') + util.print_protein_html('M' + 
unicode(protein_variant[1:]) + '*', 0, 0, output, 'altProteinFancy') - util.print_protein_html('M' + protein_variant[1:] + '*', 0, + util.print_protein_html('M' + unicode(protein_variant[1:]) + '*', 0, 0, output, 'altProteinFancyText', text=True) else : output.addOutput('newprotein', '?') @@ -1395,21 +1401,22 @@ def _add_transcript_info(mutator, transcript, output): cds_length = util.cds_length( mutator.shift_sites(transcript.CDS.positionList)) descr, first, last_original, last_variant = \ - util.protein_description(cds_length, protein_original, - protein_variant) + util.protein_description(cds_length, + unicode(protein_original), + unicode(protein_variant)) # This is never used. output.addOutput('myProteinDescription', descr) - util.print_protein_html(protein_original + '*', first, + util.print_protein_html(unicode(protein_original) + '*', first, last_original, output, 'oldProteinFancy') - util.print_protein_html(protein_original + '*', first, + util.print_protein_html(unicode(protein_original) + '*', first, last_original, output, 'oldProteinFancyText', text=True) - if str(protein_original) != str(protein_variant): - output.addOutput('newprotein', protein_variant + '*') - util.print_protein_html(protein_variant + '*', first, + if unicode(protein_original) != unicode(protein_variant): + output.addOutput('newprotein', unicode(protein_variant) + '*') + util.print_protein_html(unicode(protein_variant) + '*', first, last_variant, output, 'newProteinFancy') - util.print_protein_html(protein_variant + '*', first, + util.print_protein_html(unicode(protein_variant) + '*', first, last_variant, output, 'newProteinFancyText', text=True) #_add_transcript_info @@ -1473,6 +1480,7 @@ def process_variant(mutator, description, record, output): if description.LrgAcc: # LRG case, pick the top gene. 
gene = record.record.geneList[0] + if transcript_id: transcript = gene.findLocus(transcript_id) if not transcript: @@ -1481,7 +1489,7 @@ def process_variant(mutator, description, record, output): # NG_012772.1). output.addMessage(__file__, 4, "ENOTRANSCRIPT", "Multiple transcripts found for gene %s. Please " \ - "choose from: %s" %(gene.name, + "choose from: %s" % (gene.name, ", ".join(gene.listLoci()))) else: # No transcript id given. @@ -1563,10 +1571,10 @@ def process_variant(mutator, description, record, output): 'Protein level descriptions can only be done on a protein or transcript reference.') raise _VariantError() else: - cds = Seq(str(util.splice(mutator.orig, transcript.CDS.positionList)), - IUPAC.unambiguous_dna) + cds = util.splice(mutator.orig, transcript.CDS.positionList) + cds.alphabet = IUPAC.unambiguous_dna if transcript.CM.orientation == -1: - cds = Bio.Seq.reverse_complement(cds) + cds = cds.reverse_complement() protein = cds.translate(table=transcript.txTable, cds=True, to_stop=True) mutator.orig = protein mutator.mutated = protein @@ -1644,12 +1652,12 @@ def check_variant(description, output): if parsed_description.LrgAcc: record_id = parsed_description.LrgAcc - elif parsed_description.Version: - record_id = parsed_description.RefSeqAcc + '.' + parsed_description.Version + elif parsed_description.RefSeqAcc: + if parsed_description.Version: + record_id = parsed_description.RefSeqAcc + '.' 
+ parsed_description.Version + else: + record_id = parsed_description.RefSeqAcc else: - record_id = parsed_description.RefSeqAcc - - if not record_id: output.addMessage(__file__, 4, 'ENOREF', 'No reference sequence given.') return @@ -1657,7 +1665,7 @@ def check_variant(description, output): if parsed_description.LrgAcc: filetype = 'LRG' - transcript_id = parsed_description.LRGTranscriptID + transcript_id = parsed_description.LRGTranscriptID or '' retriever = Retriever.LRGRetriever(output) else: filetype = 'GB' @@ -1732,8 +1740,8 @@ def check_variant(description, output): except _VariantError: return - output.addOutput('original', str(mutator.orig)) - output.addOutput('mutated', str(mutator.mutated)) + output.addOutput('original', unicode(mutator.orig)) + output.addOutput('mutated', unicode(mutator.mutated)) # Chromosomal region (only for GenBank human transcript references). # This is still quite ugly code, and should be cleaned up once we have @@ -1775,17 +1783,18 @@ def check_variant(description, output): transcript.proteinDescription = 'p.?' 
continue - cds_original = Seq(str(util.splice(mutator.orig, transcript.CDS.positionList)), - IUPAC.unambiguous_dna) - cds_variant = Seq(str(util.__nsplice(mutator.mutated, - mutator.shift_sites(transcript.mRNA.positionList), - mutator.shift_sites(transcript.CDS.location), - transcript.CM.orientation)), - IUPAC.unambiguous_dna) + cds_original = util.splice(mutator.orig, transcript.CDS.positionList) + cds_original.alphabet = IUPAC.unambiguous_dna + + cds_variant = util.__nsplice(mutator.mutated, + mutator.shift_sites(transcript.mRNA.positionList), + mutator.shift_sites(transcript.CDS.location), + transcript.CM.orientation) + cds_variant.alphabet = IUPAC.unambiguous_dna if transcript.CM.orientation == -1: - cds_original = Bio.Seq.reverse_complement(cds_original) - cds_variant = Bio.Seq.reverse_complement(cds_variant) + cds_original = cds_original.reverse_complement() + cds_variant = cds_variant.reverse_complement() #if '*' in cds_original.translate()[:-1]: # output.addMessage(__file__, 3, "ESTOP", @@ -1801,7 +1810,7 @@ def check_variant(description, output): # FIXME this is a bit of a rancid fix. protein_original = cds_original.translate( table=transcript.txTable, cds=True, to_stop=True) - except Bio.Data.CodonTable.TranslationError: + except CodonTable.TranslationError: if transcript.current: output.addMessage( __file__, 2, "WTRANS", @@ -1822,7 +1831,7 @@ def check_variant(description, output): cds_length = util.cds_length( mutator.shift_sites(transcript.CDS.positionList)) transcript.proteinDescription = util.protein_description( - cds_length, protein_original, protein_variant)[0] + cds_length, unicode(protein_original), unicode(protein_variant))[0] except IndexError: # Todo: Probably CDS start was hit by removal of exon.. transcript.proteinDescription = 'p.?' 
diff --git a/mutalyzer/website/__init__.py b/mutalyzer/website/__init__.py index 730c33e86f6ee5be9edd5afcb13166d4c58d907d..2ce0450bf8765e9197c37545aeef9b3281315c43 100644 --- a/mutalyzer/website/__init__.py +++ b/mutalyzer/website/__init__.py @@ -3,6 +3,8 @@ Mutalyzer website interface using the Flask framework. """ +from __future__ import unicode_literals + import logging import os import pkg_resources diff --git a/mutalyzer/website/templates/base.html b/mutalyzer/website/templates/base.html index 2f45caf9f0a7a4be3f98721736c785861b620dad..270e3bdfd75d1bd69e692de91f876300b26f4066 100644 --- a/mutalyzer/website/templates/base.html +++ b/mutalyzer/website/templates/base.html @@ -22,7 +22,7 @@ src="{{ url_for('static', filename='js/generator.js') }}"> </script> <meta http-equiv="Content-Type" - content="text/html; charset=iso-8859-1"> + content="text/html; charset=utf-8"> <title>Mutalyzer {{ mutalyzer_version }} — {{ page_title }}</title> </head> <body diff --git a/mutalyzer/website/views.py b/mutalyzer/website/views.py index 475330b7ce084273bc61627e5a24221d2b0ccff0..84b5cf857b424bb6e42b6c2f41d97877459df588 100644 --- a/mutalyzer/website/views.py +++ b/mutalyzer/website/views.py @@ -3,16 +3,17 @@ Mutalyzer website views. 
""" +from __future__ import unicode_literals + import bz2 import os import pkg_resources import re -from cStringIO import StringIO import urllib from flask import Blueprint -from flask import (abort, current_app, jsonify, make_response, redirect, - render_template, request, send_from_directory, url_for) +from flask import (abort, jsonify, make_response, redirect, render_template, + request, send_from_directory, url_for) import jinja2 from lxml import etree from spyne.server.http import HttpBase @@ -22,9 +23,8 @@ import mutalyzer from mutalyzer import (announce, describe, File, Retriever, Scheduler, stats, util, variantchecker) from mutalyzer.config import settings -from mutalyzer.db import session from mutalyzer.db.models import BATCH_JOB_TYPES -from mutalyzer.db.models import Assembly, BatchJob, BatchQueueItem +from mutalyzer.db.models import Assembly, BatchJob from mutalyzer.grammar import Grammar from mutalyzer.mapping import Converter from mutalyzer.output import Output @@ -135,16 +135,16 @@ def soap_api(): """ soap_server = HttpBase(soap.application) soap_server.doc.wsdl11.build_interface_document(settings.SOAP_WSDL_URL) - wsdl_handle = StringIO(soap_server.doc.wsdl11.get_interface_document()) + wsdl_string = soap_server.doc.wsdl11.get_interface_document() - xsl_handle = open(os.path.join( - pkg_resources.resource_filename('mutalyzer', 'website/templates'), - 'wsdl-viewer.xsl'), 'r') - wsdl_doc = etree.parse(wsdl_handle) - xsl_doc = etree.parse(xsl_handle) + xsl_file = os.path.join( + pkg_resources.resource_filename('mutalyzer', 'website/templates'), + 'wsdl-viewer.xsl') + wsdl_doc = etree.fromstring(wsdl_string) + xsl_doc = etree.parse(xsl_file) transform = etree.XSLT(xsl_doc) - return make_response(str(transform(wsdl_doc))) + return make_response(unicode(transform(wsdl_doc))) @website.route('/downloads/<string:filename>') @@ -159,7 +159,7 @@ def downloads(filename): except jinja2.exceptions.TemplateNotFound: abort(404) - response.headers['Content-Type'] = 
'text/plain' + response.headers['Content-Type'] = 'text/plain; charset=utf-8' response.headers['Content-Disposition'] = ('attachment; filename="%s"' % filename) return response @@ -233,10 +233,7 @@ def name_checker(): % (description, request.remote_addr)) stats.increment_counter('name-checker/website') - # Todo: The following is probably a problem elsewhere too. We stringify - # the variant, because a unicode string crashes BioPython's - # `reverse_complement`. - variantchecker.check_variant(str(description), output) + variantchecker.check_variant(description, output) errors, warnings, summary = output.Summary() parse_error = output.getOutput('parseError') @@ -272,18 +269,20 @@ def name_checker(): # Experimental description extractor. if (output.getIndexedOutput('original', 0) and output.getIndexedOutput('mutated', 0)): + extracted = extractedProt = '(skipped)' + allele = describe.describe(output.getIndexedOutput('original', 0), output.getIndexedOutput('mutated', 0)) - prot_allele = describe.describe( - output.getIndexedOutput('oldprotein', 0), - output.getIndexedOutput('newprotein', 0, default=''), - DNA=False) - - extracted = extractedProt = '(skipped)' if allele: extracted = describe.alleleDescription(allele) - if prot_allele: - extractedProt = describe.alleleDescription(prot_allele) + + if output.getIndexedOutput('oldprotein', 0): + prot_allele = describe.describe( + output.getIndexedOutput('oldprotein', 0), + output.getIndexedOutput('newprotein', 0, default=''), + DNA=False) + if prot_allele: + extractedProt = describe.alleleDescription(prot_allele) else: extracted = extractedProt = '' @@ -350,11 +349,10 @@ def bed(): if not description: abort(404) - return render_template('name-checker.html') output = Output(__file__) - variantchecker.check_variant(str(description), output) + variantchecker.check_variant(description, output) raw_variants = output.getIndexedOutput('rawVariantsChromosomal', 0) if not raw_variants: @@ -376,14 +374,14 @@ def bed(): for descr, 
positions in raw_variants[2]: bed += '\t'.join([raw_variants[0], - str(min(positions) - 1), - str(max(positions)), + unicode(min(positions) - 1), + unicode(max(positions)), descr, '0', raw_variants[1]]) + '\n' response = make_response(bed) - response.headers['Content-Type'] = 'text/plain' + response.headers['Content-Type'] = 'text/plain; charset=utf-8' return response @@ -579,7 +577,7 @@ def reference_loader_submit(): output = Output(__file__) output.addMessage(__file__, -1, 'INFO', 'Received request upload(%s) with arguments %s from %s' - % (method, str(request.form), request.remote_addr)) + % (method, unicode(request.form), request.remote_addr)) assemblies = Assembly.query \ .order_by(Assembly.taxonomy_common_name.asc(), @@ -668,11 +666,11 @@ def reference_loader_submit(): if not ud: errors.append('The request could not be completed') - errors.extend(str(m) for m in output.getMessages()) + errors.extend(unicode(m) for m in output.getMessages()) output.addMessage(__file__, -1, 'INFO', 'Finished request upload(%s) with arguments %s from %s' - % (method, str(request.form), request.remote_addr)) + % (method, unicode(request.form), request.remote_addr)) return render_template('reference-loader.html', assemblies=assemblies, @@ -737,7 +735,7 @@ def reference(filename): response = make_response(bz2.BZ2File(file_path, 'r').read()) - response.headers['Content-Type'] = 'text/plain' + response.headers['Content-Type'] = 'text/plain; charset=utf-8' response.headers['Content-Disposition'] = ('attachment; filename="%s"' % filename) return response @@ -773,7 +771,9 @@ def batch_jobs_submit(): """ job_type = request.form.get('job_type') email = request.form.get('email') - file = request.files.get('file') + + # Note that this is always a seekable binary file object. 
+ batch_file = request.files.get('file') assemblies = Assembly.query \ .order_by(Assembly.taxonomy_common_name.asc(), @@ -809,7 +809,7 @@ def batch_jobs_submit(): scheduler = Scheduler.Scheduler() file_instance = File.File(output) - job, columns = file_instance.parseBatchFile(file) + job, columns = file_instance.parseBatchFile(batch_file) if job is None: errors.append('Could not parse input file, please check your ' @@ -894,7 +894,7 @@ def batch_job_result(result_id): return send_from_directory(settings.CACHE_DIR, 'batch-job-%s.txt' % result_id, - mimetype='text/plain', + mimetype='text/plain; charset=utf-8', as_attachment=True) @@ -933,10 +933,7 @@ def lovd_get_gs(): % (mutation_name, variant_record, forward, request.remote_addr)) - # Todo: The following is probably a problem elsewhere too. - # We stringify the variant, because a unicode string crashes - # Bio.Seq.reverse_complement in mapping.py:607. - variantchecker.check_variant(str(mutation_name), output) + variantchecker.check_variant(mutation_name, output) output.addMessage(__file__, -1, 'INFO', 'Finished request getGS(%s, %s, %s)' @@ -955,11 +952,11 @@ def lovd_get_gs(): standalone=1)) else: response = make_response(l[0]) - response.headers['Content-Type'] = 'text/plain' + response.headers['Content-Type'] = 'text/plain; charset=utf-8' return response response = make_response('Transcript not found') - response.headers['Content-Type'] = 'text/plain' + response.headers['Content-Type'] = 'text/plain; charset=utf-8' return response @@ -1041,7 +1038,7 @@ def lovd_variant_info(): assembly = Assembly.by_name_or_alias(build) except NoResultFound: response = make_response('invalid build') - response.headers['Content-Type'] = 'text/plain' + response.headers['Content-Type'] = 'text/plain; charset=utf-8' return response converter = Converter(assembly, output) @@ -1079,7 +1076,7 @@ def lovd_variant_info(): response = re.sub('^Error \(.*\):', 'Error:', result) response = make_response(result) - 
response.headers['Content-Type'] = 'text/plain' + response.headers['Content-Type'] = 'text/plain; charset=utf-8' return response diff --git a/requirements.txt b/requirements.txt index ab361e7d283147c643757e10f9d2ee1e212d0c3e..63d953eace27346d46a9a1a03088b42509c7b87e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ pyparsing==2.0.1 pytz==2013.9 requests==2.2.1 simplejson==3.3.3 --e git+https://github.com/LUMC/spyne.git@spyne-2.11.0-mutalyzer#egg=spyne +spyne==2.11.0 suds==0.4 wsgiref==0.1.2 xlrd==0.9.2 @@ -21,3 +21,5 @@ mock==1.0.1 alembic==0.6.3 Sphinx==1.2.1 sphinx-rtd-theme==0.1.5 +cchardet==0.3.5 +Werkzeug==0.9.6 diff --git a/tests/data/batch_input.ods b/tests/data/batch_input.ods new file mode 100644 index 0000000000000000000000000000000000000000..ea08744237a58f80386e041f23583e6555b459ed Binary files /dev/null and b/tests/data/batch_input.ods differ diff --git a/tests/data/batch_input.sxc b/tests/data/batch_input.sxc new file mode 100644 index 0000000000000000000000000000000000000000..942282e2acc2e68f5ac7e496c0f48db6f2d1870b Binary files /dev/null and b/tests/data/batch_input.sxc differ diff --git a/tests/data/batch_input.xls b/tests/data/batch_input.xls new file mode 100644 index 0000000000000000000000000000000000000000..e795855d7ae0856f3b91da7b2732245274073f75 Binary files /dev/null and b/tests/data/batch_input.xls differ diff --git a/tests/data/batch_input.xlsx b/tests/data/batch_input.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b2a5a87674b7eb49eed8c7ed53e227762e8bf17f Binary files /dev/null and b/tests/data/batch_input.xlsx differ diff --git a/tests/data/image.zip b/tests/data/image.zip new file mode 100644 index 0000000000000000000000000000000000000000..df09158894dfb403f0edb2e5dc24a2749bee6c0d Binary files /dev/null and b/tests/data/image.zip differ diff --git a/tests/fixtures.py b/tests/fixtures.py index 595d72a663e3ec06a6df748f3d21e6aa4a8019ee..71b1ae1bfc7bba9bc17a56f8c1431f56b2eddde7 100644 --- 
a/tests/fixtures.py +++ b/tests/fixtures.py @@ -7,6 +7,8 @@ as :func:`hg19` must be called after the :func:`database` fixture). """ +from __future__ import unicode_literals + import os import shutil diff --git a/tests/old/lrgtest.py b/tests/old/lrgtest.py index afeefc3324596c39bf3723f40d119a4f0df90d0c..d2dae2bca774fad39d9cdeb7bc8e888db7083075 100644 --- a/tests/old/lrgtest.py +++ b/tests/old/lrgtest.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import sys, os, unittest, types #make it possible to import the Modules diff --git a/tests/old/maptest.py b/tests/old/maptest.py index 7f3105a46eac5c4af99025728b736fa08c49b4e1..40dc1d15dfee1df5b4cfcc46c2d6948dd6796423 100644 --- a/tests/old/maptest.py +++ b/tests/old/maptest.py @@ -1,3 +1,4 @@ +from __future__ import unicode_literals import sys, os, unittest, types #make it possible to import the Modules diff --git a/tests/old/recordtest.py b/tests/old/recordtest.py index d55bd58c0df440a4774ceaf59a44444f388740dd..a9cc9354557e3ba32f299bd23718cddab3ff7b46 100644 --- a/tests/old/recordtest.py +++ b/tests/old/recordtest.py @@ -2,6 +2,7 @@ recordtest.py contains TestRecord - a BaseClass for testing GenRecord.Record instances """ +from __future__ import unicode_literals import unittest, types from Modules import GenRecord #test class-types @@ -56,7 +57,7 @@ class TestRecord(unittest.TestCase): self.assertTrue(isinstance(plist, (types.NoneType, GenRecord.PList))) - #self.assertTrue(any(map(isinstance, + #self.assertTrue(any(map(isinstance, def _test_if_loc(self, loc): @@ -76,7 +77,5 @@ class TestRecord(unittest.TestCase): if __name__ == "__main__": - # This file should be imported + # This file should be imported pass - - diff --git a/tests/test_crossmap.py b/tests/test_crossmap.py index ff9d6d75928918b19d01b769b5a099d864408b11..990f93fe877dfcc6d8945187a4d559238f7f9a45 100644 --- a/tests/test_crossmap.py +++ b/tests/test_crossmap.py @@ -3,6 +3,8 @@ Tests for the Crossmap module. 
""" +from __future__ import unicode_literals + #import logging; logging.basicConfig() from mutalyzer.Crossmap import Crossmap diff --git a/tests/test_describe.py b/tests/test_describe.py index 8315213eb49cc5c688d1d4816841dcc5c7dcb02b..e81c7ce45bf6dbb5776326d75e3f7f410179db6d 100644 --- a/tests/test_describe.py +++ b/tests/test_describe.py @@ -3,6 +3,8 @@ Tests for the mutalyzer.describe module. """ +from __future__ import unicode_literals + #import logging; logging.basicConfig() import os diff --git a/tests/test_grammar.py b/tests/test_grammar.py index 1ebaa399e372155291f33ce1c6de21b22682c5ad..dad9a9c64c959cd91433b0324f0c7eb346f3e58c 100644 --- a/tests/test_grammar.py +++ b/tests/test_grammar.py @@ -3,6 +3,8 @@ Tests for the mutalyzer.grammar module. """ +from __future__ import unicode_literals + #import logging; logging.basicConfig() import os diff --git a/tests/test_mapping.py b/tests/test_mapping.py index 5ebdc60e667cc3ec46cd46cda7c71e29561061ee..620f9d757f388579381edbf0eb3c64d032db51a3 100644 --- a/tests/test_mapping.py +++ b/tests/test_mapping.py @@ -3,6 +3,8 @@ Tests for the mapping module. """ +from __future__ import unicode_literals + #import logging; logging.basicConfig() from sqlalchemy import or_ diff --git a/tests/test_mutator.py b/tests/test_mutator.py index 36c5b8d152ebfa553e859b9ef11dae3e3a40bd43..05e2c685fb33f29978839b17236c933c4b232016 100644 --- a/tests/test_mutator.py +++ b/tests/test_mutator.py @@ -3,6 +3,8 @@ Tests for the mutalyzer.mutator module. 
""" +from __future__ import unicode_literals + #import logging; logging.basicConfig() import re import os @@ -666,7 +668,7 @@ class TestMutator(MutalyzerTest): """ m = self._mutator(Seq('ATCGATCG')) m.deletion(2, 2) - assert str(m.mutated) == str(Seq('ACGATCG')) + assert unicode(m.mutated) == unicode(Seq('ACGATCG')) def test_largedel(self): """ @@ -674,7 +676,7 @@ class TestMutator(MutalyzerTest): """ m = self._mutator(Seq('ATCGATCG')) m.deletion(2, 7) - assert str(m.mutated) == str(Seq('AG')) + assert unicode(m.mutated) == unicode(Seq('AG')) def test_ins(self): """ @@ -682,7 +684,7 @@ class TestMutator(MutalyzerTest): """ m = self._mutator(Seq('ATCGATCG')) m.insertion(2, 'A') - assert str(m.mutated) == str(Seq('ATACGATCG')) + assert unicode(m.mutated) == unicode(Seq('ATACGATCG')) def test_largeins(self): """ @@ -690,7 +692,7 @@ class TestMutator(MutalyzerTest): """ m = self._mutator(Seq('ATCGATCG')) m.insertion(2, 'ATCG') - assert str(m.mutated) == str(Seq('ATATCGCGATCG')) + assert unicode(m.mutated) == unicode(Seq('ATATCGCGATCG')) def test_sub(self): """ @@ -698,7 +700,7 @@ class TestMutator(MutalyzerTest): """ m = self._mutator(Seq('ATCGATCG')) m.substitution(3, 'G') - assert str(m.mutated) == str(Seq('ATGGATCG')) + assert unicode(m.mutated) == unicode(Seq('ATGGATCG')) def test_adjecent_del_sub_1(self): """ @@ -709,7 +711,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(2, 2) m.substitution(3, 'G') - assert str(m.mutated) == str(Seq('AGGATCG')) + assert unicode(m.mutated) == unicode(Seq('AGGATCG')) def test_adjecent_del_sub_2(self): """ @@ -718,7 +720,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(3, 3) m.substitution(2, 'G') - assert str(m.mutated) == str(Seq('AGGATCG')) + assert unicode(m.mutated) == unicode(Seq('AGGATCG')) def test_near_adjecent_del_sub_1(self): """ @@ -727,7 +729,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(2, 2) 
m.substitution(4, 'T') - assert str(m.mutated) == str(Seq('ACTATCG')) + assert unicode(m.mutated) == unicode(Seq('ACTATCG')) def test_near_adjecent_del_sub_2(self): """ @@ -736,7 +738,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(4, 4) m.substitution(2, 'G') - assert str(m.mutated) == str(Seq('AGCATCG')) + assert unicode(m.mutated) == unicode(Seq('AGCATCG')) def test_adjecent_largedel_sub_1(self): """ @@ -746,7 +748,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(2, 6) m.substitution(7, 'T') - assert str(m.mutated) == str(Seq('ATG')) + assert unicode(m.mutated) == unicode(Seq('ATG')) def test_adjecent_largedel_sub_2(self): """ @@ -756,7 +758,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(3, 7) m.substitution(2, 'C') - assert str(m.mutated) == str(Seq('ACG')) + assert unicode(m.mutated) == unicode(Seq('ACG')) def test_near_adjecent_largedel_sub_1(self): """ @@ -765,7 +767,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(2, 5) m.substitution(7, 'T') - assert str(m.mutated) == str(Seq('ATTG')) + assert unicode(m.mutated) == unicode(Seq('ATTG')) def test_near_adjecent_largedel_sub_2(self): """ @@ -774,7 +776,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(4, 7) m.substitution(2, 'C') - assert str(m.mutated) == str(Seq('ACCG')) + assert unicode(m.mutated) == unicode(Seq('ACCG')) def test_adjectent_del_ins_1(self): """ @@ -783,7 +785,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(2, 2) m.insertion(2, 'G') - assert str(m.mutated) == str(Seq('AGCGATCG')) + assert unicode(m.mutated) == unicode(Seq('AGCGATCG')) def test_adjectent_del_ins_2(self): """ @@ -792,7 +794,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(3, 3) m.insertion(2, 'A') - assert str(m.mutated) == str(Seq('ATAGATCG')) + assert unicode(m.mutated) == 
unicode(Seq('ATAGATCG')) def test_near_adjectent_del_ins(self): """ @@ -801,7 +803,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(2, 2) m.insertion(3, 'T') - assert str(m.mutated) == str(Seq('ACTGATCG')) + assert unicode(m.mutated) == unicode(Seq('ACTGATCG')) def test_adjecent_ins_sub_1(self): """ @@ -811,7 +813,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.insertion(2, 'A') m.substitution(3, 'G') - assert str(m.mutated) == str(Seq('ATAGGATCG')) + assert unicode(m.mutated) == unicode(Seq('ATAGGATCG')) def test_adjecent_ins_sub_2(self): """ @@ -821,7 +823,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.insertion(2, 'A') m.substitution(2, 'G') - assert str(m.mutated) == str(Seq('AGACGATCG')) + assert unicode(m.mutated) == unicode(Seq('AGACGATCG')) def test_near_adjecent_ins_sub(self): """ @@ -831,7 +833,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.insertion(2, 'A') m.substitution(4, 'T') - assert str(m.mutated) == str(Seq('ATACTATCG')) + assert unicode(m.mutated) == unicode(Seq('ATACTATCG')) def test_adjecent_largeins_sub_1(self): """ @@ -841,7 +843,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.insertion(2, 'ATCG') m.substitution(3, 'G') - assert str(m.mutated) == str(Seq('ATATCGGGATCG')) + assert unicode(m.mutated) == unicode(Seq('ATATCGGGATCG')) def test_adjecent_largeins_sub_2(self): """ @@ -851,7 +853,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.insertion(2, 'ATCG') m.substitution(2, 'G') - assert str(m.mutated) == str(Seq('AGATCGCGATCG')) + assert unicode(m.mutated) == unicode(Seq('AGATCGCGATCG')) def test_near_adjecent_largeins_sub(self): """ @@ -861,7 +863,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.insertion(2, 'ATCG') m.substitution(4, 'T') - assert str(m.mutated) == str(Seq('ATATCGCTATCG')) + assert unicode(m.mutated) == 
unicode(Seq('ATATCGCTATCG')) def test_adjecent_del_del_1(self): """ @@ -870,7 +872,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(2, 2) m.deletion(3, 3) - assert str(m.mutated) == str(Seq('AGATCG')) + assert unicode(m.mutated) == unicode(Seq('AGATCG')) def test_adjecent_del_del_2(self): """ @@ -879,7 +881,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(3, 3) m.deletion(2, 2) - assert str(m.mutated) == str(Seq('AGATCG')) + assert unicode(m.mutated) == unicode(Seq('AGATCG')) def test_adjecent_delins_snp_1(self): """ @@ -888,7 +890,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 2, 'A') m.substitution(3, 'G') - assert str(m.mutated) == str(Seq('AAGGATCG')) + assert unicode(m.mutated) == unicode(Seq('AAGGATCG')) def test_adjecent_delins_snp_2(self): """ @@ -897,7 +899,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(3, 3, 'A') m.substitution(2, 'G') - assert str(m.mutated) == str(Seq('AGAGATCG')) + assert unicode(m.mutated) == unicode(Seq('AGAGATCG')) def test_adjecent_largedelins_eq_snp_1(self): """ @@ -907,7 +909,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 6, 'AAAAA') m.substitution(7, 'G') - assert str(m.mutated) == str(Seq('AAAAAAGG')) + assert unicode(m.mutated) == unicode(Seq('AAAAAAGG')) def test_adjecent_largedelins_min_snp_1(self): """ @@ -917,7 +919,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 6, 'AAA') m.substitution(7, 'G') - assert str(m.mutated) == str(Seq('AAAAGG')) + assert unicode(m.mutated) == unicode(Seq('AAAAGG')) def test_adjecent_largedelins_plus_snp_1(self): """ @@ -927,7 +929,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 6, 'AAAAAAA') m.substitution(7, 'G') - assert str(m.mutated) == str(Seq('AAAAAAAAGG')) + assert unicode(m.mutated) == unicode(Seq('AAAAAAAAGG')) def 
test_adjecent_largedelins_eq_snp_2(self): """ @@ -937,7 +939,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(3, 7, 'AAAAA') m.substitution(2, 'G') - assert str(m.mutated) == str(Seq('AGAAAAAG')) + assert unicode(m.mutated) == unicode(Seq('AGAAAAAG')) def test_adjecent_largedelins_min_snp_2(self): """ @@ -947,7 +949,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(3, 7, 'AAA') m.substitution(2, 'G') - assert str(m.mutated) == str(Seq('AGAAAG')) + assert unicode(m.mutated) == unicode(Seq('AGAAAG')) def test_adjecent_largedelins_plus_snp_2(self): """ @@ -957,7 +959,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(3, 7, 'AAAAAAA') m.substitution(2, 'G') - assert str(m.mutated) == str(Seq('AGAAAAAAAG')) + assert unicode(m.mutated) == unicode(Seq('AGAAAAAAAG')) def test_adjecent_delins_del_1(self): """ @@ -966,7 +968,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 2, 'A') m.deletion(3, 3) - assert str(m.mutated) == str(Seq('AAGATCG')) + assert unicode(m.mutated) == unicode(Seq('AAGATCG')) def test_adjecent_delins_del_2(self): """ @@ -975,7 +977,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(3, 3, 'A') m.deletion(2, 2) - assert str(m.mutated) == str(Seq('AAGATCG')) + assert unicode(m.mutated) == unicode(Seq('AAGATCG')) def test_adjecent_largedelins_eq_del_1(self): """ @@ -985,7 +987,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 6, 'AAAAA') m.deletion(7, 7) - assert str(m.mutated) == str(Seq('AAAAAAG')) + assert unicode(m.mutated) == unicode(Seq('AAAAAAG')) def test_adjecent_largedelins_min_del_1(self): """ @@ -995,7 +997,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 6, 'AAA') m.deletion(7, 7) - assert str(m.mutated) == str(Seq('AAAAG')) + assert unicode(m.mutated) == unicode(Seq('AAAAG')) def 
test_adjecent_largedelins_plus_del_1(self): """ @@ -1005,7 +1007,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 6, 'AAAAAAA') m.deletion(7, 7) - assert str(m.mutated) == str(Seq('AAAAAAAAG')) + assert unicode(m.mutated) == unicode(Seq('AAAAAAAAG')) def test_adjecent_largedelins_eq_del_2(self): """ @@ -1015,7 +1017,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(3, 7, 'AAAAA') m.deletion(2, 2) - assert str(m.mutated) == str(Seq('AAAAAAG')) + assert unicode(m.mutated) == unicode(Seq('AAAAAAG')) def test_adjecent_largedelins_min_del_2(self): """ @@ -1025,7 +1027,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(3, 7, 'AAA') m.deletion(2, 2) - assert str(m.mutated) == str(Seq('AAAAG')) + assert unicode(m.mutated) == unicode(Seq('AAAAG')) def test_adjecent_largedelins_plus_del_2(self): """ @@ -1035,7 +1037,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(3, 7, 'AAAAAAA') m.deletion(2, 2) - assert str(m.mutated) == str(Seq('AAAAAAAAG')) + assert unicode(m.mutated) == unicode(Seq('AAAAAAAAG')) def test_adjectent_delins_ins_1(self): """ @@ -1044,7 +1046,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 2, 'A') m.insertion(2, 'G') - assert str(m.mutated) == str(Seq('AAGCGATCG')) + assert unicode(m.mutated) == unicode(Seq('AAGCGATCG')) def test_adjectent_delins_ins_2(self): """ @@ -1053,7 +1055,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(3, 3, 'A') m.insertion(2, 'G') - assert str(m.mutated) == str(Seq('ATGAGATCG')) + assert unicode(m.mutated) == unicode(Seq('ATGAGATCG')) def test_adjectent_largedelins_eq_ins_1(self): """ @@ -1062,7 +1064,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 6, 'AAAAA') m.insertion(6, 'G') - assert str(m.mutated) == str(Seq('AAAAAAGCG')) + assert unicode(m.mutated) == unicode(Seq('AAAAAAGCG')) def 
test_adjectent_largedelins_min_ins_1(self): """ @@ -1071,7 +1073,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 6, 'AAA') m.insertion(6, 'G') - assert str(m.mutated) == str(Seq('AAAAGCG')) + assert unicode(m.mutated) == unicode(Seq('AAAAGCG')) def test_adjectent_largedelins_plus_ins_1(self): """ @@ -1080,7 +1082,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 6, 'AAAAAAA') m.insertion(6, 'G') - assert str(m.mutated) == str(Seq('AAAAAAAAGCG')) + assert unicode(m.mutated) == unicode(Seq('AAAAAAAAGCG')) def test_adjectent_largedelins_eq_ins_2(self): """ @@ -1089,7 +1091,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(3, 7, 'AAAAA') m.insertion(2, 'G') - assert str(m.mutated) == str(Seq('ATGAAAAAG')) + assert unicode(m.mutated) == unicode(Seq('ATGAAAAAG')) def test_adjectent_largedelins_min_ins_2(self): """ @@ -1098,7 +1100,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(3, 7, 'AAA') m.insertion(2, 'G') - assert str(m.mutated) == str(Seq('ATGAAAG')) + assert unicode(m.mutated) == unicode(Seq('ATGAAAG')) def test_adjectent_largedelins_plus_ins_2(self): """ @@ -1107,7 +1109,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(3, 7, 'AAAAAAA') m.insertion(2, 'G') - assert str(m.mutated) == str(Seq('ATGAAAAAAAG')) + assert unicode(m.mutated) == unicode(Seq('ATGAAAAAAAG')) def test_adjectent_delins_del_delins(self): """ @@ -1116,7 +1118,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 3, 'A') m.delins(4, 4, 'T') - assert str(m.mutated) == str(Seq('AATATCG')) + assert unicode(m.mutated) == unicode(Seq('AATATCG')) def test_adjectent_largedelins_plus_delins_1(self): """ @@ -1125,7 +1127,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 6, 'AAAAAAA') m.delins(7, 7, 'T') - assert str(m.mutated) == str(Seq('AAAAAAAATG')) + assert 
unicode(m.mutated) == unicode(Seq('AAAAAAAATG')) def test_adjectent_largedelins_plus_delins_2(self): """ @@ -1134,7 +1136,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(3, 7, 'AAAAAAA') m.delins(2, 2, 'C') - assert str(m.mutated) == str(Seq('ACAAAAAAAG')) + assert unicode(m.mutated) == unicode(Seq('ACAAAAAAAG')) def test_adjectent_largedelins_min_delins_1(self): """ @@ -1143,7 +1145,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(2, 6, 'AAA') m.delins(7, 7, 'T') - assert str(m.mutated) == str(Seq('AAAATG')) + assert unicode(m.mutated) == unicode(Seq('AAAATG')) def test_adjectent_largedelins_min_delins_2(self): """ @@ -1152,7 +1154,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.delins(3, 7, 'AAA') m.delins(2, 2, 'C') - assert str(m.mutated) == str(Seq('ACAAAG')) + assert unicode(m.mutated) == unicode(Seq('ACAAAG')) def test_adjectent_del_dup_1(self): """ @@ -1161,7 +1163,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(2, 2) m.duplication(3, 3) - assert str(m.mutated) == str(Seq('ACCGATCG')) + assert unicode(m.mutated) == unicode(Seq('ACCGATCG')) def test_adjectent_del_dup_2(self): """ @@ -1170,7 +1172,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(3, 3) m.duplication(2, 2) - assert str(m.mutated) == str(Seq('ATTGATCG')) + assert unicode(m.mutated) == unicode(Seq('ATTGATCG')) def test_adjectent_ins_dup_1(self): """ @@ -1179,7 +1181,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.insertion(2, 'G') m.duplication(3, 3) - assert str(m.mutated) == str(Seq('ATGCCGATCG')) + assert unicode(m.mutated) == unicode(Seq('ATGCCGATCG')) def test_adjectent_ins_dup_2(self): """ @@ -1188,7 +1190,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.insertion(2, 'G') m.duplication(2, 2) - assert str(m.mutated) == str(Seq('ATTGCGATCG')) + assert unicode(m.mutated) 
== unicode(Seq('ATTGCGATCG')) def test_adjectent_ins_ins_1(self): """ @@ -1197,7 +1199,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.insertion(2, 'G') m.insertion(3, 'A') - assert str(m.mutated) == str(Seq('ATGCAGATCG')) + assert unicode(m.mutated) == unicode(Seq('ATGCAGATCG')) def test_adjectent_ins_ins_2(self): """ @@ -1206,7 +1208,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.insertion(3, 'A') m.insertion(2, 'G') - assert str(m.mutated) == str(Seq('ATGCAGATCG')) + assert unicode(m.mutated) == unicode(Seq('ATGCAGATCG')) def test_ins_ins(self): """ @@ -1215,7 +1217,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.insertion(2, 'G') m.insertion(2, 'A') - assert str(m.mutated) in (str(Seq('ATGACGATCG')), str(Seq('ATAGCGATCG'))) + assert unicode(m.mutated) in (unicode(Seq('ATGACGATCG')), unicode(Seq('ATAGCGATCG'))) def test_adjecent_inv_inv_1(self): """ @@ -1224,7 +1226,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.inversion(2, 2) m.inversion(3, 3) - assert str(m.mutated) == str(Seq('AAGGATCG')) + assert unicode(m.mutated) == unicode(Seq('AAGGATCG')) def test_adjecent_inv_inv_2(self): """ @@ -1233,7 +1235,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.inversion(3, 3) m.inversion(2, 2) - assert str(m.mutated) == str(Seq('AAGGATCG')) + assert unicode(m.mutated) == unicode(Seq('AAGGATCG')) def test_adjecent_dup_dup_1(self): """ @@ -1242,7 +1244,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.duplication(2, 2) m.duplication(3, 3) - assert str(m.mutated) == str(Seq('ATTCCGATCG')) + assert unicode(m.mutated) == unicode(Seq('ATTCCGATCG')) def test_adjecent_dup_dup_2(self): """ @@ -1251,7 +1253,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.duplication(3, 3) m.duplication(2, 2) - assert str(m.mutated) == str(Seq('ATTCCGATCG')) + assert unicode(m.mutated) == 
unicode(Seq('ATTCCGATCG')) def test_adjecent_del_inv_1(self): """ @@ -1260,7 +1262,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(2, 2) m.inversion(3, 3) - assert str(m.mutated) == str(Seq('AGGATCG')) + assert unicode(m.mutated) == unicode(Seq('AGGATCG')) def test_adjecent_del_inv_2(self): """ @@ -1269,7 +1271,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.deletion(3, 3) m.inversion(2, 2) - assert str(m.mutated) == str(Seq('AAGATCG')) + assert unicode(m.mutated) == unicode(Seq('AAGATCG')) def test_adjecent_ins_inv_1(self): """ @@ -1278,7 +1280,7 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.insertion(2, 'G') m.inversion(3, 3) - assert str(m.mutated) == str(Seq('ATGGGATCG')) + assert unicode(m.mutated) == unicode(Seq('ATGGGATCG')) def test_adjecent_ins_inv_2(self): """ @@ -1287,4 +1289,4 @@ class TestMutator(MutalyzerTest): m = self._mutator(Seq('ATCGATCG')) m.insertion(2, 'G') m.inversion(2, 2) - assert str(m.mutated) == str(Seq('AAGCGATCG')) + assert unicode(m.mutated) == unicode(Seq('AAGCGATCG')) diff --git a/tests/test_parsers_genbank.py b/tests/test_parsers_genbank.py index 7640c496af9aef1c871fa313b69e2ef836d1aace..f04b883971617ee9885ba3478bd667c674189650 100644 --- a/tests/test_parsers_genbank.py +++ b/tests/test_parsers_genbank.py @@ -3,6 +3,8 @@ Tests for the mutalyzer.parsers.genbank module. """ +from __future__ import unicode_literals + #import logging; logging.basicConfig() from mutalyzer.parsers import genbank diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index fc5e4abe498469e83f4986b40fb68967fd165d86..791f867ddad19b9a71ac333726e5f13ade37d782 100644 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -3,16 +3,18 @@ Tests for the Scheduler module. 
""" +from __future__ import unicode_literals + import bz2 import os -import StringIO +import io #import logging; logging.basicConfig() from Bio import Entrez from mock import patch from mutalyzer.config import settings -from mutalyzer.db.models import BatchJob, BatchQueueItem +from mutalyzer.db.models import BatchJob from mutalyzer import File from mutalyzer import output from mutalyzer import Scheduler @@ -28,12 +30,10 @@ class TestScheduler(MutalyzerTest): """ fixtures = (database, ) - @staticmethod - def _batch_job(variants, expected, job_type, argument=None): + def _batch_job(self, batch_file, expected, job_type, argument=None): file_instance = File.File(output.Output('test')) scheduler = Scheduler.Scheduler() - batch_file = StringIO.StringIO('\n'.join(variants) + '\n') job, columns = file_instance.parseBatchFile(batch_file) result_id = scheduler.addJob('test@test.test', job, columns, job_type, argument=argument) @@ -41,7 +41,7 @@ class TestScheduler(MutalyzerTest): batch_job = BatchJob.query.filter_by(result_id=result_id).one() left = batch_job.batch_queue_items.count() - assert left == len(variants) + assert left == len(expected) scheduler.process() @@ -49,11 +49,16 @@ class TestScheduler(MutalyzerTest): assert left == 0 filename = 'batch-job-%s.txt' % result_id - result = open(os.path.join(settings.CACHE_DIR, filename)) + result = io.open(os.path.join(settings.CACHE_DIR, filename), + encoding='utf-8') next(result) # Header. assert expected == [line.strip().split('\t') for line in result] + def _batch_job_plain_text(self, variants, expected, job_type, argument=None): + batch_file = io.BytesIO(('\n'.join(variants) + '\n').encode('utf-8')) + self._batch_job(batch_file, expected, job_type, argument=argument) + def test_syntax_checker(self): """ Simple syntax checker batch job. 
@@ -64,7 +69,7 @@ class TestScheduler(MutalyzerTest): 'OK'], ['AL449423.14(CDKN2A_v002):c.5_400del', 'OK']] - self._batch_job(variants, expected, 'syntax-checker') + self._batch_job_plain_text(variants, expected, 'syntax-checker') @fix(cache('AB026906.1', 'NM_000059.3')) def test_name_checker(self): @@ -110,7 +115,7 @@ class TestScheduler(MutalyzerTest): 'NM_000059.3(BRCA2_i001):p.(Asp224Tyr)', '', 'BspHI,CviAII,FatI,Hpy188III,NlaIII']] - self._batch_job(variants, expected, 'name-checker') + self._batch_job_plain_text(variants, expected, 'name-checker') def test_name_checker_altered(self): """ @@ -187,7 +192,7 @@ class TestScheduler(MutalyzerTest): return bz2.BZ2File(path) with patch.object(Entrez, 'efetch', mock_efetch): - self._batch_job(variants, expected, 'name-checker') + self._batch_job_plain_text(variants, expected, 'name-checker') @fix(cache('NM_000059.3')) def test_name_checker_skipped(self): @@ -228,7 +233,7 @@ class TestScheduler(MutalyzerTest): raise IOError() with patch.object(Entrez, 'efetch', mock_efetch): - self._batch_job(variants, expected, 'name-checker') + self._batch_job_plain_text(variants, expected, 'name-checker') @fix(hg19, hg19_transcript_mappings) def test_position_converter(self): @@ -242,4 +247,89 @@ class TestScheduler(MutalyzerTest): 'NM_003002.2:c.274G>T', 'NM_012459.2:c.-2203C>A', 'NR_028383.1:n.-2173C>A']] - self._batch_job(variants, expected, 'position-converter', 'hg19') + self._batch_job_plain_text(variants, expected, 'position-converter', 'hg19') + + def test_ods_file(self): + """ + OpenDocument Spreadsheet input for batch job. + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'batch_input.ods') + batch_file = open(path, 'rb') + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + + self._batch_job(batch_file, expected, 'syntax-checker') + + def test_sxc_file(self): + """ + OpenOffice.org 1.x Calc spreadsheet input for batch job. 
+ """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'batch_input.sxc') + batch_file = open(path, 'rb') + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + + self._batch_job(batch_file, expected, 'syntax-checker') + + def test_xls_file(self): + """ + Microsoft Excel 97/2000/XP/2003 input for batch job. + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'batch_input.xls') + batch_file = open(path, 'rb') + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + + self._batch_job(batch_file, expected, 'syntax-checker') + + def test_xlsx_file(self): + """ + Office Open XML Spreadsheet input for batch job. + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'batch_input.xlsx') + batch_file = open(path, 'rb') + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + + self._batch_job(batch_file, expected, 'syntax-checker') + + def test_invalid_zip_file(self): + """ + Random zip file input for batch job (invalid). + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'image.zip') + batch_file = open(path, 'rb') + + file_instance = File.File(output.Output('test')) + job, columns = file_instance.parseBatchFile(batch_file) + assert job is None + + def test_unicode_input(self): + """ + Simple input with some non-ASCII unicode characters. + """ + variants = ['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) 
(at char 0), (line:1, col:1)']] + self._batch_job_plain_text(variants, expected, 'syntax-checker') diff --git a/tests/test_services_json.py b/tests/test_services_json.py index ce029ba764fab2c7cadd84ea730671abca41cca4..81833505e36ecee7436bde0f956e579ecd82c00e 100644 --- a/tests/test_services_json.py +++ b/tests/test_services_json.py @@ -3,10 +3,13 @@ Tests for the JSON interface to Mutalyzer. """ +from __future__ import unicode_literals + import simplejson as json from spyne.server.null import NullServer import mutalyzer from mutalyzer import announce +from mutalyzer import Scheduler from mutalyzer.services.json import application from fixtures import database, hg19, hg19_transcript_mappings @@ -77,7 +80,7 @@ class TestServicesJson(MutalyzerTest): Running the info method should give us some version information. """ r = self._call('info') - assert type(r['versionParts']) == list + assert isinstance(r['versionParts'], list) assert r['version'] == mutalyzer.__version__ def test_info_announcement(self): @@ -86,14 +89,56 @@ class TestServicesJson(MutalyzerTest): """ announce.set_announcement('Test announcement') r = self._call('info') - assert type(r['announcement']) == str + assert isinstance(r['announcement'], unicode) assert r['announcement'] == 'Test announcement' announce.set_announcement('New announcement') r = self._call('info') - assert type(r['announcement']) == str + assert isinstance(r['announcement'], unicode) assert r['announcement'] == 'New announcement' announce.unset_announcement() r = self._call('info') assert not r.get('announcement') + + def test_checksyntax_unicode(self): + """ + Run checkSyntax with an invalid variant description containing + non-ASCII unicode characters. + """ + r = self._call('checkSyntax', 'La Pe\xf1a') + assert r['valid'] == False + assert len(r['messages']) == 1 + assert r['messages'][0]['errorcode'] == 'EPARSE' + assert r['messages'][0]['message'] == 'Expected W:(0123...) 
(at char 2), (line:1, col:3)' + + @fix(database) + def test_batchjob_unicode(self): + """ + Submit a batch job with non-ASCII unicode characters in the input + file. + """ + variants = ['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)']] + + data = '\n'.join(variants) + '\n' #.encode('base64') + + result = self._call('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') + job_id = unicode(result) + + result = self._call('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = self._call('monitorBatchJob', job_id) + assert int(result) == 0 + + result = self._call('getBatchJob', job_id) + result = result.decode('base64').decode('utf-8').strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] diff --git a/tests/test_services_soap.py b/tests/test_services_soap.py index cc1ce8c00320164293fb03ac66f662b6e454941c..0a85844d07c0f5a95bcf9e00b1dbc183a591f6bf 100644 --- a/tests/test_services_soap.py +++ b/tests/test_services_soap.py @@ -3,6 +3,8 @@ Tests for the SOAP interface to Mutalyzer. 
""" +from __future__ import unicode_literals + import bz2 import datetime import logging @@ -539,8 +541,8 @@ class TestServicesSoap(MutalyzerTest): 'AL449423.14(CDKN2A_v002):c.5_400del'] data = '\n'.join(variants) + '\n' #.encode('base64') - result = self._call('submitBatchJob', data, 'NameChecker') - job_id = str(result) + result = self._call('submitBatchJob', data.encode('utf-8'), 'NameChecker') + job_id = unicode(result) result = self._call('monitorBatchJob', job_id) assert int(result) == len(variants) @@ -564,8 +566,8 @@ class TestServicesSoap(MutalyzerTest): 'AL449423.14(CDKN2A_v002):c.5_400del'] data = '\n'.join(variants) + '\n' - result = self._call('submitBatchJob', data, 'SyntaxChecker') - job_id = str(result) + result = self._call('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') + job_id = unicode(result) result = self._call('monitorBatchJob', job_id) assert int(result) == len(variants) @@ -586,8 +588,8 @@ class TestServicesSoap(MutalyzerTest): 'AL449423.14(CDKN2A_v002):c.5_400del'] data = '\r'.join(variants) + '\r' - result = self._call('submitBatchJob', data, 'SyntaxChecker') - job_id = str(result) + result = self._call('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') + job_id = unicode(result) result = self._call('monitorBatchJob', job_id) assert int(result) == len(variants) @@ -608,8 +610,8 @@ class TestServicesSoap(MutalyzerTest): 'AL449423.14(CDKN2A_v002):c.5_400del'] data = '\r\n'.join(variants) + '\r\n' - result = self._call('submitBatchJob', data, 'SyntaxChecker') - job_id = str(result) + result = self._call('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') + job_id = unicode(result) result = self._call('monitorBatchJob', job_id) assert int(result) == len(variants) @@ -640,7 +642,7 @@ facilisi.""" data += data try: - self._call('submitBatchJob', data.encode('base64'), 'NameChecker') + self._call('submitBatchJob', data.encode('utf-8'), 'NameChecker') assert False except Fault as e: # - senv:Client.RequestTooLong: Raised by 
Spyne, depending on @@ -661,9 +663,51 @@ facilisi.""" data = f.read() result = self._call('uploadGenBankLocalFile', data) - ud = str(result) + ud = unicode(result) r = self._call('runMutalyzer', ud + '(SDHD):g.7872G>T') assert r.errors == 0 assert r.genomicDescription == ud + ':g.7872G>T' assert ud + '(SDHD_v001):c.274G>T' in r.transcriptDescriptions.string + + def test_checksyntax_unicode(self): + """ + Run checkSyntax with an invalid variant description containing + non-ASCII unicode characters. + """ + r = self._call('checkSyntax', 'La Pe\xf1a') + assert r.valid == False + assert len(r.messages.SoapMessage) == 1 + assert r.messages.SoapMessage[0]['errorcode'] == 'EPARSE' + assert r.messages.SoapMessage[0]['message'] == 'Expected W:(0123...) (at char 2), (line:1, col:3)' + + @fix(database) + def test_batchjob_unicode(self): + """ + Submit a batch job with non-ASCII unicode characters in the input + file. + """ + variants = ['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) 
(at char 0), (line:1, col:1)']] + + data = '\n'.join(variants) + '\n' #.encode('base64') + + result = self._call('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') + job_id = unicode(result) + + result = self._call('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = self._call('monitorBatchJob', job_id) + assert int(result) == 0 + + result = self._call('getBatchJob', job_id) + result = result.decode('base64').decode('utf-8').strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] diff --git a/tests/test_variantchecker.py b/tests/test_variantchecker.py index 1b30786b27730bdc91ac5b39785c0f6fa9625d28..8c19421a9f0b8c891908b316d162a007b3d2733b 100644 --- a/tests/test_variantchecker.py +++ b/tests/test_variantchecker.py @@ -3,6 +3,8 @@ Tests for the variantchecker module. """ +from __future__ import unicode_literals + #import logging; logging.basicConfig() from mutalyzer.output import Output diff --git a/tests/test_website.py b/tests/test_website.py index e579433a18f321a2fb2784530b8381111bc9b3e6..fd0f02e7725b2cd1dc53b6231a9ac01d70a4caca 100644 --- a/tests/test_website.py +++ b/tests/test_website.py @@ -5,23 +5,19 @@ Tests for the WSGI interface to Mutalyzer. 
""" +from __future__ import unicode_literals + #import logging; logging.basicConfig() import bz2 -import cgi -import logging from mock import patch import os -import re -from StringIO import StringIO -import time -import urllib -import urllib2 +from io import BytesIO from Bio import Entrez import lxml.html -import mutalyzer from mutalyzer import announce, Scheduler +from mutalyzer.db import models from mutalyzer.website import create_app from fixtures import cache, database, hg19, hg19_transcript_mappings @@ -264,7 +260,7 @@ class TestWebsite(MutalyzerTest): """ data = {'job_type': job_type, 'email': 'test@test.test', - 'file': (StringIO(file), 'test.txt')} + 'file': (BytesIO(file.encode('utf-8')), 'test.txt')} if assembly_name_or_alias is not None: data['assembly_name_or_alias'] = assembly_name_or_alias @@ -510,7 +506,7 @@ class TestWebsite(MutalyzerTest): Download a C# example client for the web service. """ r = self.app.get('/downloads/client-mono.cs') - assert r.headers['Content-Type'] == 'text/plain' + assert 'text/plain' in r.headers['Content-Type'] assert 'public static void Main(String [] args) {' in r.data def test_download_php(self): @@ -634,7 +630,7 @@ class TestWebsite(MutalyzerTest): 'build': 'hg19', 'acc': 'NM_203473.1'}) assert 'text/plain' in r.headers['Content-Type'] - assert r.content_type == 'text/plain' + assert 'text/plain' in r.content_type expected = '\n'.join(['-158', '1709', '1371']) assert r.data == expected @@ -678,7 +674,7 @@ class TestWebsite(MutalyzerTest): """ r = self.app.post('/reference-loader', data={'method': 'upload', - 'file': (StringIO('this is not a genbank file'), 'AB026906.1.gb')}) + 'file': (BytesIO('this is not a genbank file'.encode('utf-8')), 'AB026906.1.gb')}) assert 'Your reference sequence was loaded successfully.' not in r.data assert 'The file could not be parsed.' 
in r.data @@ -737,3 +733,89 @@ class TestWebsite(MutalyzerTest): assert 'text/plain' in r.headers['Content-Type'] assert '\t'.join(['chrX', '154157690', '154157691', '4374A>T', '0', '-']) in r.data assert '\t'.join(['chrX', '154157683', '154157685', '4380_4381del', '0', '-']) in r.data + + def test_checksyntax_unicode(self): + """ + Run check syntax form with an invalid variant description containing + non-ASCII unicode characters. + """ + r = self.app.get('/syntax-checker', + query_string={'description': 'La Pe\xf1a'}) + body = r.get_data(as_text=True) + assert 'Fatal' in body + assert 'Details of the parse error' in body + assert 'Expected W:(0123...) (at char 2), (line:1, col:3)' in body + + @fix(database) + def test_batch_unicode(self): + """ + Submit a batch form with non-ASCII unicode characters in the input + file. + """ + file = '\n'.join(['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del']) + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) 
(at char 0), (line:1, col:1)']] + + data = {'job_type': 'syntax-checker', + 'email': 'test@test.test', + 'file': (BytesIO(file.encode('utf-8')), 'test.txt')} + + r = self.app.post('/batch-jobs', + data=data) + progress_url = '/' + r.location.split('/')[-1] + + assert models.BatchJob.query.first().email == 'test@test.test' + + scheduler = Scheduler.Scheduler() + scheduler.process() + + r = self.app.get(progress_url) + + dom = lxml.html.fromstring(r.data) + result_url = dom.cssselect('#ifnot_items_left a')[0].attrib['href'] + + r = self.app.get(result_url) + assert 'text/plain' in r.headers['Content-Type'] + + result = r.get_data(as_text=True).strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] + + @fix(database) + def test_batch_unicode_email(self): + """ + Submit a batch form with non-ASCII unicode characters in the email + address. + """ + file = '\n'.join(['AB026906.1:c.274G>T', + 'AL449423.14(CDKN2A_v002):c.5_400del']) + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + + data = {'job_type': 'syntax-checker', + 'email': 'pe\xf1a@test.test', + 'file': (BytesIO(file.encode('utf-8')), 'test.txt')} + + r = self.app.post('/batch-jobs', + data=data) + progress_url = '/' + r.location.split('/')[-1] + + assert models.BatchJob.query.first().email == 'pe\xf1a@test.test' + + scheduler = Scheduler.Scheduler() + scheduler.process() + + r = self.app.get(progress_url) + + dom = lxml.html.fromstring(r.data) + result_url = dom.cssselect('#ifnot_items_left a')[0].attrib['href'] + + r = self.app.get(result_url) + assert 'text/plain' in r.headers['Content-Type'] + + result = r.get_data(as_text=True).strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] diff --git a/tests/utils.py b/tests/utils.py index befa5d72859279140211ad412fa2920fce8961d6..f9cfce8bb44a2ce0e7bd09d9951e92d6b8ea1c34 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -3,6 +3,8 @@ Utilities for unit tests. 
""" +from __future__ import unicode_literals + from functools import wraps import os import shutil