Skip to content
Snippets Groups Projects
Commit 63825a47 authored by Vermaat
Browse files

Handle encoding for command line file arguments

parent 66629914
No related branches found
No related tags found
No related merge requests found
...@@ -5,6 +5,7 @@ Entry points to Mutalyzer. ...@@ -5,6 +5,7 @@ Entry points to Mutalyzer.
from __future__ import unicode_literals from __future__ import unicode_literals
import locale
import sys import sys
...@@ -48,6 +49,8 @@ def _cli_string(argument): ...@@ -48,6 +49,8 @@ def _cli_string(argument):
Decode a command line argument byte string to unicode using our best Decode a command line argument byte string to unicode using our best
guess for the encoding (noop on unicode strings). guess for the encoding (noop on unicode strings).
""" """
encoding = sys.stdin.encoding or locale.getpreferredencoding()
if isinstance(argument, unicode): if isinstance(argument, unicode):
return argument return argument
return unicode(argument, encoding=sys.stdin.encoding) return unicode(argument, encoding=encoding)
...@@ -6,9 +6,10 @@ Command line interface to Mutalyzer administrative tools. ...@@ -6,9 +6,10 @@ Command line interface to Mutalyzer administrative tools.
from __future__ import unicode_literals from __future__ import unicode_literals
import argparse import argparse
import codecs
import json import json
import locale
import os import os
import sys
import alembic.command import alembic.command
import alembic.config import alembic.config
...@@ -29,10 +30,12 @@ class UserError(Exception): ...@@ -29,10 +30,12 @@ class UserError(Exception):
pass pass
def add_assembly(assembly_file): def add_assembly(assembly_file, encoding):
""" """
Add genome assembly definition from a JSON file. Add genome assembly definition from a JSON file.
""" """
assembly_file = codecs.getreader(encoding)(assembly_file)
try: try:
definition = json.load(assembly_file) definition = json.load(assembly_file)
except ValueError: except ValueError:
...@@ -87,10 +90,13 @@ def list_assemblies(): ...@@ -87,10 +90,13 @@ def list_assemblies():
assembly.taxonomy_id) assembly.taxonomy_id)
def import_mapview(assembly_name_or_alias, mapview_file, group_label): def import_mapview(assembly_name_or_alias, mapview_file, encoding,
group_label):
""" """
Import transcript mappings from an NCBI mapview file. Import transcript mappings from an NCBI mapview file.
""" """
mapview_file = codecs.getreader(encoding)(mapview_file)
try: try:
assembly = Assembly.by_name_or_alias(assembly_name_or_alias) assembly = Assembly.by_name_or_alias(assembly_name_or_alias)
except NoResultFound: except NoResultFound:
...@@ -185,6 +191,8 @@ def main(): ...@@ -185,6 +191,8 @@ def main():
""" """
Command-line interface to Mutalyzer administrative tools. Command-line interface to Mutalyzer administrative tools.
""" """
default_encoding = locale.getpreferredencoding()
assembly_parser = argparse.ArgumentParser(add_help=False) assembly_parser = argparse.ArgumentParser(add_help=False)
assembly_parser.add_argument( assembly_parser.add_argument(
'-a', '--assembly', metavar='ASSEMBLY', type=_cli_string, '-a', '--assembly', metavar='ASSEMBLY', type=_cli_string,
...@@ -214,9 +222,13 @@ def main(): ...@@ -214,9 +222,13 @@ def main():
description=add_assembly.__doc__.split('\n\n')[0]) description=add_assembly.__doc__.split('\n\n')[0])
p.set_defaults(func=add_assembly) p.set_defaults(func=add_assembly)
p.add_argument( p.add_argument(
'assembly_file', metavar='FILE', type=argparse.FileType('r'), 'assembly_file', metavar='FILE', type=argparse.FileType('rb'),
help='genome assembly definition JSON file (example: ' help='genome assembly definition JSON file (example: '
'extras/assemblies/GRCh37.json)') 'extras/assemblies/GRCh37.json)')
p.add_argument(
'--encoding', metavar='ENCODING', type=_cli_string,
default=default_encoding,
help='input file encoding (default: %s)' % default_encoding)
# Subparser 'assemblies import-mapview'. # Subparser 'assemblies import-mapview'.
p = s.add_parser( p = s.add_parser(
...@@ -228,8 +240,12 @@ def main(): ...@@ -228,8 +240,12 @@ def main():
'`sort -t $\'\\t\' -k 11,11 -k 2,2` command.') '`sort -t $\'\\t\' -k 11,11 -k 2,2` command.')
p.set_defaults(func=import_mapview) p.set_defaults(func=import_mapview)
p.add_argument( p.add_argument(
'mapview_file', metavar='FILE', type=argparse.FileType('r'), 'mapview_file', metavar='FILE', type=argparse.FileType('rb'),
help='file from NCBI mapview (example: seq_gene.md), see note below') help='file from NCBI mapview (example: seq_gene.md), see note below')
p.add_argument(
'--encoding', metavar='ENCODING', type=_cli_string,
default=default_encoding,
help='input file encoding (default: %s)' % default_encoding)
p.add_argument( p.add_argument(
'group_label', metavar='GROUP_LABEL', type=_cli_string, 'group_label', metavar='GROUP_LABEL', type=_cli_string,
help='use only entries with this group label (example: ' help='use only entries with this group label (example: '
......
...@@ -883,7 +883,6 @@ def import_from_reference(assembly, reference): ...@@ -883,7 +883,6 @@ def import_from_reference(assembly, reference):
session.commit() session.commit()
# Todo: File must be opened with the correct encoding.
def import_from_mapview_file(assembly, mapview_file, group_label): def import_from_mapview_file(assembly, mapview_file, group_label):
""" """
Import transcript mappings from an NCBI mapview file. Import transcript mappings from an NCBI mapview file.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment