From 8c326b3f08b9f6ee50a78907be30938a23b7dd07 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Tue, 13 May 2014 17:40:29 +0200
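Subject: [PATCH] Fix mapview import preprocessing

Without an explicit separator, `sort` splits fields at transitions between
blank and non-blank characters, so a value containing a space shifts the
field numbering and `-k 11,11 -k 2,2` may no longer select the `feature_id`
and `chromosome` columns. Pass `-t $'\t'` so that fields are split on tabs
only, and update the documentation and help text accordingly.
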
---
 doc/admin.rst                  | 2 +-
 mutalyzer/entrypoints/admin.py | 2 +-
 mutalyzer/mapping.py           | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/admin.rst b/doc/admin.rst
index e169430b..fca9cc8d 100644
--- a/doc/admin.rst
+++ b/doc/admin.rst
@@ -63,7 +63,7 @@ For example, to import transcript mappings for the GRCh37 assembly, run the
 following::
 
     $ wget ftp://ftp.ncbi.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.37.2/mapview/seq_gene.md.gz
-    $ zcat seq_gene.md.gz | sort -k 11,11 -k 2,2 > seq_gene.sorted.md
+    $ zcat seq_gene.md.gz | sort -t $'\t' -k 11,11 -k 2,2 > seq_gene.sorted.md
     $ mutalyzer-admin assemblies import-mapview seq_gene.sorted.md 'GRCh37.p2-Primary Assembly'
 
 .. note:: The last argument, ``GRCh37.p2-Primary Assembly``, defines the group
diff --git a/mutalyzer/entrypoints/admin.py b/mutalyzer/entrypoints/admin.py
index 329e13ea..07ee52e8 100644
--- a/mutalyzer/entrypoints/admin.py
+++ b/mutalyzer/entrypoints/admin.py
@@ -217,7 +217,7 @@ def main():
         description=import_mapview.__doc__.split('\n\n')[0],
         epilog='Note: We require that FILE is sorted on the `feature_id` '
         '(#11) and `chromosome` (#2) columns. This can be done with a '
-        '`sort -k 11,11 -k 2,2` command.')
+        '`sort -t $\'\\t\' -k 11,11 -k 2,2` command.')
     p.set_defaults(func=import_mapview)
     p.add_argument(
         'mapview_file', metavar='FILE', type=argparse.FileType('r'),
diff --git a/mutalyzer/mapping.py b/mutalyzer/mapping.py
index 9a059a68..693294d3 100644
--- a/mutalyzer/mapping.py
+++ b/mutalyzer/mapping.py
@@ -886,7 +886,7 @@ def import_from_mapview_file(assembly, mapview_file, group_label):
     (#11), which always contains the gene identifier, and then on the
     `chromosome` column (#2).
 
-        sort -k 11,11 -k 2,2 seq_gene.md > seq_gene.by_gene.md
+        sort -t $'\t' -k 11,11 -k 2,2 seq_gene.md > seq_gene.by_gene.md
 
     Raises :exc:`ValueError` if `mapview_file` is not sorted this way.
 
@@ -1004,7 +1004,7 @@ def import_from_mapview_file(assembly, mapview_file, group_label):
     processed_keys = set()
 
     for key, records in groupby(read_records(mapview_file),
-                              itemgetter('feature_id', 'chromosome')):
+                                itemgetter('feature_id', 'chromosome')):
         if key in processed_keys:
             raise MapviewSortError('Mapview file must be sorted by feature_id '
                                    'and chromosome (try `sort -k 11,11 -k '
-- 
GitLab