From 8c326b3f08b9f6ee50a78907be30938a23b7dd07 Mon Sep 17 00:00:00 2001 From: Martijn Vermaat <martijn@vermaat.name> Date: Tue, 13 May 2014 17:40:29 +0200 Subject: [PATCH] Fix mapview import preprocessing --- doc/admin.rst | 2 +- mutalyzer/entrypoints/admin.py | 2 +- mutalyzer/mapping.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/admin.rst b/doc/admin.rst index e169430b..fca9cc8d 100644 --- a/doc/admin.rst +++ b/doc/admin.rst @@ -63,7 +63,7 @@ For example, to import transcript mappings for the GRCh37 assembly, run the following:: $ wget ftp://ftp.ncbi.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.37.2/mapview/seq_gene.md.gz - $ zcat seq_gene.md.gz | sort -k 11,11 -k 2,2 > seq_gene.sorted.md + $ zcat seq_gene.md.gz | sort -t $'\t' -k 11,11 -k 2,2 > seq_gene.sorted.md $ mutalyzer-admin assemblies import-mapview seq_gene.sorted.md 'GRCh37.p2-Primary Assembly' .. note:: The last argument, ``GRCh37.p2-Primary Assembly``, defines the group diff --git a/mutalyzer/entrypoints/admin.py b/mutalyzer/entrypoints/admin.py index 329e13ea..07ee52e8 100644 --- a/mutalyzer/entrypoints/admin.py +++ b/mutalyzer/entrypoints/admin.py @@ -217,7 +217,7 @@ def main(): description=import_mapview.__doc__.split('\n\n')[0], epilog='Note: We require that FILE is sorted on the `feature_id` ' '(#11) and `chromosome` (#2) columns. This can be done with a ' - '`sort -k 11,11 -k 2,2` command.') + '`sort -t $\'\\t\' -k 11,11 -k 2,2` command.') p.set_defaults(func=import_mapview) p.add_argument( 'mapview_file', metavar='FILE', type=argparse.FileType('r'), diff --git a/mutalyzer/mapping.py b/mutalyzer/mapping.py index 9a059a68..693294d3 100644 --- a/mutalyzer/mapping.py +++ b/mutalyzer/mapping.py @@ -886,7 +886,7 @@ def import_from_mapview_file(assembly, mapview_file, group_label): (#11), which always contains the gene identifier, and then on the `chromosome` column (#2). - sort -k 11,11 -k 2,2 seq_gene.md > seq_gene.by_gene.md + sort -t $'\t' -k 11,11 -k 2,2 seq_gene.md > seq_gene.by_gene.md Raises :exc:`ValueError` if `mapview_file` is not sorted this way. @@ -1004,7 +1004,7 @@ def import_from_mapview_file(assembly, mapview_file, group_label): processed_keys = set() for key, records in groupby(read_records(mapview_file), - itemgetter('feature_id', 'chromosome')): + itemgetter('feature_id', 'chromosome')): if key in processed_keys: raise MapviewSortError('Mapview file must be sorted by feature_id ' 'and chromosome (try `sort -k 11,11 -k ' -- GitLab