diff --git a/doc/admin.rst b/doc/admin.rst index e169430b477a21cc692197912663f61fa297fc29..fca9cc8d6a509486b6849339b553072ff6f362d7 100644 --- a/doc/admin.rst +++ b/doc/admin.rst @@ -63,7 +63,7 @@ For example, to import transcript mappings for the GRCh37 assembly, run the following:: $ wget ftp://ftp.ncbi.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.37.2/mapview/seq_gene.md.gz - $ zcat seq_gene.md.gz | sort -k 11,11 -k 2,2 > seq_gene.sorted.md + $ zcat seq_gene.md.gz | sort -t $'\t' -k 11,11 -k 2,2 > seq_gene.sorted.md $ mutalyzer-admin assemblies import-mapview seq_gene.sorted.md 'GRCh37.p2-Primary Assembly' .. note:: The last argument, ``GRCh37.p2-Primary Assembly``, defines the group diff --git a/mutalyzer/entrypoints/admin.py b/mutalyzer/entrypoints/admin.py index 329e13eaa5b20551af4d35e38bcfc3a5cb275d1c..07ee52e8bb10f440ebf1d84cb7148557fe1582b5 100644 --- a/mutalyzer/entrypoints/admin.py +++ b/mutalyzer/entrypoints/admin.py @@ -217,7 +217,7 @@ def main(): description=import_mapview.__doc__.split('\n\n')[0], epilog='Note: We require that FILE is sorted on the `feature_id` ' '(#11) and `chromosome` (#2) columns. This can be done with a ' - '`sort -k 11,11 -k 2,2` command.') + '`sort -t $\'\\t\' -k 11,11 -k 2,2` command.') p.set_defaults(func=import_mapview) p.add_argument( 'mapview_file', metavar='FILE', type=argparse.FileType('r'), diff --git a/mutalyzer/mapping.py b/mutalyzer/mapping.py index 9a059a68441631c6a645d2c93201385e2ab80372..693294d31b5a2a06319c24566d3e98259657882d 100644 --- a/mutalyzer/mapping.py +++ b/mutalyzer/mapping.py @@ -886,7 +886,7 @@ def import_from_mapview_file(assembly, mapview_file, group_label): (#11), which always contains the gene identifier, and then on the `chromosome` column (#2). - sort -k 11,11 -k 2,2 seq_gene.md > seq_gene.by_gene.md + sort -t $'\t' -k 11,11 -k 2,2 seq_gene.md > seq_gene.by_gene.md Raises :exc:`ValueError` if `mapview_file` is not sorted this way. @@ -1004,7 +1004,7 @@ def import_from_mapview_file(assembly, mapview_file, group_label): processed_keys = set() for key, records in groupby(read_records(mapview_file), - itemgetter('feature_id', 'chromosome')): + itemgetter('feature_id', 'chromosome')): if key in processed_keys: raise MapviewSortError('Mapview file must be sorted by feature_id ' 'and chromosome (try `sort -k 11,11 -k '