...
 
Commits (4)
*.pyc
.cache/
.pytest_cache/
.tox/
build/
dist/
......
Copyright (c) 2017 Jeroen F.J. Laros
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
......@@ -3,6 +3,7 @@ This library provides a [trie](https://en.wikipedia.org/wiki/Trie)
implementation using nested dictionaries. Apart from the basic operations, a
number of functions for *approximate matching* are implemented.
## Installation
Via [pypi](https://pypi.python.org/pypi/dict-trie):
......@@ -10,13 +11,13 @@ Via [pypi](https://pypi.python.org/pypi/dict-trie):
From source:
git clone https://git.lumc.nl/j.f.j.laros/dict-trie.git
git clone https://github.com/jfjlaros/dict-trie.git
cd dict-trie
pip install .
## Usage
The library provides the `Trie` class. Full documentation can be found
[here](https://git.lumc.nl/j.f.j.laros/dict-trie)
The library provides the `Trie` class.
### Basic operations
Initialisation of the trie is done via the constructor by providing a list of
......
"""dict-trie: Basic implementation of a trie.
Copyright (c) 2017 Leiden University Medical Center <humgen@lumc.nl>
Copyright (c) 2017 Jeroen F.J. Laros <J.F.J.Laros@lumc.nl>
Licensed under the MIT license, see the LICENSE file.
......@@ -12,9 +11,9 @@ from .dict_trie import Trie
__version_info__ = ('0', '0', '3')
__version__ = '.'.join(__version_info__)
__author__ = 'LUMC, Jeroen F.J. Laros'
__author__ = 'Jeroen F.J. Laros'
__contact__ = 'J.F.J.Laros@lumc.nl'
__homepage__ = 'https://git.lumc.nl/j.f.j.laros/dict-trie'
__homepage__ = 'https://github.com/jfjlaros/dict-trie.git'
usage = __doc__.split('\n\n\n')
......
from itertools import imap
class iMap(imap):
    """Python 2 `itertools.imap` that also speaks the Python 3 iterator
    protocol.

    Python 2 iterators expose `next()`, Python 3 iterators expose
    `__next__()`. This subclass adds the latter so that calling code can use
    `.__next__()` on either interpreter.
    """

    def __next__(self):
        """Return the next mapped item.

        :returns: The next item, as produced by the Python 2 `next()` method.
        :raises StopIteration: When the underlying iterator is exhausted.
        """
        return self.next()


# Deliberately shadows the builtin: on Python 2 the package imports `map` from
# this module to get an iterator-returning `map` like the Python 3 builtin.
map = iMap
import itertools
import sys
if sys.version_info.major < 3:
from .compatibility import map
def _add(root, word, count):
"""Add a word to the trie.
"""Add a word to a trie.
:arg dict root: Root of the trie.
:arg str word: A word.
......@@ -21,7 +25,7 @@ def _add(root, word, count):
def _find(root, word):
"""Find the node after following the path in the trie given by {word}.
"""Find the node after following the path in a trie given by {word}.
:arg dict root: Root of the trie.
:arg str word: A word.
......@@ -74,7 +78,7 @@ def _iterate(path, node, unique):
:arg dict node: Current node.
:arg bool unique: Do not list multiplicities.
:returns iter: All words in the trie.
:returns iter: All words in a trie.
"""
if '' in node:
if not unique:
......@@ -108,7 +112,7 @@ def _fill(node, alphabet, length):
def _hamming(path, node, word, distance, cigar):
"""Find all paths in the trie that are within a certain hamming distance of
"""Find all paths in a trie that are within a certain hamming distance of
{word}.
:arg str path: Path taken so far to reach the current node.
......@@ -116,7 +120,7 @@ def _hamming(path, node, word, distance, cigar):
:arg str word: Query word.
:arg int distance: Amount of allowed errors.
:returns iter: All words in the trie that have Hamming distance of at most
:returns iter: All words in a trie that have Hamming distance of at most
{distance} to {word}.
"""
if distance < 0:
......@@ -142,7 +146,7 @@ def _hamming(path, node, word, distance, cigar):
def _levenshtein(path, node, word, distance, cigar):
"""Find all paths in the trie that are within a certain Levenshtein
"""Find all paths in a trie that are within a certain Levenshtein
distance of {word}.
:arg str path: Path taken so far to reach the current node.
......@@ -150,7 +154,7 @@ def _levenshtein(path, node, word, distance, cigar):
:arg str word: Query word.
:arg int distance: Amount of allowed errors.
:returns iter: All words in the trie that have Levenshtein distance of at most
:returns iter: All words in a trie that have Levenshtein distance of at most
{distance} to {word}.
"""
if distance < 0:
......@@ -226,22 +230,22 @@ class Trie(object):
_fill(self.root, alphabet, length)
def all_hamming_(self, word, distance):
return itertools.imap(
return map(
lambda x: (x[0], distance - x[1], x[2]),
_hamming('', self.root, word, distance, ''))
def all_hamming(self, word, distance):
return itertools.imap(
return map(
lambda x: x[0], _hamming('', self.root, word, distance, ''))
def hamming(self, word, distance):
try:
return self.all_hamming(word, distance).next()
return self.all_hamming(word, distance).__next__()
except StopIteration:
return ''
def best_hamming(self, word, distance):
"""Find the best match with {word} in the trie.
"""Find the best match with {word} in a trie.
:arg str word: Query word.
:arg int distance: Maximum allowed distance.
......@@ -259,22 +263,22 @@ class Trie(object):
return ''
def all_levenshtein_(self, word, distance):
return itertools.imap(
return map(
lambda x: (x[0], distance - x[1], x[2]),
_levenshtein('', self.root, word, distance, ''))
def all_levenshtein(self, word, distance):
return itertools.imap(
return map(
lambda x: x[0], _levenshtein('', self.root, word, distance, ''))
def levenshtein(self, word, distance):
try:
return self.all_levenshtein(word, distance).next()
return self.all_levenshtein(word, distance).__next__()
except StopIteration:
return ''
def best_levenshtein(self, word, distance):
"""Find the best match with {word} in the trie.
"""Find the best match with {word} in a trie.
:arg str word: Query word.
:arg int distance: Maximum allowed distance.
......
......@@ -108,26 +108,26 @@ class TestTrie(object):
assert 'abc' not in self._trie
def test_iter(self):
assert list(self._trie) == ['abc', 'abd', 'te', 'test']
assert set(self._trie) == set(['abc', 'abd', 'te', 'test'])
def test_list(self):
assert list(self._trie.list()) == list(self._trie)
def test_list_non_unique(self):
assert list(self._trie.list(False)) == [
'abc', 'abd', 'abd', 'te', 'test']
assert set(self._trie.list(False)) == set(
['abc', 'abd', 'abd', 'te', 'test'])
def test_fill(self):
trie = Trie()
trie.fill(('a', 'b'), 3)
assert list(trie) == [
'aaa', 'aab', 'aba', 'abb', 'baa', 'bab', 'bba', 'bbb']
assert set(trie) == set(
['aaa', 'aab', 'aba', 'abb', 'baa', 'bab', 'bba', 'bbb'])
def test_all_hamming_1_perfect(self):
assert list(self._trie.all_hamming('abc', 1)) == ['abc', 'abd']
assert set(self._trie.all_hamming('abc', 1)) == set(['abc', 'abd'])
def test_all_hamming_1_not_perfect(self):
assert list(self._trie.all_hamming('abx', 1)) == ['abc', 'abd']
assert set(self._trie.all_hamming('abx', 1)) == set(['abc', 'abd'])
def test_all_hamming_1_no_match(self):
assert not list(self._trie.all_hamming('xbx', 1))
......@@ -145,7 +145,7 @@ class TestTrie(object):
assert self._trie.hamming('abcd', 0) == ''
def test_hamming_1_match(self):
assert self._trie.hamming('abc', 1) == 'abc'
assert self._trie.hamming('abc', 1) in ['abc', 'abd']
def test_hamming_1_match_sub(self):
assert self._trie.hamming('te', 1) == 'te'
......@@ -157,10 +157,10 @@ class TestTrie(object):
assert self._trie.hamming('axc', 1) == 'abc'
def test_hamming_1_match_3(self):
assert self._trie.hamming('abx', 1) == 'abc'
assert self._trie.hamming('abx', 1) in ['abc', 'abd']
def test_hamming_1_match_4(self):
assert self._trie.hamming('abd', 1) == 'abc'
assert self._trie.hamming('abd', 1) in ['abc', 'abd']
def test_hamming_1_no_prefix(self):
assert self._trie.hamming('ab', 1) == ''
......@@ -187,7 +187,7 @@ class TestTrie(object):
assert list(self._trie.all_levenshtein('tes', 1)) == ['te', 'test']
def test_levenshtein_0_match_1(self):
assert self._trie.levenshtein('abc', 0) == 'abc'
assert self._trie.levenshtein('abc', 0) in ['abc', 'abd']
def test_levenshtein_0_match_2(self):
assert self._trie.levenshtein('te', 0) == 'te'
......@@ -199,16 +199,16 @@ class TestTrie(object):
assert self._trie.levenshtein('ac', 1) == 'abc'
def test_levenshtein_1_prefix(self):
assert self._trie.levenshtein('ab', 1) == 'abc'
assert self._trie.levenshtein('ab', 1) in ['abc', 'abd']
def test_levenshtein_1_ins(self):
assert self._trie.levenshtein('abbc', 1) == 'abc'
def test_all_hamming__2(self):
assert list(self._trie.all_hamming_('acb', 2)) == [
('abc', 2, '=XX'), ('abd', 2, '=XX')]
assert set(self._trie.all_hamming_('acb', 2)) == set(
[('abc', 2, '=XX'), ('abd', 2, '=XX')])
def test_all_levenshtein__2(self):
assert list(self._trie.all_levenshtein_('acb', 2)) == [
assert set(self._trie.all_levenshtein_('acb', 2)) == set([
('abc', 2, '=D=I'), ('abd', 2, '=D=I'), ('abc', 2, '=XX'),
('abd', 2, '=XX'), ('abc', 2, '=I=D')]
('abd', 2, '=XX'), ('abc', 2, '=I=D')])