Dropped Python 2 support, added type hinting.

parent fd6b9bf9
language: python
python:
- "2.7"
- "3.5"
- "3.6"
- "3.7"
install: pip install tox-travis
script: tox
import sys
if sys.version_info.major < 3:
from itertools import imap as map
def _add(root, word, count):
def _add(root: dict, word: str, count: int) -> None:
"""Add a word to a trie.
:arg dict root: Root of the trie.
:arg str word: A word.
:arg int count: Multiplicity of `word`.
:arg root: Root of the trie.
:arg word: A word.
:arg count: Multiplicity of `word`.
"""
node = root
......@@ -24,13 +20,13 @@ def _add(root, word, count):
node[''] += count
def _find(root, word):
def _find(root: dict, word: str) -> dict:
"""Find the node after following the path in a trie given by {word}.
:arg dict root: Root of the trie.
:arg str word: A word.
:arg root: Root of the trie.
:arg word: A word.
:returns dict: The node if found, {} otherwise.
:returns: The node if found, {} otherwise.
"""
node = root
......@@ -42,14 +38,14 @@ def _find(root, word):
return node
def _remove(node, word, count):
def _remove(node: dict, word: str, count: int) -> bool:
"""Remove a word from a trie.
:arg dict node: Current node.
:arg str word: Word to be removed.
:arg int count: Multiplicity of `word`, force remove if this is -1.
:arg node: Current node.
:arg word: Word to be removed.
:arg count: Multiplicity of `word`, force remove if this is -1.
:returns bool: True if the last occurrence of `word` is removed.
:returns: True if the last occurrence of `word` is removed.
"""
if not word:
if '' in node:
......@@ -71,14 +67,14 @@ def _remove(node, word, count):
return result
def _iterate(path, node, unique):
def _iterate(path: str, node: dict, unique: bool) -> iter:
"""Convert a trie into a list.
:arg str path: Path taken so far to reach the current node.
:arg dict node: Current node.
:arg bool unique: Do not list multiplicities.
:arg path: Path taken so far to reach the current node.
:arg node: Current node.
:arg unique: Do not list multiplicities.
:returns iter: All words in a trie.
:returns: All words in a trie.
"""
if '' in node:
if not unique:
......@@ -92,14 +88,14 @@ def _iterate(path, node, unique):
yield result
def _fill(node, alphabet, length):
def _fill(node: dict, alphabet: tuple, length: int) -> iter:
"""Make a full trie using the characters in {alphabet}.
:arg dict node: Current node.
:arg tuple alphabet: Used alphabet.
:arg int length: Length of the words to be generated.
:arg node: Current node.
:arg alphabet: Used alphabet.
:arg length: Length of the words to be generated.
:returns iter: Trie containing all words of length {length} over alphabet
:returns: Trie containing all words of length {length} over alphabet
{alphabet}.
"""
if not length:
......@@ -111,16 +107,18 @@ def _fill(node, alphabet, length):
_fill(node[char], alphabet, length - 1)
def _hamming(path, node, word, distance, cigar):
def _hamming(
path: str, node: dict, word: str, distance: int, cigar: str) -> iter:
"""Find all paths in a trie that are within a certain hamming distance of
{word}.
:arg str path: Path taken so far to reach the current node.
:arg dict node: Current node.
:arg str word: Query word.
:arg int distance: Amount of allowed errors.
:arg path: Path taken so far to reach the current node.
:arg node: Current node.
:arg word: Query word.
:arg distance: Amount of allowed errors.
:arg cigar: CIGAR string.
:returns iter: All words in a trie that have Hamming distance of at most
:returns: All words in a trie that have Hamming distance of at most
{distance} to {word}.
"""
if distance < 0:
......@@ -145,16 +143,17 @@ def _hamming(path, node, word, distance, cigar):
yield result
def _levenshtein(path, node, word, distance, cigar):
"""Find all paths in a trie that are within a certain Levenshtein
distance of {word}.
def _levenshtein(
path: str, node: dict, word: str, distance: int, cigar: str) -> iter:
"""Find all paths in a trie that are within a certain Levenshtein distance
of {word}.
:arg str path: Path taken so far to reach the current node.
:arg dict node: Current node.
:arg str word: Query word.
:arg int distance: Amount of allowed errors.
:returns iter: All words in a trie that have Hamming distance of at most
:returns: All words in a trie that have Levenshtein distance of at most
{distance} to {word}.
"""
if distance < 0:
......@@ -191,10 +190,10 @@ def _levenshtein(path, node, word, distance, cigar):
class Trie(object):
def __init__(self, words=None):
def __init__(self: object, words: list=None) -> None:
"""Initialise the class.
:arg list words: List of words.
:arg words: List of words.
"""
self.root = {}
......@@ -202,55 +201,55 @@ class Trie(object):
for word in words:
self.add(word)
def __contains__(self, word):
def __contains__(self: object, word: str) -> bool:
return '' in _find(self.root, word)
def __iter__(self):
def __iter__(self: object) -> iter:
return _iterate('', self.root, True)
def list(self, unique=True):
def list(self: object, unique: bool=True) -> iter:
return _iterate('', self.root, unique)
def add(self, word, count=1):
def add(self: object, word: str, count: int=1) -> None:
_add(self.root, word, count)
def get(self, word):
def get(self: object, word: str) -> dict:
node = _find(self.root, word)
if '' in node:
return node['']
return None
def remove(self, word, count=1):
def remove(self: object, word: str, count: int=1) -> bool:
return _remove(self.root, word, count)
def has_prefix(self, word):
def has_prefix(self: object, word: str) -> bool:
return _find(self.root, word) != {}
def fill(self, alphabet, length):
def fill(self: object, alphabet: tuple, length: int) -> None:
_fill(self.root, alphabet, length)
def all_hamming_(self, word, distance):
def all_hamming_(self: object, word: str, distance: int) -> iter:
return map(
lambda x: (x[0], distance - x[1], x[2]),
_hamming('', self.root, word, distance, ''))
def all_hamming(self, word, distance):
def all_hamming(self: object, word: str, distance: int) -> iter:
return map(
lambda x: x[0], _hamming('', self.root, word, distance, ''))
def hamming(self, word, distance):
def hamming(self: object, word: str, distance: int) -> str:
try:
return next(self.all_hamming(word, distance))
except StopIteration:
return None
def best_hamming(self, word, distance):
def best_hamming(self: object, word: str, distance: int) -> str:
"""Find the best match with {word} in a trie.
:arg str word: Query word.
:arg int distance: Maximum allowed distance.
:arg word: Query word.
:arg distance: Maximum allowed distance.
:returns str: Best match with {word}.
:returns: Best match with {word}.
"""
if self.get(word):
return word
......@@ -262,28 +261,28 @@ class Trie(object):
return None
def all_levenshtein_(self, word, distance):
def all_levenshtein_(self: object, word: str, distance: int) -> iter:
return map(
lambda x: (x[0], distance - x[1], x[2]),
_levenshtein('', self.root, word, distance, ''))
def all_levenshtein(self, word, distance):
def all_levenshtein(self: object, word: str, distance: int) -> iter:
return map(
lambda x: x[0], _levenshtein('', self.root, word, distance, ''))
def levenshtein(self: object, word: str, distance: int) -> str:
    """Find the first word within a certain Levenshtein distance of {word}.

    :arg word: Query word.
    :arg distance: Maximum allowed distance.

    :returns: The first match produced by `all_levenshtein`, or None when
        there is no match.
    """
    try:
        return next(self.all_levenshtein(word, distance))
    except StopIteration:
        # The iterator was exhausted immediately: nothing is close enough.
        return None
def best_levenshtein(self, word, distance):
def best_levenshtein(self: object, word: str, distance: int) -> str:
"""Find the best match with {word} in a trie.
:arg str word: Query word.
:arg int distance: Maximum allowed distance.
:arg word: Query word.
:arg distance: Maximum allowed distance.
:returns str: Best match with {word}.
:returns: Best match with {word}.
"""
if self.get(word):
return word
......
[metadata]
name = dict-trie
version = 1.0.1
version = 1.0.2
description = Trie implementation for approximate string matching.
long_description = file: README.rst
author = Jeroen F.J. Laros
......
......@@ -2,14 +2,14 @@
from dict_trie import Trie
class TestTrie(object):
def setup(self):
class TestTrie(object):
def setup(self: object) -> None:
self._trie = Trie(['abc', 'abd', 'abd', 'test', 'te'])
def test_empty(self):
def test_empty(self: object) -> None:
assert Trie().root == {}
def test_root(self):
def test_root(self: object) -> None:
assert self._trie.root == {
'a': {
'b': {
......@@ -19,222 +19,222 @@ class TestTrie(object):
'': 1,
's': {'t': {'': 1}}}}}
def test_word_present(self):
def test_word_present(self: object) -> None:
assert 'abc' in self._trie
def test_word_absent(self):
def test_word_absent(self: object) -> None:
assert 'abx' not in self._trie
def test_empty_string_present(self):
def test_empty_string_present(self: object) -> None:
assert '' in Trie([''])
def test_empty_string_absent(self):
def test_empty_string_absent(self: object) -> None:
assert '' not in self._trie
def test_prefix_absent_as_word(self):
def test_prefix_absent_as_word(self: object) -> None:
assert 'ab' not in self._trie
def test_too_long_absent(self):
def test_too_long_absent(self: object) -> None:
assert 'abcd' not in self._trie
def test_prefix_present(self):
def test_prefix_present(self: object) -> None:
assert self._trie.has_prefix('ab')
def test_prefix_absent(self):
def test_prefix_absent(self: object) -> None:
assert not self._trie.has_prefix('ac')
def test_word_is_prefix(self):
def test_word_is_prefix(self: object) -> None:
assert self._trie.has_prefix('abc')
def test_too_long_prefix_absent(self):
def test_too_long_prefix_absent(self: object) -> None:
assert not self._trie.has_prefix('abcd')
def test_prefix_order(self):
def test_prefix_order(self: object) -> None:
assert Trie(['test', 'te']).root == Trie(['te', 'test']).root
def test_add(self):
def test_add(self: object) -> None:
self._trie.add('abx')
assert 'abx' in self._trie
def test_get_present(self):
def test_get_present(self: object) -> None:
assert self._trie.get('abc') == 1
def test_get_absent(self):
def test_get_absent(self: object) -> None:
assert not self._trie.get('abx')
def test_add_twice(self):
def test_add_twice(self: object) -> None:
self._trie.add('abc')
assert self._trie.get('abc') == 2
def test_add_multiple(self):
def test_add_multiple(self: object) -> None:
self._trie.add('abc', 2)
assert self._trie.get('abc') == 3
def test_remove_present(self):
def test_remove_present(self: object) -> None:
assert self._trie.remove('test')
assert 'test' not in self._trie
assert 'te' in self._trie
def test_remove_prefix_present(self):
def test_remove_prefix_present(self: object) -> None:
assert self._trie.remove('te')
assert 'te' not in self._trie
assert 'test' in self._trie
def test_remove_absent(self):
def test_remove_absent(self: object) -> None:
assert not self._trie.remove('xxxx')
def test_remove_prefix_absent(self):
def test_remove_prefix_absent(self: object) -> None:
assert not self._trie.remove('ab')
def test_remove_twice(self):
def test_remove_twice(self: object) -> None:
self._trie.add('abc')
assert not self._trie.remove('abc')
assert self._trie.get('abc') == 1
assert self._trie.remove('abc')
assert 'abc' not in self._trie
def test_remove_multile(self):
def test_remove_multile(self: object) -> None:
self._trie.add('abc', 3)
assert not self._trie.remove('abc', 2)
assert self._trie.get('abc') == 2
def test_remove_force(self):
def test_remove_force(self: object) -> None:
self._trie.add('abc')
assert self._trie.remove('abc', -1)
assert 'abc' not in self._trie
def test_iter(self):
def test_iter(self: object) -> None:
assert set(self._trie) == set(['abc', 'abd', 'te', 'test'])
def test_list(self):
def test_list(self: object) -> None:
assert list(self._trie.list()) == list(self._trie)
def test_list_non_unique(self):
def test_list_non_unique(self: object) -> None:
assert set(self._trie.list(False)) == set(
['abc', 'abd', 'abd', 'te', 'test'])
def test_fill(self):
def test_fill(self: object) -> None:
trie = Trie()
trie.fill(('a', 'b'), 3)
assert set(trie) == set(
['aaa', 'aab', 'aba', 'abb', 'baa', 'bab', 'bba', 'bbb'])
def test_all_hamming_1_perfect(self):
def test_all_hamming_1_perfect(self: object) -> None:
assert set(self._trie.all_hamming('abc', 1)) == set(['abc', 'abd'])
def test_all_hamming_1_not_perfect(self):
def test_all_hamming_1_not_perfect(self: object) -> None:
assert set(self._trie.all_hamming('abx', 1)) == set(['abc', 'abd'])
def test_all_hamming_1_no_match(self):
def test_all_hamming_1_no_match(self: object) -> None:
assert not list(self._trie.all_hamming('xbx', 1))
def test_hamming_0_no_prefix(self):
def test_hamming_0_no_prefix(self: object) -> None:
assert self._trie.hamming('ab', 0) is None
def test_hamming_0_match(self):
def test_hamming_0_match(self: object) -> None:
assert self._trie.hamming('abc', 0) == 'abc'
def test_hamming_0_match_empty_word(self):
def test_hamming_0_match_empty_word(self: object) -> None:
assert Trie(['']).hamming('', 0) == ''
def test_hamming_0_match_sub(self):
def test_hamming_0_match_sub(self: object) -> None:
assert self._trie.hamming('te', 0) == 'te'
def test_hamming_0_too_long(self):
def test_hamming_0_too_long(self: object) -> None:
assert self._trie.hamming('abcd', 0) is None
def test_hamming_1_match(self):
def test_hamming_1_match(self: object) -> None:
assert self._trie.hamming('abc', 1) in ['abc', 'abd']
def test_hamming_1_match_sub(self):
def test_hamming_1_match_sub(self: object) -> None:
assert self._trie.hamming('te', 1) == 'te'
def test_hamming_1_match_1(self):
def test_hamming_1_match_1(self: object) -> None:
assert self._trie.hamming('xbc', 1) == 'abc'
def test_hamming_1_match_2(self):
def test_hamming_1_match_2(self: object) -> None:
assert self._trie.hamming('axc', 1) == 'abc'
def test_hamming_1_match_3(self):
def test_hamming_1_match_3(self: object) -> None:
assert self._trie.hamming('abx', 1) in ['abc', 'abd']
def test_hamming_1_match_4(self):
def test_hamming_1_match_4(self: object) -> None:
assert self._trie.hamming('abd', 1) in ['abc', 'abd']
def test_hamming_1_no_prefix(self):
def test_hamming_1_no_prefix(self: object) -> None:
assert self._trie.hamming('ab', 1) is None
def test_hamming_1_too_long(self):
def test_hamming_1_too_long(self: object) -> None:
assert self._trie.hamming('abcd', 1) is None
def test_hamming_1_match_sub_1(self):
def test_hamming_1_match_sub_1(self: object) -> None:
assert self._trie.hamming('tx', 1) == 'te'
def test_hamming_1_match_sub_2(self):
def test_hamming_1_match_sub_2(self: object) -> None:
assert self._trie.hamming('xe', 1) == 'te'
def test_hamming_1_mismatch(self):
def test_hamming_1_mismatch(self: object) -> None:
assert self._trie.hamming('txxt', 1) is None
def test_hamming_2_match(self):
def test_hamming_2_match(self: object) -> None:
assert self._trie.hamming('txxt', 2) == 'test'
def test_best_hamming_match(self):
def test_best_hamming_match(self: object) -> None:
assert self._trie.best_hamming('abd', 1) == 'abd'
def test_best_hamming_no_match(self):
def test_best_hamming_no_match(self: object) -> None:
assert self._trie.best_hamming('ab', 0) is None
def test_levenshtein_0_match_empty_word(self):
def test_levenshtein_0_match_empty_word(self: object) -> None:
assert Trie(['']).levenshtein('', 0) == ''
def test_levenshtein_0_no_match_empty_word(self):
def test_levenshtein_0_no_match_empty_word(self: object) -> None:
assert Trie(['']).levenshtein('a', 0) is None
def test_levenshtein_1_match_empty_word(self):
def test_levenshtein_1_match_empty_word(self: object) -> None:
assert Trie(['']).levenshtein('a', 1) == ''
def test_levenshtein_1_no_match_empty_word(self):
def test_levenshtein_1_no_match_empty_word(self: object) -> None:
assert Trie(['']).levenshtein('ab', 1) is None
def test_all_levenshtein_1_not_perfect(self):
def test_all_levenshtein_1_not_perfect(self: object) -> None:
assert list(self._trie.all_levenshtein('tes', 1)) == ['te', 'test']
def test_levenshtein_0_match_1(self):
def test_levenshtein_0_match_1(self: object) -> None:
assert self._trie.levenshtein('abc', 0) in ['abc', 'abd']
def test_levenshtein_0_match_2(self):
def test_levenshtein_0_match_2(self: object) -> None:
assert self._trie.levenshtein('te', 0) == 'te'
def test_levenshtein_1_subst(self):
def test_levenshtein_1_subst(self: object) -> None:
assert self._trie.levenshtein('axc', 1) == 'abc'
def test_levenshtein_1_del(self):
def test_levenshtein_1_del(self: object) -> None:
assert self._trie.levenshtein('ac', 1) == 'abc'
def test_levenshtein_1_prefix(self):
def test_levenshtein_1_prefix(self: object) -> None:
assert self._trie.levenshtein('ab', 1) in ['abc', 'abd']
def test_levenshtein_1_ins(self):
def test_levenshtein_1_ins(self: object) -> None:
assert self._trie.levenshtein('abbc', 1) == 'abc'
def test_all_hamming_2(self):
def test_all_hamming_2(self: object) -> None:
assert set(self._trie.all_hamming_('acb', 2)) == set(
[('abc', 2, '=XX'), ('abd', 2, '=XX')])
def test_all_levenshtein_2(self):
def test_all_levenshtein_2(self: object) -> None:
assert set(self._trie.all_levenshtein_('acb', 2)) == set([
('abc', 2, '=D=I'), ('abd', 2, '=D=I'), ('abc', 2, '=XX'),
('abd', 2, '=XX'), ('abc', 2, '=I=D')])
def test_best_levenshtein_match_emty_word(self):
def test_best_levenshtein_match_emty_word(self: object) -> None:
assert Trie(['']).best_levenshtein('a', 1) == ''
def test_best_levenshtein_no_match_emty_word(self):
def test_best_levenshtein_no_match_emty_word(self: object) -> None:
assert Trie(['']).best_levenshtein('ab', 1) is None
def test_best_levenshtein_match(self):
def test_best_levenshtein_match(self: object) -> None:
assert self._trie.best_levenshtein('abd', 1) == 'abd'
def test_best_levenshtein_no_match(self):
def test_best_levenshtein_no_match(self: object) -> None:
assert self._trie.best_levenshtein('ab', 0) is None
[tox]
envlist = py27,py35
envlist = py35,py36,py37
[testenv]