From 8258d45c640153ecd98f63b5fda47a2756430a55 Mon Sep 17 00:00:00 2001 From: "J.F.J. Laros" Date: Thu, 27 Apr 2017 17:16:51 +0200 Subject: [PATCH] Added iterable trie searching. --- dict_trie/dict_trie.py | 63 ++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/dict_trie/dict_trie.py b/dict_trie/dict_trie.py index f12fa5c..05e1c77 100644 --- a/dict_trie/dict_trie.py +++ b/dict_trie/dict_trie.py @@ -1,56 +1,53 @@ def _hamming(path, node, word, distance): - """Find the first path in the trie that is within a certain hamming - distance of {word}. Note that this does not necessarily the one with the - smallest distance. + """Find all paths in the trie that are within a certain hamming distance of + {word}. :arg str path: Path taken so far to reach the current node. :arg dict node: Current node. :arg str word: Query word. :arg int distance: Amount of errors we can still make. - :returns str: A word in the trie that has Hamming distance of at most + :returns iter: All word in the trie that have Hamming distance of at most {distance} to {word}. """ if distance < 0: - return '' + return if not word: - return path if '' in node else '' + if '' in node: + yield path + return car, cdr = word[0], word[1:] for char in node: - result = _hamming( - path + char, node[char], cdr, distance - int(char != car)) - if result: - return result - - return '' + for result in _hamming( + path + char, node[char], cdr, distance - int(char != car)): + yield result def _levenshtein(path, node, word, distance): """ """ if distance < 0: - return '' + return if not word: - return path if '' in node else '' + if '' in node: + yield path + return car, cdr = word[0], word[1:] # Deletion. - result = _levenshtein(path, node, cdr, distance - 1) - if result: - return result + for result in _levenshtein(path, node, cdr, distance - 1): + yield result for char in node: # Substitution and insertion. - result = ( - _levenshtein( - path + char, node[char], cdr, distance - int(char != car)) or - _levenshtein(path + char, node[char], word, distance - 1)) - if result: - return result - - return '' + for result in _levenshtein( + path + char, node[char], cdr, distance - int(char != car)): + yield result + for result in _levenshtein( + path + char, node[char], word, distance - 1): + yield result class Trie(object): @@ -107,9 +104,15 @@ class Trie(object): def has_prefix(self, word): return self._find(word) != {} - def hamming(self, word, distance): + def all_hamming(self, word, distance): return _hamming('', self.root, word, distance) + def hamming(self, word, distance): + try: + return self.all_hamming(word, distance).next() + except StopIteration: + return '' + def best_hamming(self, word, distance): """Find the best match with {word} in the trie. @@ -128,9 +131,15 @@ class Trie(object): return '' - def levenshtein(self, word, distance): + def all_levenshtein(self, word, distance): return _levenshtein('', self.root, word, distance) + def levenshtein(self, word, distance): + try: + return self.all_levenshtein(word, distance).next() + except StopIteration: + return '' + def best_levenshtein(self, word, distance): """Find the best match with {word} in the trie. -- 2.22.2