Added iterable trie searching.

parent fd053c81
 def _hamming(path, node, word, distance): def _hamming(path, node, word, distance): """Find the first path in the trie that is within a certain hamming """Find all paths in the trie that are within a certain hamming distance of distance of {word}. Note that this does not necessarily the one with the {word}. smallest distance. :arg str path: Path taken so far to reach the current node. :arg str path: Path taken so far to reach the current node. :arg dict node: Current node. :arg dict node: Current node. :arg str word: Query word. :arg str word: Query word. :arg int distance: Amount of errors we can still make. :arg int distance: Amount of errors we can still make. :returns str: A word in the trie that has Hamming distance of at most :returns iter: All word in the trie that have Hamming distance of at most {distance} to {word}. {distance} to {word}. """ """ if distance < 0: if distance < 0: return '' return if not word: if not word: return path if '' in node else '' if '' in node: yield path return car, cdr = word, word[1:] car, cdr = word, word[1:] for char in node: for char in node: result = _hamming( for result in _hamming( path + char, node[char], cdr, distance - int(char != car)) path + char, node[char], cdr, distance - int(char != car)): if result: yield result return result return '' def _levenshtein(path, node, word, distance): def _levenshtein(path, node, word, distance): """ """ """ """ if distance < 0: if distance < 0: return '' return if not word: if not word: return path if '' in node else '' if '' in node: yield path return car, cdr = word, word[1:] car, cdr = word, word[1:] # Deletion. # Deletion. result = _levenshtein(path, node, cdr, distance - 1) for result in _levenshtein(path, node, cdr, distance - 1): if result: yield result return result for char in node: for char in node: # Substitution and insertion. # Substitution and insertion. 
result = ( for result in _levenshtein( _levenshtein( path + char, node[char], cdr, distance - int(char != car)): path + char, node[char], cdr, distance - int(char != car)) or yield result _levenshtein(path + char, node[char], word, distance - 1)) for result in _levenshtein( if result: path + char, node[char], word, distance - 1): return result yield result return '' class Trie(object): class Trie(object): ... @@ -107,9 +104,15 @@ class Trie(object): ... @@ -107,9 +104,15 @@ class Trie(object): def has_prefix(self, word): def has_prefix(self, word): return self._find(word) != {} return self._find(word) != {} def hamming(self, word, distance): def all_hamming(self, word, distance): return _hamming('', self.root, word, distance) return _hamming('', self.root, word, distance) def hamming(self, word, distance): try: return self.all_hamming(word, distance).next() except StopIteration: return '' def best_hamming(self, word, distance): def best_hamming(self, word, distance): """Find the best match with {word} in the trie. """Find the best match with {word} in the trie. ... @@ -128,9 +131,15 @@ class Trie(object): ... @@ -128,9 +131,15 @@ class Trie(object): return '' return '' def levenshtein(self, word, distance): def all_levenshtein(self, word, distance): return _levenshtein('', self.root, word, distance) return _levenshtein('', self.root, word, distance) def levenshtein(self, word, distance): try: return self.all_levenshtein(word, distance).next() except StopIteration: return '' def best_levenshtein(self, word, distance): def best_levenshtein(self, word, distance): """Find the best match with {word} in the trie. """Find the best match with {word} in the trie. ... ...
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment