Commit 8258d45c authored by Jeroen F.J. Laros

Added iterable trie searching.

parent fd053c81
def _hamming(path, node, word, distance): def _hamming(path, node, word, distance):
"""Find the first path in the trie that is within a certain hamming """Find all paths in the trie that are within a certain hamming distance of
distance of {word}. Note that this does not necessarily the one with the {word}.
smallest distance.
:arg str path: Path taken so far to reach the current node. :arg str path: Path taken so far to reach the current node.
:arg dict node: Current node. :arg dict node: Current node.
:arg str word: Query word. :arg str word: Query word.
:arg int distance: Amount of errors we can still make. :arg int distance: Amount of errors we can still make.
:returns str: A word in the trie that has Hamming distance of at most :returns iter: All word in the trie that have Hamming distance of at most
{distance} to {word}. {distance} to {word}.
""" """
if distance < 0: if distance < 0:
return '' return
if not word: if not word:
return path if '' in node else '' if '' in node:
yield path
return
car, cdr = word[0], word[1:] car, cdr = word[0], word[1:]
for char in node: for char in node:
result = _hamming( for result in _hamming(
path + char, node[char], cdr, distance - int(char != car)) path + char, node[char], cdr, distance - int(char != car)):
if result: yield result
return result
return ''
def _levenshtein(path, node, word, distance): def _levenshtein(path, node, word, distance):
""" """
""" """
if distance < 0: if distance < 0:
return '' return
if not word: if not word:
return path if '' in node else '' if '' in node:
yield path
return
car, cdr = word[0], word[1:] car, cdr = word[0], word[1:]
# Deletion. # Deletion.
result = _levenshtein(path, node, cdr, distance - 1) for result in _levenshtein(path, node, cdr, distance - 1):
if result: yield result
return result
for char in node: for char in node:
# Substitution and insertion. # Substitution and insertion.
result = ( for result in _levenshtein(
_levenshtein( path + char, node[char], cdr, distance - int(char != car)):
path + char, node[char], cdr, distance - int(char != car)) or yield result
_levenshtein(path + char, node[char], word, distance - 1)) for result in _levenshtein(
if result: path + char, node[char], word, distance - 1):
return result yield result
return ''
class Trie(object): class Trie(object):
...@@ -107,9 +104,15 @@ class Trie(object): ...@@ -107,9 +104,15 @@ class Trie(object):
def has_prefix(self, word):
    """Check whether {word} occurs as a prefix in the trie.

    NOTE(review): presumably `_find` returns the node reached by following
    {word}, or an empty dict when there is none -- confirm against its
    definition.

    :arg str word: Prefix to look up.

    :returns bool: False if `_find` returns an empty dict, True otherwise.
    """
    return self._find(word) != {}
def all_hamming(self, word, distance):
    """Find all paths in the trie that are within a certain Hamming distance
    of {word}.

    :arg str word: Query word.
    :arg int distance: Maximum number of errors allowed.

    :returns iter: Whatever `_hamming` produces when started at the root
        node with an empty path.
    """
    return _hamming('', self.root, word, distance)
def hamming(self, word, distance):
    """Find the first path in the trie that is within a certain Hamming
    distance of {word}.

    Note that this is not necessarily the one with the smallest distance.

    :arg str word: Query word.
    :arg int distance: Maximum number of errors allowed.

    :returns str: The first result of `all_hamming`, or an empty string
        when it produces none.
    """
    # The next() builtin with a default works on Python 2.6+ and 3, unlike
    # the Python 2 only `.next()` method, and makes the explicit
    # StopIteration handler unnecessary.
    return next(self.all_hamming(word, distance), '')
def best_hamming(self, word, distance): def best_hamming(self, word, distance):
"""Find the best match with {word} in the trie. """Find the best match with {word} in the trie.
...@@ -128,9 +131,15 @@ class Trie(object): ...@@ -128,9 +131,15 @@ class Trie(object):
return '' return ''
def all_levenshtein(self, word, distance):
    """Find all paths in the trie that are within a certain Levenshtein
    distance of {word}.

    :arg str word: Query word.
    :arg int distance: Maximum number of errors allowed.

    :returns iter: Whatever `_levenshtein` produces when started at the
        root node with an empty path.
    """
    return _levenshtein('', self.root, word, distance)
def levenshtein(self, word, distance):
    """Find the first path in the trie that is within a certain Levenshtein
    distance of {word}.

    :arg str word: Query word.
    :arg int distance: Maximum number of errors allowed.

    :returns str: The first result of `all_levenshtein`, or an empty string
        when it produces none.
    """
    # The next() builtin with a default works on Python 2.6+ and 3, unlike
    # the Python 2 only `.next()` method, and makes the explicit
    # StopIteration handler unnecessary.
    return next(self.all_levenshtein(word, distance), '')
def best_levenshtein(self, word, distance): def best_levenshtein(self, word, distance):
"""Find the best match with {word} in the trie. """Find the best match with {word} in the trie.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment