Commit ec278f2d authored by Jeroen F.J. Laros

First version of Levenshtein distance.

parent 57215bc0
......@@ -15,9 +15,7 @@ def _hamming(path, node, word, distance):
if distance < 0:
return ''
if not word:
if '' in node:
return path
return ''
return path if '' in node else ''
car, cdr = word[0], word[1:]
for char in node:
......@@ -29,8 +27,31 @@ def _hamming(path, node, word, distance):
return ''
def _edit(path, node, word, distance):
pass
def _levenshtein(path, node, word, distance):
"""
"""
if distance < 0:
return ''
if not word:
return path if '' in node else ''
car, cdr = word[0], word[1:]
# Deletion.
result = _levenshtein(path, node, cdr, distance - 1)
if result:
return result
for char in node:
# Substitution and insertion.
result = (
_levenshtein(
path + char, node[char], cdr, distance - int(char != car)) or
_levenshtein(path + char, node[char], word, distance - 1))
if result:
return result
return ''
class Trie(object):
......@@ -113,5 +134,5 @@ class Trie(object):
return ''
def edit(self, word, distance):
    """Run `_edit` on `word` from the root of the trie with an empty path."""
    root = self.root
    return _edit('', root, word, distance)
def levenshtein(self, word, distance):
    """Search the trie for a word within `distance` edits of `word`.

    Delegates to `_levenshtein`, starting at the root with an empty path,
    and returns whatever that search returns.
    """
    root = self.root
    return _levenshtein('', root, word, distance)
......@@ -104,3 +104,18 @@ class TestTrie(object):
def test_best_hamming_match(self):
    """`best_hamming` with budget 1 returns 'abd' for the query 'abd'."""
    match = self._trie.best_hamming('abd', 1)
    assert match == 'abd'
def test_levenshtein_0_match_1(self):
    """With no edits allowed, the query 'abc' is returned unchanged."""
    match = self._trie.levenshtein('abc', 0)
    assert match == 'abc'
def test_levenshtein_0_match_2(self):
    """With no edits allowed, the query 'te' is returned unchanged."""
    match = self._trie.levenshtein('te', 0)
    assert match == 'te'
def test_levenshtein_1_subst(self):
    """With one edit allowed, 'axc' resolves to 'abc' (one character differs)."""
    match = self._trie.levenshtein('axc', 1)
    assert match == 'abc'
def test_levenshtein_1_del(self):
    """With one edit allowed, 'ac' resolves to 'abc' (the query lacks 'b')."""
    match = self._trie.levenshtein('ac', 1)
    assert match == 'abc'
def test_levenshtein_1_ins(self):
    """With one edit allowed, 'abbc' resolves to 'abc' (the query has an extra 'b')."""
    match = self._trie.levenshtein('abbc', 1)
    assert match == 'abc'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment