Commit ec278f2d authored by Jeroen F.J. Laros

First version of Levenshtein distance.

parent 57215bc0
...@@ -15,9 +15,7 @@ def _hamming(path, node, word, distance): ...@@ -15,9 +15,7 @@ def _hamming(path, node, word, distance):
if distance < 0: if distance < 0:
return '' return ''
if not word: if not word:
if '' in node: return path if '' in node else ''
return path
return ''
car, cdr = word[0], word[1:] car, cdr = word[0], word[1:]
for char in node: for char in node:
...@@ -29,8 +27,31 @@ def _hamming(path, node, word, distance): ...@@ -29,8 +27,31 @@ def _hamming(path, node, word, distance):
return '' return ''
def _edit(path, node, word, distance): def _levenshtein(path, node, word, distance):
pass """
"""
if distance < 0:
return ''
if not word:
return path if '' in node else ''
car, cdr = word[0], word[1:]
# Deletion.
result = _levenshtein(path, node, cdr, distance - 1)
if result:
return result
for char in node:
# Substitution and insertion.
result = (
_levenshtein(
path + char, node[char], cdr, distance - int(char != car)) or
_levenshtein(path + char, node[char], word, distance - 1))
if result:
return result
return ''
class Trie(object): class Trie(object):
...@@ -113,5 +134,5 @@ class Trie(object): ...@@ -113,5 +134,5 @@ class Trie(object):
return '' return ''
def edit(self, word, distance): def levenshtein(self, word, distance):
return _edit('', self.root, word, distance) return _levenshtein('', self.root, word, distance)
...@@ -104,3 +104,18 @@ class TestTrie(object): ...@@ -104,3 +104,18 @@ class TestTrie(object):
def test_best_hamming_match(self): def test_best_hamming_match(self):
assert self._trie.best_hamming('abd', 1) == 'abd' assert self._trie.best_hamming('abd', 1) == 'abd'
def test_levenshtein_0_match_1(self):
assert self._trie.levenshtein('abc', 0) == 'abc'
def test_levenshtein_0_match_2(self):
assert self._trie.levenshtein('te', 0) == 'te'
def test_levenshtein_1_subst(self):
assert self._trie.levenshtein('axc', 1) == 'abc'
def test_levenshtein_1_del(self):
assert self._trie.levenshtein('ac', 1) == 'abc'
def test_levenshtein_1_ins(self):
assert self._trie.levenshtein('abbc', 1) == 'abc'
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment