Commit bbb84ae2 authored by Jeroen F.J. Laros

Added more informative output to the approximate matching algorithms.

parent 64f36cef
import itertools
def _add(root, word):
"""Add a word to the trie.
......@@ -93,7 +96,7 @@ def _fill(node, alphabet, length):
_fill(node[char], alphabet, length - 1)
def _hamming(path, node, word, distance):
def _hamming(path, node, word, distance, cigar):
"""Find all paths in the trie that are within a certain hamming distance of
{word}.
......@@ -109,17 +112,24 @@ def _hamming(path, node, word, distance):
return
if not word:
if '' in node:
yield path
yield (path, distance, cigar)
return
car, cdr = word[0], word[1:]
for char in node:
if char == car:
penalty = 0
operation = '='
else:
penalty = 1
operation = 'X'
for result in _hamming(
path + char, node[char], cdr, distance - int(char != car)):
path + char, node[char], cdr, distance - penalty,
cigar + operation):
yield result
def _levenshtein(path, node, word, distance):
def _levenshtein(path, node, word, distance, cigar):
"""Find all paths in the trie that are within a certain Levenshtein
distance of {word}.
......@@ -135,24 +145,31 @@ def _levenshtein(path, node, word, distance):
return
if not word:
if '' in node:
yield path
yield (path, distance, cigar)
car, cdr = '', ''
else:
car, cdr = word[0], word[1:]
# Deletion.
for result in _levenshtein(path, node, cdr, distance - 1):
for result in _levenshtein(path, node, cdr, distance - 1, cigar + 'D'):
yield result
for char in node:
# Substitution.
if car:
if char == car:
penalty = 0
operation = '='
else:
penalty = 1
operation = 'X'
for result in _levenshtein(
path + char, node[char], cdr, distance - int(char != car)):
path + char, node[char], cdr, distance - penalty,
cigar + operation):
yield result
# Insertion.
for result in _levenshtein(
path + char, node[char], word, distance - 1):
path + char, node[char], word, distance - 1, cigar + 'I'):
yield result
......@@ -186,8 +203,14 @@ class Trie(object):
def fill(self, alphabet, length):
    """Fill the trie by handing its root node to the _fill helper.

    :arg alphabet: Forwarded unchanged to _fill.
    :arg length: Forwarded unchanged to _fill.
    """
    _fill(self.root, alphabet, length)
def all_hamming_(self, word, distance):
    """Find all words within a certain Hamming distance of {word} and
    report how each one was matched.

    :arg word: The query word.
    :arg distance: Maximum distance allowed.

    :returns: Iterator of (match, distance used, cigar) tuples. The cigar
        string holds one character per position: '=' for a matching
        character and 'X' for a mismatch.
    """
    # _hamming yields the part of the distance budget that is left over;
    # subtract it from the budget to obtain the distance actually spent.
    return (
        (path, distance - remaining, cigar)
        for path, remaining, cigar in _hamming(
            '', self.root, word, distance, ''))
def all_hamming(self, word, distance):
    """Find all words within a certain Hamming distance of {word}.

    :arg word: The query word.
    :arg distance: Maximum distance allowed.

    :returns: Iterator of the matching words only; the remaining distance
        and cigar string produced by _hamming are discarded.
    """
    # _hamming takes an initial (empty) cigar string as its fifth argument
    # and yields 3-tuples; only the first element is the matched word.
    return (
        path
        for path, _, _ in _hamming('', self.root, word, distance, ''))
def hamming(self, word, distance):
try:
......@@ -213,8 +236,14 @@ class Trie(object):
return ''
def all_levenshtein_(self, word, distance):
    """Find all words within a certain Levenshtein distance of {word} and
    report how each one was matched.

    :arg word: The query word.
    :arg distance: Maximum distance allowed.

    :returns: Iterator of (match, distance used, cigar) tuples. The cigar
        string uses '=' for a matching character, 'X' for a substitution,
        'D' where a character of {word} is skipped and 'I' where the match
        has a character that {word} lacks. The same match can be reported
        more than once, with different cigar strings.
    """
    # _levenshtein yields the part of the distance budget that is left
    # over; subtract it from the budget to obtain the distance spent.
    return (
        (path, distance - remaining, cigar)
        for path, remaining, cigar in _levenshtein(
            '', self.root, word, distance, ''))
def all_levenshtein(self, word, distance):
    """Find all words within a certain Levenshtein distance of {word}.

    :arg word: The query word.
    :arg distance: Maximum distance allowed.

    :returns: Iterator of the matching words only; the remaining distance
        and cigar string produced by _levenshtein are discarded. A word is
        yielded once for every alignment that reaches it.
    """
    # _levenshtein takes an initial (empty) cigar string as its fifth
    # argument and yields 3-tuples; only the first element is the word.
    return (
        path
        for path, _, _ in _levenshtein('', self.root, word, distance, ''))
def levenshtein(self, word, distance):
try:
......
......@@ -165,3 +165,12 @@ class TestTrie(object):
def test_levenshtein_1_ins(self):
    """Looking up 'abbc' with Levenshtein distance 1 must return 'abc'."""
    closest = self._trie.levenshtein('abbc', 1)
    assert closest == 'abc'
def test_all_hamming__2(self):
    """all_hamming_ must report each match for 'acb' at distance 2 together
    with the distance used and the cigar string."""
    expected = [('abc', 2, '=XX'), ('abd', 2, '=XX')]
    found = list(self._trie.all_hamming_('acb', 2))
    assert found == expected
def test_all_levenshtein__2(self):
    """all_levenshtein_ must report every alignment for 'acb' at distance 2,
    in the order the trie traversal produces them."""
    expected = [
        ('abc', 2, '=D=I'),
        ('abd', 2, '=D=I'),
        ('abc', 2, '=XX'),
        ('abd', 2, '=XX'),
        ('abc', 2, '=I=D'),
    ]
    found = list(self._trie.all_levenshtein_('acb', 2))
    assert found == expected
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment