Commit bbb84ae2 by Jeroen F.J. Laros

### Added more informative output to the approximate matching algorithms.

parent 64f36cef
 import itertools def _add(root, word): """Add a word to the trie. ... ... @@ -93,7 +96,7 @@ def _fill(node, alphabet, length): _fill(node[char], alphabet, length - 1) def _hamming(path, node, word, distance): def _hamming(path, node, word, distance, cigar): """Find all paths in the trie that are within a certain hamming distance of {word}. ... ... @@ -109,17 +112,24 @@ def _hamming(path, node, word, distance): return if not word: if '' in node: yield path yield (path, distance, cigar) return car, cdr = word[0], word[1:] for char in node: if char == car: penalty = 0 operation = '=' else: penalty = 1 operation = 'X' for result in _hamming( path + char, node[char], cdr, distance - int(char != car)): path + char, node[char], cdr, distance - penalty, cigar + operation): yield result def _levenshtein(path, node, word, distance): def _levenshtein(path, node, word, distance, cigar): """Find all paths in the trie that are within a certain Levenshtein distance of {word}. ... ... @@ -135,24 +145,31 @@ def _levenshtein(path, node, word, distance): return if not word: if '' in node: yield path yield (path, distance, cigar) car, cdr = '', '' else: car, cdr = word[0], word[1:] # Deletion. for result in _levenshtein(path, node, cdr, distance - 1): for result in _levenshtein(path, node, cdr, distance - 1, cigar + 'D'): yield result for char in node: # Substitution. if car: if char == car: penalty = 0 operation = '=' else: penalty = 1 operation = 'X' for result in _levenshtein( path + char, node[char], cdr, distance - int(char != car)): path + char, node[char], cdr, distance - penalty, cigar + operation): yield result # Insertion. for result in _levenshtein( path + char, node[char], word, distance - 1): path + char, node[char], word, distance - 1, cigar + 'I'): yield result ... ... 
@@ -186,8 +203,14 @@ class Trie(object): def fill(self, alphabet, length): _fill(self.root, alphabet, length) def all_hamming_(self, word, distance): return itertools.imap( lambda x: (x[0], distance - x[1], x[2]), _hamming('', self.root, word, distance, '')) def all_hamming(self, word, distance): return _hamming('', self.root, word, distance) return itertools.imap( lambda x: x[0], _hamming('', self.root, word, distance, '')) def hamming(self, word, distance): try: ... ... @@ -213,8 +236,14 @@ class Trie(object): return '' def all_levenshtein_(self, word, distance): return itertools.imap( lambda x: (x[0], distance - x[1], x[2]), _levenshtein('', self.root, word, distance, '')) def all_levenshtein(self, word, distance): return _levenshtein('', self.root, word, distance) return itertools.imap( lambda x: x[0], _levenshtein('', self.root, word, distance, '')) def levenshtein(self, word, distance): try: ... ...
 ... ... @@ -165,3 +165,12 @@ class TestTrie(object): def test_levenshtein_1_ins(self): assert self._trie.levenshtein('abbc', 1) == 'abc' def test_all_hamming__2(self): assert list(self._trie.all_hamming_('acb', 2)) == [ ('abc', 2, '=XX'), ('abd', 2, '=XX')] def test_all_levenshtein__2(self): assert list(self._trie.all_levenshtein_('acb', 2)) == [ ('abc', 2, '=D=I'), ('abd', 2, '=D=I'), ('abc', 2, '=XX'), ('abd', 2, '=XX'), ('abc', 2, '=I=D')]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment