dict_trie.py 5.77 KB
Newer Older
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
1 2
def _add(root, word):
    """Add a word to the trie.
3

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
4 5 6 7
    :arg dict root: Root of the trie.
    :arg str word: A word.
    """
    node = root
8

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
    for char in word:
        if char not in node:
            node[char] = {}
        node = node[char]

    node[''] = {}


def _find(root, word):
    """Find the node after following the path in the trie given by {word}.

    :arg dict root: Root of the trie.
    :arg str word: A word.

    :returns dict: The node if found, {} otherwise.
24
    """
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
25 26 27 28 29 30
    node = root

    for char in word:
        if char not in node:
            return {}
        node = node[char]
31

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
32
    return node
33 34


35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
def _remove(node, word):
    """Remove a word from a trie.

    :arg dict node: Current node.
    :arg str word: Word to be removed.

    :returns bool:
    """
    if not word:
        if '' in node:
            node.pop('')
            return True
        return False

    car, cdr = word[0], word[1:]
    if car not in node:
        return False

    result = _remove(node[car], cdr)
    if result:
        if not node[car]:
            node.pop(car)

    return result


Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
61
def _iterate(path, node):
62 63 64 65 66 67 68 69 70 71
    """Convert a trie into a list.

    :arg str path: Path taken so far to reach the current node.
    :arg dict node: Current node.

    :returns iter: All words in the trie.
    """
    if '' in node:
        yield path

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
72 73
    for char in node:
        for result in _iterate(path + char, node[char]):
74 75 76
            yield result


Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
def _fill(node, alphabet, length):
    """Make a full trie using the characters in {alphabet}.

    :arg dict node: Current node.
    :arg tuple alphabet: Used alphabet.
    :arg int length: Length of the words to be generated.

    :returns iter: Trie containing all words of length {length} over alphabet
        {alphabet}.
    """
    if not length:
        node[''] = {}
        return

    for char in alphabet:
        node[char] = {}
        _fill(node[char], alphabet, length - 1)


96
def _hamming(path, node, word, distance):
97 98
    """Find all paths in the trie that are within a certain hamming distance of
    {word}.
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
99

100 101 102
    :arg str path: Path taken so far to reach the current node.
    :arg dict node: Current node.
    :arg str word: Query word.
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
103
    :arg int distance: Amount of allowed errors.
104

105
    :returns iter: All word in the trie that have Hamming distance of at most
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
106
        {distance} to {word}.
107 108
    """
    if distance < 0:
109
        return
110
    if not word:
111 112 113
        if '' in node:
            yield path
        return
114 115 116

    car, cdr = word[0], word[1:]
    for char in node:
117
        for result in _hamming(
Jeroen F.J. Laros's avatar
PEP8.  
Jeroen F.J. Laros committed
118
                path + char, node[char], cdr, distance - int(char != car)):
119
            yield result
120 121


122
def _levenshtein(path, node, word, distance):
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
123 124 125 126 127 128 129 130 131 132
    """Find all paths in the trie that are within a certain Levenshtein
    distance of {word}.

    :arg str path: Path taken so far to reach the current node.
    :arg dict node: Current node.
    :arg str word: Query word.
    :arg int distance: Amount of allowed errors.

    :returns iter: All word in the trie that have Hamming distance of at most
        {distance} to {word}.
133 134
    """
    if distance < 0:
135
        return
136
    if not word:
137 138
        if '' in node:
            yield path
139 140 141
        car, cdr = '', ''
    else:
        car, cdr = word[0], word[1:]
142 143

    # Deletion.
144 145
    for result in _levenshtein(path, node, cdr, distance - 1):
        yield result
146 147

    for char in node:
148 149 150 151 152 153
        # Substitution.
        if car:
            for result in _levenshtein(
                    path + char, node[char], cdr, distance - int(char != car)):
                yield result
        # Insertion.
154 155 156
        for result in _levenshtein(
                path + char, node[char], word, distance - 1):
            yield result
157 158


159
class Trie(object):
160
    def __init__(self, words=None):
Jeroen F.J. Laros's avatar
PEP8.  
Jeroen F.J. Laros committed
161
        """Initialise the class.
162 163 164 165 166

        :arg list words: List of words.
        """
        self.root = {}

167
        if words:
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
168 169
            for word in words:
                self.add(word)
170 171

    def __contains__(self, word):
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
172
        return '' in _find(self.root, word)
173

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
174 175
    def __iter__(self):
        return _iterate('', self.root)
176

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
177 178
    def add(self, word):
        _add(self.root, word)
179

180 181 182
    def remove(self, word):
        return _remove(self.root, word)

183
    def has_prefix(self, word):
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
184
        return _find(self.root, word) != {}
185

186 187 188
    def fill(self, alphabet, length):
        _fill(self.root, alphabet, length)

189
    def all_hamming(self, word, distance):
190
        return _hamming('', self.root, word, distance)
191

192 193 194 195 196 197
    def hamming(self, word, distance):
        try:
            return self.all_hamming(word, distance).next()
        except StopIteration:
            return ''

198
    def best_hamming(self, word, distance):
Jeroen F.J. Laros's avatar
PEP8.  
Jeroen F.J. Laros committed
199
        """Find the best match with {word} in the trie.
200 201

        :arg str word: Query word.
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
202
        :arg int distance: Maximum allowed distance.
203 204 205

        :returns str: Best match with {word}.
        """
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
206
        if _find(self.root, word):
207 208 209 210 211 212 213 214 215
            return word

        for i in range(1, distance + 1):
            result = self.hamming(word, i)
            if result:
                return result

        return ''

216
    def all_levenshtein(self, word, distance):
217
        return _levenshtein('', self.root, word, distance)
218

219 220 221 222 223 224
    def levenshtein(self, word, distance):
        try:
            return self.all_levenshtein(word, distance).next()
        except StopIteration:
            return ''

225
    def best_levenshtein(self, word, distance):
Jeroen F.J. Laros's avatar
PEP8.  
Jeroen F.J. Laros committed
226
        """Find the best match with {word} in the trie.
227 228

        :arg str word: Query word.
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
229
        :arg int distance: Maximum allowed distance.
230 231 232

        :returns str: Best match with {word}.
        """
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
233
        if _find(self.root, word):
234 235 236 237 238 239 240 241
            return word

        for i in range(1, distance + 1):
            result = self.levenshtein(word, i)
            if result:
                return result

        return ''