dict_trie.py 3.64 KB
Newer Older
1
def _hamming(path, node, word, distance):
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
2 3 4
    """Find the first path in the trie that is within a certain hamming
    distance of {word}. Note that this does not necessarily the one with the
    smallest distance.
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
5

6 7 8 9 10
    :arg str path: Path taken so far to reach the current node.
    :arg dict node: Current node.
    :arg str word: Query word.
    :arg int distance: Amount of errors we can still make.

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
11 12
    :returns str: A word in the trie that has Hamming distance of at most
        {distance} to {word}.
13 14 15 16
    """
    if distance < 0:
        return ''
    if not word:
17
        return path if '' in node else ''
18 19 20 21 22 23 24 25 26 27 28

    car, cdr = word[0], word[1:]
    for char in node:
        result = _hamming(
            path + char, node[char], cdr, distance - int(char != car))
        if result:
            return result

    return ''


29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
def _levenshtein(path, node, word, distance):
    """
    """
    if distance < 0:
        return ''
    if not word:
        return path if '' in node else ''

    car, cdr = word[0], word[1:]

    # Deletion.
    result = _levenshtein(path, node, cdr, distance - 1)
    if result:
        return result

    for char in node:
        # Substitution and insertion.
        result = (
            _levenshtein(
                path + char, node[char], cdr, distance - int(char != car)) or
            _levenshtein(path + char, node[char], word, distance - 1))
        if result:
            return result

    return ''
54 55


56 57
class Trie(object):
    def __init__(self, words):
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
58
        """Initialise the class.
59 60 61 62 63 64 65 66

        :arg list words: List of words.
        """
        self.root = {}

        self._build(words)

    def _build(self, words):
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
67
        """Build the trie.
68 69 70 71 72 73 74

        :arg list words: List of words.
        """
        for word in words:
            self.add(word)

    def _find(self, word):
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
75
        """Find the node after following the path in the trie given by {word}.
76 77 78

        :arg str word: A word.

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
79
        :returns dict: The node if found, {} otherwise.
80 81 82
        """
        node = self.root

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
83 84
        for char in word:
            if char not in node:
85
                return {}
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
86
            node = node[char]
87 88 89 90

        return node

    def __contains__(self, word):
91
        return '' in self._find(word)
92 93

    def add(self, word):
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
94
        """Add a word to the trie.
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
95 96

        :arg str word: A word.
97 98 99
        """
        node = self.root

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
100 101 102 103
        for char in word:
            if char not in node:
                node[char] = {}
            node = node[char]
104 105 106 107

        node[''] = {}

    def has_prefix(self, word):
108
        return self._find(word) != {}
109 110 111

    def hamming(self, word, distance):
        return _hamming('', self.root, word, distance)
112 113

    def best_hamming(self, word, distance):
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
114
        """Find the best match with {word} in the trie.
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130

        :arg str word: Query word.
        :arg int distance: Amount of errors we can still make.

        :returns str: Best match with {word}.
        """
        if word in self:
            return word

        for i in range(1, distance + 1):
            result = self.hamming(word, i)
            if result:
                return result

        return ''

131 132
    def levenshtein(self, word, distance):
        return _levenshtein('', self.root, word, distance)
133 134

    def best_levenshtein(self, word, distance):
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
135
        """Find the best match with {word} in the trie.
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150

        :arg str word: Query word.
        :arg int distance: Amount of errors we can still make.

        :returns str: Best match with {word}.
        """
        if word in self:
            return word

        for i in range(1, distance + 1):
            result = self.levenshtein(word, i)
            if result:
                return result

        return ''