dict_trie.py 3.64 KB
Newer Older
Laros's avatar
Laros committed
1
def _hamming(path, node, word, distance):
Laros's avatar
PEP8.  
Laros committed
2 3 4
    """Find the first path in the trie that is within a certain hamming
    distance of {word}. Note that this does not necessarily the one with the
    smallest distance.
Laros's avatar
Laros committed
5

Laros's avatar
Laros committed
6 7 8 9 10
    :arg str path: Path taken so far to reach the current node.
    :arg dict node: Current node.
    :arg str word: Query word.
    :arg int distance: Amount of errors we can still make.

Laros's avatar
Laros committed
11 12
    :returns str: A word in the trie that has Hamming distance of at most
        {distance} to {word}.
Laros's avatar
Laros committed
13 14 15 16
    """
    if distance < 0:
        return ''
    if not word:
Laros's avatar
Laros committed
17
        return path if '' in node else ''
Laros's avatar
Laros committed
18 19 20 21 22 23 24 25 26 27 28

    car, cdr = word[0], word[1:]
    for char in node:
        result = _hamming(
            path + char, node[char], cdr, distance - int(char != car))
        if result:
            return result

    return ''


Laros's avatar
Laros committed
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
def _levenshtein(path, node, word, distance):
    """
    """
    if distance < 0:
        return ''
    if not word:
        return path if '' in node else ''

    car, cdr = word[0], word[1:]

    # Deletion.
    result = _levenshtein(path, node, cdr, distance - 1)
    if result:
        return result

    for char in node:
        # Substitution and insertion.
        result = (
            _levenshtein(
                path + char, node[char], cdr, distance - int(char != car)) or
            _levenshtein(path + char, node[char], word, distance - 1))
        if result:
            return result

    return ''
Laros's avatar
Laros committed
54 55


56 57
class Trie(object):
    def __init__(self, words):
Laros's avatar
PEP8.  
Laros committed
58
        """Initialise the class.
59 60 61 62 63 64 65 66

        :arg list words: List of words.
        """
        self.root = {}

        self._build(words)

    def _build(self, words):
Laros's avatar
PEP8.  
Laros committed
67
        """Build the trie.
68 69 70 71 72 73 74

        :arg list words: List of words.
        """
        for word in words:
            self.add(word)

    def _find(self, word):
Laros's avatar
PEP8.  
Laros committed
75
        """Find the node after following the path in the trie given by {word}.
76 77 78

        :arg str word: A word.

Laros's avatar
Laros committed
79
        :returns dict: The node if found, {} otherwise.
80 81 82
        """
        node = self.root

Laros's avatar
Laros committed
83 84
        for char in word:
            if char not in node:
85
                return {}
Laros's avatar
Laros committed
86
            node = node[char]
87 88 89 90

        return node

    def __contains__(self, word):
Laros's avatar
Laros committed
91
        return '' in self._find(word)
92 93

    def add(self, word):
Laros's avatar
PEP8.  
Laros committed
94
        """Add a word to the trie.
Laros's avatar
Laros committed
95 96

        :arg str word: A word.
97 98 99
        """
        node = self.root

Laros's avatar
Laros committed
100 101 102 103
        for char in word:
            if char not in node:
                node[char] = {}
            node = node[char]
104 105 106 107

        node[''] = {}

    def has_prefix(self, word):
Laros's avatar
Laros committed
108
        return self._find(word) != {}
Laros's avatar
Laros committed
109 110 111

    def hamming(self, word, distance):
        return _hamming('', self.root, word, distance)
Laros's avatar
Laros committed
112 113

    def best_hamming(self, word, distance):
Laros's avatar
PEP8.  
Laros committed
114
        """Find the best match with {word} in the trie.
Laros's avatar
Laros committed
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130

        :arg str word: Query word.
        :arg int distance: Amount of errors we can still make.

        :returns str: Best match with {word}.
        """
        if word in self:
            return word

        for i in range(1, distance + 1):
            result = self.hamming(word, i)
            if result:
                return result

        return ''

Laros's avatar
Laros committed
131 132
    def levenshtein(self, word, distance):
        return _levenshtein('', self.root, word, distance)
133 134

    def best_levenshtein(self, word, distance):
Laros's avatar
PEP8.  
Laros committed
135
        """Find the best match with {word} in the trie.
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150

        :arg str word: Query word.
        :arg int distance: Amount of errors we can still make.

        :returns str: Best match with {word}.
        """
        if word in self:
            return word

        for i in range(1, distance + 1):
            result = self.levenshtein(word, i)
            if result:
                return result

        return ''