backtranslate.py 4.13 KB
Newer Older
1
2
from collections import defaultdict

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
3
from Bio.Data import CodonTable
4
5
6
from Levenshtein import hamming


Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
7
def cmp_subst(subst_1, subst_2):
    """Compare two substitution sets.

    :arg dict subst_1: Substitution set.
    :arg dict subst_2: Substitution set.

    :returns bool: True if `subst_1` equals `subst_2`, False otherwise.
    """
    # Mapping equality checks both the keys and the values. Looking up every
    # key of `subst_1` in `subst_2` instead would raise a KeyError for two
    # sets of equal size that are indexed by different positions.
    return subst_1 == subst_2


25
def reverse_translation_table(table_id=1):
    """Build the inverse of a codon table.

    :arg int table_id: Translation table id.

    :returns dict: Set of possible codons indexed by amino acid.
    """
    codon_table = CodonTable.unambiguous_dna_by_id[table_id]

    # Stop codons are taken from their own list and indexed by '*'.
    codons_by_amino_acid = {
        '*': {str(stop_codon) for stop_codon in codon_table.stop_codons}}

    for codon, amino_acid in codon_table.forward_table.items():
        codons_by_amino_acid.setdefault(str(amino_acid), set()).add(
            str(codon))

    return codons_by_amino_acid
41
42


Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
43
class BackTranslate(object):
    """Back translation.

    Finds the single nucleotide substitutions that can explain an observed
    amino acid, using the reverse translation table selected on
    construction.
    """
    def __init__(self, table_id=1):
        """
        :arg int table_id: Translation table id.
        """
        self._back_table = reverse_translation_table(table_id)

    def _one_subst(self, substitutions, reference_codon, amino_acid):
        """Find single nucleotide substitutions that given a reference codon
        explains an observed amino acid.

        The substitutions found are added to `substitutions` in place.

        :arg defaultdict(set) substitutions: Set of single nucleotide
            substitutions indexed by position.
        :arg str reference_codon: Original codon.
        :arg str amino_acid: Observed amino acid.
        """
        for codon in self._back_table[amino_acid]:
            # Only codons that differ in exactly one nucleotide qualify.
            if hamming(codon, reference_codon) == 1:
                for position in range(3):
                    if codon[position] != reference_codon[position]:
                        # Stored as (reference nucleotide, observed
                        # nucleotide).
                        substitutions[position].add(
                            (reference_codon[position], codon[position]))

    def with_dna(self, reference_codon, amino_acid):
        """Find single nucleotide substitutions that given a reference codon
        explains an observed amino acid.

        :arg str reference_codon: Original codon.
        :arg str amino_acid: Observed amino acid.

        :returns dict: Set of single nucleotide substitutions indexed by
            position.
        """
        substitutions = defaultdict(set)

        self._one_subst(substitutions, reference_codon, amino_acid)

        return dict(substitutions)

    def without_dna(self, reference_amino_acid, amino_acid):
        """Find single nucleotide substitutions that given a reference amino
        acid explains an observed amino acid.

        :arg str reference_amino_acid: Original amino acid.
        :arg str amino_acid: Observed amino acid.

        :returns dict: Set of single nucleotide substitutions indexed by
            position.
        """
        substitutions = defaultdict(set)

        # The reference codon is unknown, so the substitutions of every codon
        # that encodes the reference amino acid are accumulated.
        for reference_codon in self._back_table[reference_amino_acid]:
            self._one_subst(substitutions, reference_codon, amino_acid)

        return dict(substitutions)

    def improvable(self):
        """Calculate all pairs of amino acid substitutions that can be
        improved by looking at the underlying codon.

        A pair is improvable when at least one codon of the reference amino
        acid yields a non-empty set of substitutions that differs from the
        set found without knowledge of the codon.

        :returns set: Set of improvable substitutions, each a tuple of
            (reference amino acid, observed amino acid).
        """
        substitutions = set()

        for reference_amino_acid in self._back_table:
            for sample_amino_acid in self._back_table:
                # An amino acid paired with itself is never reported, so
                # skip it before doing any work.
                if reference_amino_acid == sample_amino_acid:
                    continue

                substitutions_without_dna = self.without_dna(
                    reference_amino_acid, sample_amino_acid)
                for codon in self._back_table[reference_amino_acid]:
                    substitutions_with_dna = self.with_dna(
                        codon, sample_amino_acid)
                    if (substitutions_with_dna and not cmp_subst(
                            substitutions_without_dna,
                            substitutions_with_dna)):
                        substitutions.add(
                            (reference_amino_acid, sample_amino_acid))
                        # One such codon is enough, the remaining codons
                        # cannot change the result for this pair.
                        break

        return substitutions