# util.py -- amino acid letter tables and CDS coordinate helpers.
from Bio.Data import IUPACData


def _three_to_one():
    """The three letter to one letter table for amino acids including stop.

    The table is built from ``IUPACData.protein_letters_3to1_extended`` with
    every key and value coerced to ``str``, and is then extended with one
    extra entry that maps ``'Ter'`` to ``'*'``.

    :returns dict: Three letter to one letter amino acids table.
    """
    table = {
        str(three): str(one)
        for three, one in IUPACData.protein_letters_3to1_extended.items()}
    # Assigned last so that this entry wins over any 'Ter' key that the
    # IUPAC table might already contain.
    table['Ter'] = '*'

    return table


def subst_to_cds(substitutions, offset):
    """Convert a set of substitutions to CDS coordinates.

    Every substitution stored under a position is emitted as the tuple
    ``(position + offset + 1, substitution[0], substitution[1])``. Identical
    tuples collapse because the result is a set.

    :arg dict substitutions: Set of single nucleotide substitutions indexed by
        position.
    :arg int offset: Codon position in the CDS.

    :returns set: Substitutions in CDS coordinates.
    """
    # NOTE(review): the `+ 1` presumably turns a zero-based position into a
    # one-based CDS coordinate -- confirm against the callers.
    return {
        (position + offset + 1, substitution[0], substitution[1])
        for position in substitutions
        for substitution in substitutions[position]}


# Single letter amino acid codes from IUPACData, extended with '*', the
# symbol that _three_to_one() assigns to 'Ter'.
protein_letters = IUPACData.protein_letters + '*'
# Three letter to one letter lookup table, built once at import time.
protein_letters_3to1 = _three_to_one()