cli.py 5.47 KB
Newer Older
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
1
2
from argparse import ArgumentParser, FileType, RawDescriptionHelpFormatter
from re import findall
3

4
from Bio import SeqIO
5

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
6
from . import BackTranslate, usage, version, doc_split
7
from .util import protein_letters_3to1, subst_to_cds
8
9


10
def with_dna(input_handle, output_handle, offset, position, amino_acid):
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
11
    """Get all variants that result in the observed amino acid change.
12

13
14
15
    :arg stream input_handle: Open readable handle to a FASTA file.
    :arg stream output_handle: Open writable handle to a file.
    :arg int offset: Position of the CDS start in the reference sequence.
16
    :arg int position: Position of the amino acid change (in p. coordinates).
Laros's avatar
Laros committed
17
    :arg str amino_acid: Observed amino acid.
18
19
20

    :returns set: Variants that lead to the observed amino acid change.
    """
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
21
    bt = BackTranslate()
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
22
    reference = str(next(SeqIO.parse(input_handle, 'fasta')).seq)
23
24
    codon_pos = offset - 1 + (position - 1) * 3
    codon = reference[codon_pos:codon_pos + 3]
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
25
    substitutions = bt.with_dna(codon, protein_letters_3to1[amino_acid])
26

Laros's avatar
Laros committed
27
    for subst in sorted(subst_to_cds(substitutions, (position - 1) * 3)):
28
        output_handle.write('{}\t{}\t{}\n'.format(*subst))
29
30


31
def without_dna(output_handle, position, reference_amino_acid, amino_acid):
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
32
    """Get all variants that result in the observed amino acid change without
33
34
    making use of the transcript.

35
    :arg stream output_handle: Open writable handle to a file.
36
    :arg int position: Position of the amino acid change (in p. coordinates).
Laros's avatar
Laros committed
37
38
    :arg str reference_amino_acid: Observed amino acid.
    :arg str amino_acid: Observed amino acid.
39
40
41

    :returns set: Variants that lead to the observed amino acid change.
    """
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
42
43
    bt = BackTranslate()
    improvable = bt.improvable()
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
44

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
45
46
    substitutions = bt.without_dna(
        protein_letters_3to1[reference_amino_acid],
47
48
        protein_letters_3to1[amino_acid])

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
49
50
    if (protein_letters_3to1[reference_amino_acid],
            protein_letters_3to1[amino_acid]) in improvable:
51
52
53
        output_handle.write(
            'This substitution can be improved by using `with_dna`.\n')

Laros's avatar
Laros committed
54
    for subst in sorted(subst_to_cds(substitutions, (position - 1) * 3)):
55
56
57
        output_handle.write('{}\t{}\t{}\n'.format(*subst))


58
def find_stops(input_handle, output_handle, offset, compact):
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
59
    """Almost stop codon finder.
60
61
62
63

    :arg stream input_handle: Open readable handle to a FASTA file.
    :arg stream output_handle: Open writable handle to a file.
    :arg int offset: Position of the CDS start in the reference sequence.
64
    :arg bool compact: Output one line per position.
65
66
    """
    bt = BackTranslate()
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
67
    sequence = str(next(SeqIO.parse(input_handle, 'fasta')).seq)
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
68

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
69
    for index, codon in enumerate(findall('...', sequence[offset - 1:])):
70
71
        stop_positions = bt.with_dna(codon, '*')

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
72
        for position in sorted(stop_positions):
73
            if not compact:
Laros's avatar
Laros committed
74
                for subst in sorted(stop_positions[position]):
75
76
77
                    output_handle.write('{}\t{}\t{}\n'.format(
                        offset + (index * 3) + position, *subst))
            else:
78
                output_handle.write('{}\t{}\t{}\n'.format(
79
80
                    offset + (index * 3) + position,
                    list(stop_positions[position])[0][0],
Laros's avatar
Laros committed
81
82
                    ','.join(map(lambda x: x[1],
                    sorted(stop_positions[position])))))
83
84
85


def main():
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
86
87
88
89
90
91
92
    """Main entry point."""
    input_parser = ArgumentParser(add_help=False)
    input_parser.add_argument(
        'input_handle', metavar='INPUT', type=FileType('r'),
        help='input file in FASTA format')
    input_parser.add_argument(
        '-o', dest='offset', type=int, default=1,
93
94
        help='offset in the reference sequence (int default=%(default)s)')

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
95
96
97
98
    output_parser = ArgumentParser(add_help=False)
    output_parser.add_argument(
        'output_handle', metavar='OUTPUT', type=FileType('w'),
        help='output file')
99

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
100
    reference_aa_parser = ArgumentParser(add_help=False)
101
    reference_aa_parser.add_argument(
Laros's avatar
Laros committed
102
        'reference_amino_acid', type=str, help='amino acid, e.g., Asp')
103

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
104
    observed_parser = ArgumentParser(add_help=False)
105
106
107
    observed_parser.add_argument(
        'position', type=int, help='position, e.g., 92')
    observed_parser.add_argument(
Laros's avatar
Laros committed
108
        'amino_acid', type=str, help='amino acid, e.g., Tyr')
109

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
110
    parser = ArgumentParser(
111
        description=usage[0], epilog=usage[1],
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
112
        formatter_class=RawDescriptionHelpFormatter)
113
114
    parser.add_argument('-v', action='version', version=version(parser.prog))
    subparsers = parser.add_subparsers(dest='subcommand')
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
115
    subparsers.required = True
116

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
117
    subparser = subparsers.add_parser(
118
        'with_dna', parents=[input_parser, output_parser, observed_parser],
119
        description=doc_split(with_dna))
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
120
    subparser.set_defaults(func=with_dna)
121

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
122
    subparser = subparsers.add_parser(
123
        'without_dna',
124
        parents=[output_parser, reference_aa_parser, observed_parser],
125
        description=doc_split(without_dna))
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
126
    subparser.set_defaults(func=without_dna)
127

Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
128
129
    subparser = subparsers.add_parser(
        'find_stops', parents=[input_parser, output_parser],
130
        description=doc_split(find_stops))
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
131
    subparser.add_argument(
132
133
        '-c', dest='compact', default=False, action='store_true',
        help='compact output')
Jeroen F.J. Laros's avatar
Jeroen F.J. Laros committed
134
    subparser.set_defaults(func=find_stops)
135

136
    args = parser.parse_args()
137
138

    try:
139
        args.func(
140
            **dict((k, v) for k, v in vars(args).items() if k not in
141
            ('func', 'subcommand')))
142
143
    except IOError as error:
        parser.error(error)