Commit 74a5eee0 authored by Jeroen F.J. Laros's avatar Jeroen F.J. Laros
Browse files

Returning dict instead of defaultdict, updated documentation.

parent 1af509f1
......@@ -2,33 +2,44 @@
This library provides functions for back translation from amino acids to
nucleotides.
from __future__ import unicode_literals
>>> from __future__ import unicode_literals
from backtranslate.backtranslate import BackTranslate
>>> from backtranslate.backtranslate import BackTranslate
# Create a class instance, optionally giving the translation table id.
bt = BackTranslate()
# Find all substitutions that transform the codon 'TGG' into a stop codon.
substitutions = bt.with_dna('TGG', '*')
>>> # Create a class instance, optionally giving the translation table id.
>>> bt = BackTranslate()
>>> # Find all substitutions that transform the codon 'TGG' into a stop
>>> # codon.
>>> bt.with_dna('TGG', '*')
{1: set([('G', 'A')]), 2: set([('G', 'A')])}
Sometimes we do not have access to the DNA sequence so we have to find
possible substitutions from the amino acids directly.
# Find all substitutions that transform a Tryptophan into a stop codon.
substitutions = bt.without_dna('W', '*')
>>> # Find all substitutions that transform a Tryptophan into a stop codon.
>>> bt.without_dna('W', '*')
{1: set([('G', 'A')]), 2: set([('G', 'A')])}
To find out which substitution predictions can be improved by adding codon
information, use the following function.
bt.improvable()
>>> bt.improvable()
set([('I', 'L'), ('R', 'W'), ('Q', 'H'), ('C', '*'), ('*', 'W'),
('K', 'N'), ('C', 'W'), ('S', 'R'), ('L', 'I'), ('*', 'S'), ('S', '*'),
('L', '*'), ('L', 'M'), ('L', 'F'), ('*', 'L'), ('D', 'E'), ('R', 'G'),
('S', 'C'), ('E', 'D'), ('R', 'S'), ('N', 'K'), ('H', 'Q'), ('S', 'T'),
('T', 'S'), ('G', 'R'), ('L', 'V'), ('I', 'M'), ('F', 'L'), ('*', 'Y'),
('Y', '*'), ('V', 'L'), ('R', '*')])
To get substitutions in a readable format, we can use the following:
from backtranslate.util import subst_to_cds
>>> from backtranslate.util import subst_to_cds
# Transform the substitutions to CDS coordinates.
variants = subst_to_cds(substitutions, 12)
>>> substitutions = bt.without_dna('W', '*')
>>> # Transform the substitutions to CDS coordinates.
>>> subst_to_cds(substitutions, 12)
set([(15, 'G', 'A'), (14, 'G', 'A')])
## Command line interface
Use the command `backtranslate` to find substitutions that explain an amino
......
......@@ -43,7 +43,7 @@ def reverse_translation_table(table_id=1):
for codon in forward_table.forward_table:
back_table[str(forward_table.forward_table[codon])].add(str(codon))
return back_table
return dict(back_table)
class BackTranslate(object):
......@@ -64,8 +64,8 @@ class BackTranslate(object):
Find single nucleotide substitutions that, given a reference codon,
explain an observed amino acid.
:arg dict substitutions: Set of single nucleotide substitutions indexed
by position.
:arg defaultdict(set) substitutions: Set of single nucleotide
substitutions indexed by position.
:arg str reference_codon: Original codon.
:arg str amino_acid: Observed amino acid.
"""
......@@ -92,7 +92,7 @@ class BackTranslate(object):
self._one_subst(substitutions, reference_codon, amino_acid)
return substitutions
return dict(substitutions)
def without_dna(self, reference_amino_acid, amino_acid):
......@@ -111,7 +111,7 @@ class BackTranslate(object):
for reference_codon in self._back_table[reference_amino_acid]:
self._one_subst(substitutions, reference_codon, amino_acid)
return substitutions
return dict(substitutions)
def improvable(self):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment