Skip to content
Snippets Groups Projects
Commit 851e71fe authored by Vermaat
Browse files

Visualise protein change, also with alternative start

In the case of an alternative start codon (in the reference CDS),
protein changes were not visualised. This is fixed and a WALTSTART
warning is also issued.

Also, if a new non-reference start codon is created by the variant,
visualise this as such.
parent ae70ddfd
No related branches found
No related tags found
No related merge requests found
......@@ -600,10 +600,6 @@ def protein_description(cds_stop, s1, s2):
else:
description = in_frame_description(s1, s2)
if not s2 or s1[0] != s2[0]:
# Mutation in start codon.
return 'p.?', description[1], description[2], description[3]
return description
#protein_description
......
......@@ -1329,6 +1329,30 @@ def _add_transcript_info(mutator, transcript, output):
# Add protein prediction to output.
if transcript.translate:
# Data added to the output object:
# - origCDS: Original CDS.
# - newCDS: Variant CDS.
# - oldprotein: Original protein sequence, ending with '*'.
# - newprotein:
# - If variant CDS could not be translated, this is '?'.
# - If start codon was affected, this is '?'.
# - If variant protein equals original protein, this is unset.
# - Otherwise, this is the variant protein sequence, ending with
# '*' if a stop codon was found.
# - altStart:
# - If variant CDS could be translated and variant created a new
# start codon, this is the new start codon.
# - Unset otherwise.
# - altProtein:
# - If variant CDS could be translated and variant created a new
# start codon, and variant protein does not equal original
# protein, this is the variant protein sequence, ending with '*'
# if a stop codon was found.
# - oldProteinFancy, newProteinFancy, altProteinFancy: Versions of the
# protein sequences formatted for HTML.
# - oldProteinFancyText, newProteinFancyText, altProteinFancyText:
# Versions of the protein sequences formatted for plaintext.
cds_original = util.splice(mutator.orig, transcript.CDS.positionList)
cds_original.alphabet = IUPAC.unambiguous_dna
......@@ -1343,8 +1367,6 @@ def _add_transcript_info(mutator, transcript, output):
transcript.CM.orientation)
cds_variant.alphabet = IUPAC.unambiguous_dna
#output.addOutput('origCDS', cds_original)
if transcript.CM.orientation == -1:
cds_original = cds_original.reverse_complement()
cds_variant = cds_variant.reverse_complement()
......@@ -1361,8 +1383,17 @@ def _add_transcript_info(mutator, transcript, output):
'In frame stop codon found.')
return
if not protein_original.startswith('M'):
protein_original = 'M' + protein_original[1:]
output.addMessage(__file__, 2, 'WALTSTART',
'Reference protein translated from alternative '
'start codon %s.' % (unicode(cds_original[:3])))
protein_variant = cds_variant.translate(table=transcript.txTable)
if protein_variant:
protein_variant = 'M' + protein_variant[1:]
# Up to and including the first '*', or the entire string.
try:
stop = unicode(protein_variant).index('*')
......@@ -1370,8 +1401,6 @@ def _add_transcript_info(mutator, transcript, output):
except ValueError:
pass
# Note: addOutput('origCDS', ...) was first before the possible
# reverse complement operation above.
output.addOutput('origCDS', unicode(cds_original))
output.addOutput("newCDS", unicode(cds_variant[:len(protein_variant) * 3]))
......@@ -1381,34 +1410,43 @@ def _add_transcript_info(mutator, transcript, output):
# website.py.
# I think it would also be nice to include the mutated list of splice
# sites.
if not protein_variant or unicode(protein_variant[0]) != 'M':
# Todo: Protein differences are not color-coded,
# use something like below in protein_description().
if not protein_variant or unicode(cds_variant[:3]) != unicode(cds_original[:3]):
# Could not translate variant CDS or variant hits start codon. In
# that case we predict p.? and see if a non-reference start codon
# was created.
util.print_protein_html(unicode(protein_original), 0, 0,
output, 'oldProteinFancy')
util.print_protein_html(unicode(protein_original), 0, 0,
output, 'oldProteinFancyText', text=True)
if unicode(cds_variant[0:3]) in \
CodonTable.unambiguous_dna_by_id[transcript.txTable].start_codons:
output.addOutput('newprotein', '?')
util.print_protein_html('?', 0, 0, output, 'newProteinFancy')
util.print_protein_html('?', 0, 0, output,
'newProteinFancyText', text=True)
output.addOutput('altStart', unicode(cds_variant[0:3]))
if unicode(protein_original[1:]) != unicode(protein_variant[1:]):
output.addOutput('altProtein',
'M' + unicode(protein_variant[1:]))
util.print_protein_html('M' + unicode(protein_variant[1:]), 0,
0, output, 'altProteinFancy')
util.print_protein_html('M' + unicode(protein_variant[1:]), 0,
0, output, 'altProteinFancyText', text=True)
else :
output.addOutput('newprotein', '?')
util.print_protein_html('?', 0, 0, output, 'newProteinFancy')
util.print_protein_html('?', 0, 0, output,
'newProteinFancyText', text=True)
output.addOutput('newprotein', '?')
util.print_protein_html('?', 0, 0, output, 'newProteinFancy')
util.print_protein_html('?', 0, 0, output,
'newProteinFancyText', text=True)
if protein_variant:
# Variant CDS could be translated, but start codon was
# affected.
start_codons = CodonTable.unambiguous_dna_by_id[
transcript.txTable].start_codons
if unicode(cds_variant[0:3]) in start_codons:
# A non-reference start codon was created.
output.addOutput('altStart', unicode(cds_variant[0:3]))
if unicode(protein_original) != unicode(protein_variant):
# The resulting protein is actually different, so
# visualise the difference.
# Todo: Protein differences are not color-coded,
# use something like below in protein_description().
output.addOutput('altProtein', unicode(protein_variant))
util.print_protein_html(unicode(protein_variant), 0,
0, output, 'altProteinFancy')
util.print_protein_html(unicode(protein_variant), 0,
0, output, 'altProteinFancyText', text=True)
else:
# Variant CDS was translated and start codon is unchanged.
cds_length = util.cds_length(
mutator.shift_sites(transcript.CDS.positionList))
descr, first, last_original, last_variant = \
......@@ -1840,23 +1878,27 @@ def check_variant(description, output):
# So we manually translate the first codon to M. But only
# if it was not affected by the variant.
protein_variant = cds_variant.translate(table=transcript.txTable)
if unicode(cds_variant[:3]) == unicode(cds_original[:3]):
if protein_variant and unicode(cds_variant[:3]) == unicode(cds_original[:3]):
protein_variant = protein_original[0] + protein_variant[1:]
# Up to and including the first '*', or the entire string.
try:
stop = unicode(protein_variant).index('*')
protein_variant = protein_variant[:stop + 1]
except ValueError:
pass
try:
cds_length = util.cds_length(
mutator.shift_sites(transcript.CDS.positionList))
transcript.proteinDescription = util.protein_description(
cds_length, unicode(protein_original), unicode(protein_variant))[0]
except IndexError:
# Todo: Probably CDS start was hit by removal of exon..
# Up to and including the first '*', or the entire string.
try:
stop = unicode(protein_variant).index('*')
protein_variant = protein_variant[:stop + 1]
except ValueError:
pass
try:
cds_length = util.cds_length(
mutator.shift_sites(transcript.CDS.positionList))
transcript.proteinDescription = util.protein_description(
cds_length, unicode(protein_original), unicode(protein_variant))[0]
except IndexError:
# Todo: Probably CDS start was hit by removal of exon..
transcript.proteinDescription = 'p.?'
else:
# Mutation in start codon.
transcript.proteinDescription = 'p.?'
else:
......
......@@ -1357,6 +1357,7 @@ class TestVariantchecker(MutalyzerTest):
"""
check_variant('AB026906.1:c.276C>T', self.output)
assert 'AB026906.1(SDHD_i001):p.(=)' in self.output.getOutput('protDescriptions')
assert not self.output.getOutput('newProteinFancy')
@fix(cache('NM_024426.4'))
def test_synonymous_p_is_alt_start(self):
......@@ -1366,17 +1367,27 @@ class TestVariantchecker(MutalyzerTest):
"""
check_variant('NM_024426.4:c.1107A>G', self.output)
assert 'NM_024426.4(WT1_i001):p.(=)' in self.output.getOutput('protDescriptions')
assert 'CTG' in self.output.getOutput('altStart')
assert not self.output.getOutput('newProteinFancy')
waltstart = self.output.getMessagesWithErrorCode('WALTSTART')
assert len(waltstart) == 1
assert self.output.getOutput('oldprotein')[0].startswith('M')
assert not self.output.getOutput('newProtein')
assert not self.output.getOutput('altStart')
assert not self.output.getOutput('altProteinFancy')
@fix(cache('AB026906.1'))
def test_start_codon(self):
"""
Mutation of start codon should yield a p.? description.
"""
check_variant('AB026906.1:c.1A>T', self.output)
check_variant('AB026906.1:c.1A>G', self.output)
assert 'AB026906.1(SDHD_i001):p.?' in self.output.getOutput('protDescriptions')
west = self.output.getMessagesWithErrorCode('WSTART')
assert len(west) == 1
wstart = self.output.getMessagesWithErrorCode('WSTART')
assert len(wstart) == 1
assert self.output.getOutput('newprotein')[0] == '?'
waltstart = self.output.getMessagesWithErrorCode('WALTSTART')
assert len(waltstart) == 0
assert not self.output.getOutput('altStart')
@fix(cache('NM_024426.4'))
def test_start_codon_alt_start(self):
......@@ -1388,3 +1399,77 @@ class TestVariantchecker(MutalyzerTest):
assert 'NM_024426.4(WT1_i001):p.?' in self.output.getOutput('protDescriptions')
west = self.output.getMessagesWithErrorCode('WSTART')
assert len(west) == 1
assert self.output.getOutput('newprotein')[0] == '?'
waltstart = self.output.getMessagesWithErrorCode('WALTSTART')
assert len(waltstart) == 1
assert not self.output.getOutput('altStart')
@fix(cache('AB026906.1'))
def test_start_codon_yield_start_p_is(self):
    """
    A silent substitution in the start codon that produces another valid
    start codon must still be described as p.?. The new start codon is
    reported, but no alternative protein is rendered.
    """
    out = self.output
    # c.1A>T yields a TTG start codon.
    check_variant('AB026906.1:c.1A>T', out)

    # The protein description stays p.? and a WSTART warning is issued.
    assert 'AB026906.1(SDHD_i001):p.?' in out.getOutput('protDescriptions')
    assert len(out.getMessagesWithErrorCode('WSTART')) == 1
    assert out.getOutput('newprotein')[0] == '?'

    # No alternative-start warning is expected for this reference.
    assert len(out.getMessagesWithErrorCode('WALTSTART')) == 0
    assert out.getOutput('oldprotein')[0].startswith('M')

    # The created start codon is reported without an alternative protein
    # visualisation.
    assert 'TTG' in out.getOutput('altStart')
    assert not out.getOutput('altProteinFancy')
@fix(cache('NM_024426.4'))
def test_start_codon_alt_start_yield_start_p_is(self):
    """
    Same as the silent new-start-codon case, but for a reference for which
    the WALTSTART warning is expected: the description is p.?, the new
    start codon is reported and no alternative protein is rendered.
    """
    out = self.output
    # c.1C>A yields an ATG start codon.
    check_variant('NM_024426.4:c.1C>A', out)

    # The protein description stays p.? and a WSTART warning is issued.
    assert 'NM_024426.4(WT1_i001):p.?' in out.getOutput('protDescriptions')
    assert len(out.getMessagesWithErrorCode('WSTART')) == 1
    assert out.getOutput('newprotein')[0] == '?'

    # Exactly one alternative-start warning; the original protein is still
    # shown starting with M.
    assert len(out.getMessagesWithErrorCode('WALTSTART')) == 1
    assert out.getOutput('oldprotein')[0].startswith('M')

    # The created start codon is reported without an alternative protein
    # visualisation.
    assert 'ATG' in out.getOutput('altStart')
    assert not out.getOutput('altProteinFancy')
@fix(cache('AB026906.1'))
def test_start_codon_yield_start(self):
    """
    A variant that replaces the start codon with another start codon must
    be described as p.?. The new start codon and the alternative protein
    are both reported.
    """
    out = self.output
    # c.1_4delinsTTGA yields a TTG start codon.
    check_variant('AB026906.1:c.1_4delinsTTGA', out)

    # The protein description stays p.? and a WSTART warning is issued.
    assert 'AB026906.1(SDHD_i001):p.?' in out.getOutput('protDescriptions')
    assert len(out.getMessagesWithErrorCode('WSTART')) == 1
    assert out.getOutput('newprotein')[0] == '?'

    # No alternative-start warning is expected for this reference.
    assert len(out.getMessagesWithErrorCode('WALTSTART')) == 0

    # The created start codon is reported and the alternative protein
    # starts with M.
    assert 'TTG' in out.getOutput('altStart')
    assert out.getOutput('altProtein')[0].startswith('M')
@fix(cache('NM_024426.4'))
def test_start_codon_alt_start_yield_start(self):
    """
    Same as the new-start-codon case, but for a reference for which the
    WALTSTART warning is expected: the description is p.?, and the new
    start codon and the alternative protein are both reported.
    """
    out = self.output
    # c.1_4delinsATGA yields an ATG start codon.
    check_variant('NM_024426.4:c.1_4delinsATGA', out)

    # The protein description stays p.? and a WSTART warning is issued.
    assert 'NM_024426.4(WT1_i001):p.?' in out.getOutput('protDescriptions')
    assert len(out.getMessagesWithErrorCode('WSTART')) == 1
    assert out.getOutput('newprotein')[0] == '?'

    # Exactly one alternative-start warning; the original protein is still
    # shown starting with M.
    assert len(out.getMessagesWithErrorCode('WALTSTART')) == 1
    assert out.getOutput('oldprotein')[0].startswith('M')

    # The created start codon is reported and the alternative protein
    # starts with M.
    assert 'ATG' in out.getOutput('altStart')
    assert out.getOutput('altProtein')[0].startswith('M')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment