Commit c8d273cc authored by Vermaat
Browse files

Refactor mutator module (fixes Trac #83)

git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@528 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
parent 4cc55c61
This diff is collapsed.
......@@ -595,6 +595,29 @@ def protein_description(cds_stop, s1, s2) :
#protein_description
def visualise_sequence(sequence, max_length=25, flank_size=6):
    """
    Abbreviate a sequence for display if it is longer than max_length.

    A clipped sequence consists of its first and last flank_size characters
    with the number of omitted characters in between, for example
    'ACGTAC [18bp] GTACGT'. If the sequence is not longer than max_length, or
    if the two flanks together already cover the entire sequence (so nothing
    would be omitted), the sequence is returned unchanged.

    @arg sequence: DNA sequence.
    @type sequence: str
    @arg max_length: Maximum length of visualised sequence.
    @type max_length: int
    @arg flank_size: Length of the flanks in clipped visualised sequence.
        Negative values are treated as 0.
    @type flank_size: int

    @return: Either the original sequence, or an abbreviation of it.
    @rtype: str
    """
    # A negative flank size has no sensible meaning; treat it as "no flanks".
    flank_size = max(flank_size, 0)

    length = len(sequence)
    hidden = length - flank_size * 2

    # Clipping is only useful if something is actually left out between the
    # flanks; otherwise we would report a zero or negative number of bases
    # and produce a string longer than the input.
    if length <= max_length or hidden <= 0:
        return sequence

    # Take the right flank from an explicit start index. The shorter
    # sequence[-flank_size:] yields the *whole* sequence for flank_size 0.
    return '%s [%ibp] %s' % (sequence[:flank_size],
                             hidden,
                             sequence[length - flank_size:])
#visualise_sequence
# Todo: cleanup
def _insert_tag(s, pos1, pos2, tag1, tag2):
"""
......
......@@ -19,6 +19,7 @@ import Bio.Seq
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from mutalyzer import config
from mutalyzer import util
from mutalyzer.grammar import Grammar
from mutalyzer.mutator import Mutator
......@@ -306,7 +307,7 @@ def apply_substitution(position, original, substitute, mutator, record, O):
(original, substitute, position))
return
mutator.subM(position, substitute)
mutator.substitution(position, substitute)
record.name(position, position, 'subst', mutator.orig[position - 1],
substitute, None)
......@@ -374,9 +375,13 @@ def apply_deletion_duplication(first, last, type, mutator, record, O,
'Sequence "%s" at position %s was given, however, ' \
'the HGVS notation prescribes that on the forward strand ' \
'it should be "%s" at position %s.' % (
mutator.visualiseLargeString(str(mutator.orig[first - 1:last])),
util.visualise_sequence(str(mutator.orig[first - 1:last]),
config.get('maxvissize'),
config.get('flankclipsize')),
util.format_range(first, last),
mutator.visualiseLargeString(str(mutator.orig[new_first - 1:new_stop])),
util.visualise_sequence(str(mutator.orig[new_first - 1:new_stop]),
config.get('maxvissize'),
config.get('flankclipsize')),
util.format_range(new_first, new_stop)))
if forward_roll != original_forward_roll and not reverse_strand:
......@@ -386,9 +391,13 @@ def apply_deletion_duplication(first, last, type, mutator, record, O,
O.addMessage(__file__, 1, 'IROLLBACK',
'Sequence "%s" at position %s was not corrected to "%s" at ' \
'position %s, since they reside in different exons.' % (
mutator.visualiseLargeString(str(mutator.orig[first - 1:last])),
util.visualise_sequence(str(mutator.orig[first - 1:last]),
config.get('maxvissize'),
config.get('flankclipsize')),
util.format_range(first, last),
mutator.visualiseLargeString(str(mutator.orig[incorrect_first - 1:incorrect_stop])),
util.visualise_sequence(str(mutator.orig[incorrect_first - 1:incorrect_stop]),
config.get('maxvissize'),
config.get('flankclipsize')),
util.format_range(incorrect_first, incorrect_stop)))
if reverse_roll and reverse_strand:
......@@ -398,17 +407,21 @@ def apply_deletion_duplication(first, last, type, mutator, record, O,
'Sequence "%s" at position %s was given, however, ' \
'the HGVS notation prescribes that on the reverse strand ' \
'it should be "%s" at position %s.' % (
mutator.visualiseLargeString(str(mutator.orig[first - 1:last])),
util.visualise_sequence(str(mutator.orig[first - 1:last]),
config.get('maxvissize'),
config.get('flankclipsize')),
util.format_range(first, last),
mutator.visualiseLargeString(str(mutator.orig[new_first - 1:new_stop])),
util.visualise_sequence(str(mutator.orig[new_first - 1:new_stop]),
config.get('maxvissize'),
config.get('flankclipsize')),
util.format_range(new_first, new_stop)))
# We don't go through the trouble of visualising the *corrected* variant
# and are happy with visualising what the user gave us.
if type == 'del':
mutator.delM(first, last)
mutator.deletion(first, last)
else:
mutator.dupM(first, last)
mutator.duplication(first, last)
record.name(first, last, type, '', '', (reverse_roll, forward_roll),
start_fuzzy=first_fuzzy,
......@@ -442,7 +455,9 @@ def apply_inversion(first, last, mutator, record, O):
O.addMessage(__file__, 2, 'WNOCHANGE',
'Sequence "%s" at position %i_%i is a palindrome ' \
'(its own reverse complement).' % (
mutator.visualiseLargeString(str(mutator.orig[first - 1:last])),
util.visualise_sequence(str(mutator.orig[first - 1:last]),
config.get('maxvissize'),
config.get('flankclipsize')),
first, last))
return
else:
......@@ -451,10 +466,13 @@ def apply_inversion(first, last, mutator, record, O):
'palindrome (the first %i nucleotide(s) are the reverse ' \
'complement of the last one(s)), the HGVS notation ' \
'prescribes that it should be "%s" at position %i_%i.' % (
mutator.visualiseLargeString(str(mutator.orig[first - 1:last])),
util.visualise_sequence(str(mutator.orig[first - 1:last]),
config.get('maxvissize'),
config.get('flankclipsize')),
first, last, snoop,
mutator.visualiseLargeString(
str(mutator.orig[first + snoop - 1: last - snoop])),
util.visualise_sequence(
str(mutator.orig[first + snoop - 1: last - snoop]),
config.get('maxvissize'), config.get('flankclipsize')),
first + snoop, last - snoop))
first += snoop
last -= snoop
......@@ -507,10 +525,10 @@ def apply_insertion(before, after, s, mutator, record, O):
# We don't go through the trouble of visualising the *corrected* variant
# and are happy with visualising what the user gave us.
mutator.insM(before, s)
mutator.insertion(before, s)
new_before = mutator.shiftpos(before)
new_stop = mutator.shiftpos(before) + insertion_length
new_before = mutator.shift(before)
new_stop = mutator.shift(before) + insertion_length
reverse_roll, forward_roll = util.roll(mutator.mutated, new_before + 1, new_stop)
......@@ -617,8 +635,10 @@ def apply_delins(first, last, delete, insert, mutator, record, output):
output.addMessage(__file__, 2, 'WNOCHANGE',
'Sequence "%s" at position %i_%i is identical to ' \
'the variant.' % (
mutator.visualiseLargeString(str(mutator.orig[first - 1:last])),
first, last))
util.visualise_sequence(str(mutator.orig[first - 1:last]),
config.get('maxvissize'),
config.get('flankclipsize')),
first, last))
return
delete_trimmed, insert_trimmed, lcp, lcs = util.trim_common(delete, insert)
......@@ -656,10 +676,12 @@ def apply_delins(first, last, delete, insert, mutator, record, output):
'Sequence "%s" at position %i_%i has the same prefix or ' \
'suffix as the inserted sequence "%s". The HGVS notation ' \
'prescribes that it should be "%s" at position %i_%i.' % (
mutator.visualiseLargeString(str(mutator.orig[first - 1:last])),
util.visualise_sequence(str(mutator.orig[first - 1:last]),
config.get('maxvissize'),
config.get('flankclipsize')),
first, last, insert, insert_trimmed, first + lcp, last - lcs))
mutator.delinsM(first + lcp, last - lcs, insert_trimmed)
mutator.delins(first + lcp, last - lcs, insert_trimmed)
record.name(first + lcp, last - lcs, 'delins', insert_trimmed, '', None)
#apply_delins
......@@ -1228,15 +1250,15 @@ def _add_transcript_info(mutator, transcript, output):
str(util.splice(mutator.orig, transcript.mRNA.positionList)))
output.addOutput('mutatedMRNA',
str(util.splice(mutator.mutated,
mutator.newSplice(transcript.mRNA.positionList))))
mutator.shift_sites(transcript.mRNA.positionList))))
# Add protein prediction to output.
if transcript.translate:
cds_original = Seq(str(util.splice(mutator.orig, transcript.CDS.positionList)),
IUPAC.unambiguous_dna)
cds_variant = Seq(str(util.__nsplice(mutator.mutated,
mutator.newSplice(transcript.mRNA.positionList),
mutator.newSplice(transcript.CDS.location),
mutator.shift_sites(transcript.mRNA.positionList),
mutator.shift_sites(transcript.CDS.location),
transcript.CM.orientation)),
IUPAC.unambiguous_dna)
......@@ -1294,7 +1316,7 @@ def _add_transcript_info(mutator, transcript, output):
else:
cds_length = util.cds_length(
mutator.newSplice(transcript.CDS.positionList))
mutator.shift_sites(transcript.CDS.positionList))
descr, first, last_original, last_variant = \
util.protein_description(cds_length, protein_original,
protein_variant)
......@@ -1616,8 +1638,8 @@ def check_variant(description, output):
cds_original = Seq(str(util.splice(mutator.orig, transcript.CDS.positionList)),
IUPAC.unambiguous_dna)
cds_variant = Seq(str(util.__nsplice(mutator.mutated,
mutator.newSplice(transcript.mRNA.positionList),
mutator.newSplice(transcript.CDS.location),
mutator.shift_sites(transcript.mRNA.positionList),
mutator.shift_sites(transcript.CDS.location),
transcript.CM.orientation)),
IUPAC.unambiguous_dna)
......@@ -1648,7 +1670,7 @@ def check_variant(description, output):
to_stop=True)
try:
cds_length = util.cds_length(
mutator.newSplice(transcript.CDS.positionList))
mutator.shift_sites(transcript.CDS.positionList))
transcript.proteinDescription = util.protein_description(
cds_length, protein_original, protein_variant)[0]
except IndexError:
......
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment