Commit ee360d37 authored by Alisa Muraveva's avatar Alisa Muraveva
Browse files

* substitution in variantchecker.py

git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/branches/mobile-2013@710 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
parent c4baab90
......@@ -1311,15 +1311,32 @@ def _add_transcript_info(mutator, transcript, output):
'Invalid letters in reference sequence.')
return
if '*' in cds_original.translate(table=transcript.txTable)[:-1]:
star_subst(cds_original,transcript)
return
protein_original = cds_original.translate(table=transcript.txTable,
to_stop=True)
protein_variant = cds_variant.translate(table=transcript.txTable,
to_stop=True)
#if '*' in cds_original.translate(table=transcript.txTable)[:-1]:
# star_subst(cds_original,transcript)
protein_original = cds_original.translate(table=transcript.txTable)
for start, stop, aa, scheme in transcript.transl_except:
if scheme!="p.":
s, e, a, sch = converting_coordinates((start, stop, aa, scheme), transcript.CM)
if protein_original[s] == '*':
protein_original=protein_original.tomutable()
protein_original[s] = aa
protein_original=protein_original.toseq()
if '*' in protein_original[:-1]:
output.addMessage(__file__, 3, 'ESTOP',
'In frame stop codon.')
protein_variant = cds_variant.translate(table=transcript.txTable)
for start, stop, aa, scheme in transcript.transl_except:
if scheme!="p.":
s, e, a, sch = converting_coordinates((start, stop, aa, scheme), transcript.CM)
if protein_variant[s] == '*':
protein_variant=protein_variant.tomutable()
protein_variant[s] = aa
protein_variant=protein_variant.toseq()
# print protein_original, "\n", protein_variant
# Note: addOutput('origCDS', ...) was first before the possible
# reverse complement operation above.
......@@ -1879,71 +1896,39 @@ def check_variant(description, output):
_add_batch_output(output)
#check_variant
def star_subst(sequence,transcript):
'''This function change txTable (a Table with genetic code) according to annotations in gGenBank file.
input:
sequence: nucleotide sequence of transcript
transcript: Locus object'''
t=transcript.transl_except
sequence_t=sequence.translate()
for j in t:
if j[3]!="c.":
j = converting_coordinates(j,transcript.CM)
for m in re.finditer("\*", str(sequence_t)):
coord = m.start()
start = coord * 3
end = coord * 3 + 2 #not important !!!!chain!!!!
for i in t:
if int(start)==int(i[0]) and int(end)==int(i[1]):
triplet= sequence[start:stop+1]
if t[2]=="Stop":
transcript.txTable.stop_codons.append(triplet)
output.addMessage(__file__, 3, 'ESTOP',
'In frame stop codon found.')
else:
transcript.txTable.forward_table[triplet] = t[2]
transcript.txTable.back_table[t[2]] = triplet
if triplet in transcript.txTable.stop_codons:
del transcript.txTable.stop_codons[transcript.txTable.stop_codons.index(triplet)]
output.addMessage(__file__, 2, 'WSTOP',
'In frame stop codon was substituted according to CDS annotation.')
break
output.addMessage(__file__, 3, 'ESTOP',
'In frame stop codon found.')
return
def converting_coordinates(create_exception_output, transript_cm):
''' This function converts coordinates with regard to CDS
input:
create_exception_output: tuple, which contain information about substitutions
transcript_cm: Locus.CM object
'''
start,end,aa,scheme=create_exception_output
if scheme == 'g.':
coding_main_s, coding_offset_s = transript_cm.g2x(start)
if coding_offset_s != 0:
output.addMessage(__file__, 3, 'EOFFSET',
''' This function converts coordinates with regard to CDS
input:
create_exception_output: tuple, which contain information about substitutions
transcript_cm: Locus.CM object
'''
start,end,aa,scheme=create_exception_output
if scheme == 'g.':
coding_main_s, coding_offset_s = transript_cm.g2x(start)
if coding_offset_s != 0:
output.addMessage(__file__, 3, 'EOFFSET',
'Reading frame offset')
return
coding_main_e, coding_offset_e = transript_cm.g2x(end)
if coding_offset_e != 0:
output.addMessage(__file__, 3, 'EOFFSET',
return
coding_main_e, coding_offset_e = transript_cm.g2x(end)
if coding_offset_e != 0:
output.addMessage(__file__, 3, 'EOFFSET',
'Reading frame offset')
return
start = coding_main_s
end = coding_main_e
elif scheme == 'p.':
# I think this is in LRGs, look at it later.
start = start * 3
end = end * 3
elif scheme == "c.":
pass
else:
output.addMessage(__file__, 3, 'ESCHEME',
'Error in transl_exception object') # TODO: Normal description
return
return start,end,aa,"c." # Now `position` is an index in the CDS.
return
start = coding_main_s/3
end = coding_main_e/3
elif scheme == 'c.':
# I think this is in LRGs, look at it later.
start = start/3
end = end/3
elif scheme == "p.":
pass
else:
output.addMessage(__file__, 3, 'ESCHEME',
'Error in transl_exception object') # TODO: Normal description
return
return start,end,aa,"c." # Now `position` is an index in the CDS.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment