Skip to content
Snippets Groups Projects
Commit f7108427 authored by Laros's avatar Laros
Browse files

Finished generating the allele description.


git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@465 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
parent 70b4ff98
No related branches found
No related tags found
No related merge requests found
......@@ -76,7 +76,7 @@ def LongestCommonSubstring(s1, s2) :
return x_longest, y_longest, longest
#LongestCommonSubstring
def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end) :
"""
Give the HGVS description of the change from {s1} to {s2} in the range
{s1_start}..{s1_end} on {s1} and {s2_start}..{s2_end} on {s2}.
......@@ -100,13 +100,16 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
# Nothing happened.
if s1 == s2:
return "="
return "=", None
# Insertion / Duplication.
if s1_start == s1_end :
ins_length = s2_end - s2_start
dummy, shift = roll(s2, s2_start + 1, s2_end + 1)
# FIXME This needs to be checked.
# NM_002001.2:c.2_3insAT goes wrong.
# NM_002001.2:c.3dup too.
s1_start += shift + 1
s1_end += shift + 1
s2_start += shift + 1
......@@ -115,22 +118,19 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
if s2_start - ins_length >= 0 and \
s1[s1_start - ins_length:s1_start] == s2[s2_start:s2_end] :
if ins_length == 1 : # FIXME we forgot the ins of length 1
if ins_length == 1 :
hgvs = "%idup" % (s1_start)
allele.append(RawVar(start = s1_start,
type = "dup", hgvs = hgvs))
return hgvs
return hgvs, [RawVar(start = s1_start, type = "dup",
hgvs = hgvs)]
#if
hgvs = "%i_%idup" % (s1_start - ins_length + 1, s1_end)
allele.append(RawVar(start = s1_start - ins_length + 1,
end = s1_end, type = "dup", hgvs = hgvs))
return hgvs
return hgvs, [RawVar(start = s1_start - ins_length + 1,
end = s1_end, type = "dup", hgvs = hgvs)]
#if
hgvs = "%i_%iins%s" % (s1_start, s1_start + 1, s2[s2_start:s2_end])
allele.append(RawVar(start = s1_start, end = s1_start + 1,
inserted = s2[s2_start:s2_end], type = "ins", hgvs = hgvs))
return hgvs
return hgvs, [RawVar(start = s1_start, end = s1_start + 1,
inserted = s2[s2_start:s2_end], type = "ins", hgvs = hgvs)]
#if
# Deletion.
......@@ -139,30 +139,26 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
if s1_start + 1 == s1_end :
hgvs = "%idel" % (s1_start + shift + 1)
allele.append(RawVar(start = s1_start + shift + 1, type = "del",
hgvs = hgvs))
return hgvs
return hgvs, [RawVar(start = s1_start + shift + 1, type = "del",
hgvs = hgvs)]
#if
hgvs = "%i_%idel" % (s1_start + shift + 1, s1_end + shift)
allele.append(RawVar(start = s1_start + shift + 1,
s1_end = s1_end + shift, type = "del", hgvs = hgvs))
return hgvs
return hgvs, [RawVar(start = s1_start + shift + 1,
s1_end = s1_end + shift, type = "del", hgvs = hgvs)]
#if
# Substitution.
if s1_start + 1 == s1_end and s2_start + 1 == s2_end :
hgvs = "%i%s>%s" % (s1_start + 1, s1[s1_start], s2[s2_start])
allele.append(RawVar(start = s1_start + 1, deleted = s1[s1_start],
inserted = s2[s2_start], type = "subst", hgvs = hgvs))
return hgvs
return hgvs, [RawVar(start = s1_start + 1, deleted = s1[s1_start],
inserted = s2[s2_start], type = "subst", hgvs = hgvs)]
#if
# Simple InDel.
if s1_start + 1 == s1_end :
hgvs = "%idelins%s" % (s1_start + 1, s2[s2_start:s2_end])
allele.append(RawVar(start = s1_start + 1,
inserted = s2[s2_start:s2_end], type = "delins", hgvs = hgvs))
return hgvs
return hgvs, [RawVar(start = s1_start + 1,
inserted = s2[s2_start:s2_end], type = "delins", hgvs = hgvs)]
#if
# At this stage, we either have an inversion, an indel or a Compound
......@@ -181,9 +177,8 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
default = "%i_%idelins%s" % (s1_start + 1, s1_end, s2[s2_start:s2_end])
if not max(lcs_f_len, lcs_r_len) : # Optimisation, not really needed.
allele.append(RawVar(start = s1_start + 1, end = s1_end,
inserted = s2[s2_start:s2_end], type = "delins", hgvs = hgvs))
return default
return default, [RawVar(start = s1_start + 1, end = s1_end,
inserted = s2[s2_start:s2_end], type = "delins", hgvs = default)]
# Inversion.
if lcs_f_len <= lcs_r_len :
......@@ -197,9 +192,8 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
# Simple Inversion.
if s2_end - s2_start == lcs_r_len and s1_end - s1_start == lcs_r_len :
hgvs = "%i_%iinv" % (s1_start + 1, s1_end)
allele.append(RawVar(start = s1_start + 1, end = s1_end,
type = "inv", hgvs = hgvs))
return hgvs
return hgvs, [RawVar(start = s1_start + 1, end = s1_end,
type = "inv", hgvs = hgvs)]
#if
r1_len = s1_end_r - lcs_r_len
......@@ -214,20 +208,23 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
if r1_len or m2_len :
lcs = len(longest_common_suffix(s1[s1_start:s1_start + r1_len],
s2[s2_start:s2_start + m2_len]))
leftVariant = "%s;" % DNA_description(s1, s2,
leftHgvs, leftRv = DNA_description(s1, s2,
s1_start, s1_start + r1_len - lcs,
s2_start, s2_start + m2_len - lcs, allele)
s2_start, s2_start + m2_len - lcs)
leftVariant = "%s;" % leftHgvs
#if
if r2_len or m1_len :
lcp = len(longest_common_prefix(s1[s1_end - r2_len:s1_end],
s2[s2_end - m1_len:s2_end]))
rightVariant = ";%s" % DNA_description(s1, s2,
s1_end - r2_len + lcp, s1_end, s2_end - m1_len + lcp, s2_end,
allele)
rightHgvs, rightRv = DNA_description(s1, s2,
s1_end - r2_len + lcp, s1_end, s2_end - m1_len + lcp, s2_end)
rightVariant = ";%s" % rightHgvs
#if
partial = "%s%i_%iinv%s" % (leftVariant, s1_start + r1_len + 1,
s1_end - r2_len, rightVariant)
partialRv = leftRv + [RawVar(start = s1_start + r1_len + 1,
end = s1_end - r2_len, type = "inv", hgvs = hgvs)] + rightRv
#if
# Compound variant.
......@@ -237,17 +234,18 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
m1_len = s2_end_f - lcs_f_len
m2_len = s2_end - s2_start - s2_end_f
partial = "%s;%s" % (
DNA_description(s1, s2,
s1_start, s1_start + r1_len, s2_start, s2_start + m1_len,
allele),
DNA_description(s1, s2,
s1_end - r2_len, s1_end, s2_end - m2_len, s2_end, allele))
leftHgvs, leftRv = DNA_description(s1, s2,
s1_start, s1_start + r1_len, s2_start, s2_start + m1_len)
rightHgvs, rightRv = DNA_description(s1, s2,
s1_end - r2_len, s1_end, s2_end - m2_len, s2_end)
partial = "%s;%s" % (leftHgvs, rightHgvs)
partialRv = leftRv + rightRv
#else
if len(partial) <= len(default) :
return partial
return default
return partial, partialRv
return default, [RawVar(start = s1_start + 1, end = s1_end,
inserted = s2[s2_start:s2_end], type = "delins", hgvs = default)]
#DNA_description
def describe(description) :
......@@ -271,10 +269,11 @@ def describe(description) :
#for
print(result.genomicDescription)
allele = []
print(DNA_description(s1, s2, lcp, s1_end, lcp, s2_end, allele))
hgvs, allele = DNA_description(s1, s2, lcp, s1_end, lcp, s2_end)
print(hgvs)
for i in allele :
print i.hgvs
print(i.type)
print(';'.join(map(lambda x : x.hgvs, allele)))
#describe
def main() :
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment