From bfc3f7cb0228a2c01c2bbb7e3c5cc8a0bc13f1a8 Mon Sep 17 00:00:00 2001 From: "J.F.J. Laros" <j.f.j.laros@lumc.nl> Date: Sun, 22 Jan 2012 20:45:33 +0000 Subject: [PATCH] Added rolling for insertions and deletions. Needs checking. git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@442 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1 --- extras/soap-tools/describe.py | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/extras/soap-tools/describe.py b/extras/soap-tools/describe.py index 893339bd..b6f2c8bd 100644 --- a/extras/soap-tools/describe.py +++ b/extras/soap-tools/describe.py @@ -15,6 +15,7 @@ from suds.client import Client from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() from mutalyzer.util import longest_common_prefix, longest_common_suffix +from mutalyzer.util import palinsnoop, roll WSDL_LOCATION = "http://localhost/mutalyzer/services/?wsdl" @@ -49,8 +50,6 @@ def LongestCommonSubstring(s1, s2) : def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end) : """ """ - # TODO: Roll the variants to the 3' end. - # TODO: Palindrome snooping. # Nothing happened. if s1 == s2: @@ -59,6 +58,12 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end) : # Insertion / Duplication. if s1_start == s1_end : ins_length = s2_end - s2_start + dummy, shift = roll(s2, s2_start + 1, s2_end + 1) + + s1_start += shift + 1 + s1_end += shift + 1 + s2_start += shift + 1 + s2_end += shift + 1 if s2_start - ins_length >= 0 and \ s1[s1_start - ins_length:s1_start] == s2[s2_start:s2_end] : @@ -72,9 +77,11 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end) : # Deletion. if s2_start == s2_end : + dummy, shift = roll(s1, s1_start + 1, s1_end) + if s1_start + 1 == s1_end : - return "%idel" % (s1_start + 1) - return "%i_%idel" % (s1_start + 1, s1_end) + return "%idel" % (s1_start + shift + 1) + return "%i_%idel" % (s1_start + shift + 1, s1_end + shift) #if # Substitution. @@ -87,20 +94,31 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end) : # At this stage, we either have an inversion, an indel or a Compound # variant. - # NOTE: We might want to do a palindrome snoop in the second line. s1_end_f, s2_end_f, lcs_f_len = LongestCommonSubstring(s1[s1_start:s1_end], s2[s2_start:s2_end]) s1_end_r, s2_end_r, lcs_r_len = LongestCommonSubstring(s1[s1_start:s1_end], Bio.Seq.reverse_complement(s2[s2_start:s2_end])) + # Palindrome snooping. + trim = palinsnoop(s1[s1_start + s1_end_r - lcs_r_len:s1_start + s1_end_r]) + if trim < 0 : # Full palindrome. + lcs_r_len = 0 # s1_end_r and s2_end_r should not be used after this. + # Inversion or Compound variant. if max(lcs_f_len, lcs_r_len) > 3 : # TODO: This is not a good criterium. # Inversion. if lcs_f_len <= lcs_r_len : + if trim > 0 : # Partial palindrome. + s1_end_r -= trim + s2_end_r -= trim + lcs_r_len -= 2 * trim + #if + # Simple Inversion. - if s1_end - s1_start == lcs_r_len : + if s2_end - s2_start == lcs_r_len and \ + s1_end - s1_start == lcs_r_len : return "%i_%iinv" % (s1_start + 1, s1_end) r1_len = s1_end_r - lcs_r_len @@ -165,6 +183,11 @@ def describe(description) : s1_end = len(s1) - lcs s2_end = len(s2) - lcs + for i in result.rawVariants.RawVariant : + print i.description + print i.visualisation + print + #for print(result.genomicDescription) print(DNA_description(s1, s2, lcp, s1_end, lcp, s2_end)) #describe -- GitLab