From f7108427da6e470f0dd0ecc46a9ab07c698ca750 Mon Sep 17 00:00:00 2001
From: "J.F.J. Laros" <j.f.j.laros@lumc.nl>
Date: Sun, 29 Jan 2012 19:02:09 +0000
Subject: [PATCH] Finished generating the allele description.

git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@465 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
---
 extras/soap-tools/describe.py | 91 +++++++++++++++++------------------
 1 file changed, 45 insertions(+), 46 deletions(-)

diff --git a/extras/soap-tools/describe.py b/extras/soap-tools/describe.py
index c6862b97..2480c49e 100644
--- a/extras/soap-tools/describe.py
+++ b/extras/soap-tools/describe.py
@@ -76,7 +76,7 @@ def LongestCommonSubstring(s1, s2) :
     return x_longest, y_longest, longest
 #LongestCommonSubstring
 
-def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
+def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end) :
     """
     Give the HGVS description of the change from {s1} to {s2} in the range
     {s1_start}..{s1_end} on {s1} and {s2_start}..{s2_end} on {s2}.
@@ -100,13 +100,16 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
 
     # Nothing happened.
     if s1 == s2:
-        return "="
+        return "=", None
 
     # Insertion / Duplication.
     if s1_start == s1_end :
         ins_length = s2_end - s2_start
         dummy, shift = roll(s2, s2_start + 1, s2_end + 1)
 
+        # FIXME This needs to be checked.
+        # NM_002001.2:c.2_3insAT goes wrong.
+        # NM_002001.2:c.3dup too.
         s1_start += shift + 1
         s1_end += shift + 1
         s2_start += shift + 1
@@ -115,22 +118,19 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
         if s2_start - ins_length >= 0 and \
             s1[s1_start - ins_length:s1_start] == s2[s2_start:s2_end] :
 
-            if ins_length == 1 : # FIXME we forgot the ins of length 1
+            if ins_length == 1 :
                 hgvs = "%idup" % (s1_start)
-                allele.append(RawVar(start = s1_start,
-                    type = "dup", hgvs = hgvs))
-                return hgvs
+                return hgvs, [RawVar(start = s1_start, type = "dup",
+                    hgvs = hgvs)]
             #if
 
             hgvs = "%i_%idup" % (s1_start - ins_length + 1, s1_end)
-            allele.append(RawVar(start = s1_start - ins_length + 1,
-                end = s1_end, type = "dup", hgvs = hgvs))
-            return hgvs
+            return hgvs, [RawVar(start = s1_start - ins_length + 1,
+                end = s1_end, type = "dup", hgvs = hgvs)]
         #if
         hgvs = "%i_%iins%s" % (s1_start, s1_start + 1, s2[s2_start:s2_end])
-        allele.append(RawVar(start = s1_start, end = s1_start + 1,
-            inserted = s2[s2_start:s2_end], type = "ins", hgvs = hgvs))
-        return hgvs
+        return hgvs, [RawVar(start = s1_start, end = s1_start + 1,
+            inserted = s2[s2_start:s2_end], type = "ins", hgvs = hgvs)]
     #if
 
     # Deletion.
@@ -139,30 +139,26 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
 
         if s1_start + 1 == s1_end :
             hgvs = "%idel" % (s1_start + shift + 1)
-            allele.append(RawVar(start = s1_start + shift + 1, type = "del",
-                hgvs = hgvs))
-            return hgvs
+            return hgvs, [RawVar(start = s1_start + shift + 1, type = "del",
+                hgvs = hgvs)]
         #if
         hgvs = "%i_%idel" % (s1_start + shift + 1, s1_end + shift)
-        allele.append(RawVar(start = s1_start + shift + 1,
-            s1_end = s1_end + shift, type = "del", hgvs = hgvs))
-        return hgvs 
+        return hgvs, [RawVar(start = s1_start + shift + 1,
+            s1_end = s1_end + shift, type = "del", hgvs = hgvs)]
     #if
 
     # Substitution.
     if s1_start + 1 == s1_end and s2_start + 1 == s2_end :
         hgvs = "%i%s>%s" % (s1_start + 1, s1[s1_start], s2[s2_start])
-        allele.append(RawVar(start = s1_start + 1, deleted = s1[s1_start],
-            inserted = s2[s2_start], type = "subst", hgvs = hgvs))
-        return hgvs
+        return hgvs, [RawVar(start = s1_start + 1, deleted = s1[s1_start],
+            inserted = s2[s2_start], type = "subst", hgvs = hgvs)]
     #if
 
     # Simple InDel.
     if s1_start + 1 == s1_end :
         hgvs = "%idelins%s" % (s1_start + 1, s2[s2_start:s2_end])
-        allele.append(RawVar(start = s1_start + 1,
-            inserted = s2[s2_start:s2_end], type = "delins", hgvs = hgvs))
-        return hgvs
+        return hgvs, [RawVar(start = s1_start + 1,
+            inserted = s2[s2_start:s2_end], type = "delins", hgvs = hgvs)]
     #if
 
     # At this stage, we either have an inversion, an indel or a Compound
@@ -181,9 +177,8 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
     default = "%i_%idelins%s" % (s1_start + 1, s1_end, s2[s2_start:s2_end])
 
     if not max(lcs_f_len, lcs_r_len) : # Optimisation, not really needed.
-        allele.append(RawVar(start = s1_start + 1, end = s1_end,
-            inserted = s2[s2_start:s2_end], type = "delins", hgvs = hgvs))
-        return default
+        return default, [RawVar(start = s1_start + 1, end = s1_end,
+            inserted = s2[s2_start:s2_end], type = "delins", hgvs = default)]
 
     # Inversion.
     if lcs_f_len <= lcs_r_len :
@@ -197,9 +192,8 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
         # Simple Inversion.
         if s2_end - s2_start == lcs_r_len and s1_end - s1_start == lcs_r_len :
             hgvs = "%i_%iinv" % (s1_start + 1, s1_end)
-            allele.append(RawVar(start = s1_start + 1, end = s1_end,
-                type = "inv", hgvs = hgvs))
-            return hgvs
+            return hgvs, [RawVar(start = s1_start + 1, end = s1_end,
+                type = "inv", hgvs = hgvs)]
         #if
 
         r1_len = s1_end_r - lcs_r_len
@@ -214,20 +208,23 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
         if r1_len or m2_len :
             lcs = len(longest_common_suffix(s1[s1_start:s1_start + r1_len],
                 s2[s2_start:s2_start + m2_len]))
-            leftVariant = "%s;" % DNA_description(s1, s2,
+            leftHgvs, leftRv = DNA_description(s1, s2,
                 s1_start, s1_start + r1_len - lcs,
-                s2_start, s2_start + m2_len - lcs, allele)
+                s2_start, s2_start + m2_len - lcs)
+            leftVariant = "%s;" % leftHgvs
         #if
         if r2_len or m1_len :
             lcp = len(longest_common_prefix(s1[s1_end - r2_len:s1_end],
                 s2[s2_end - m1_len:s2_end]))
-            rightVariant = ";%s" % DNA_description(s1, s2,
-                s1_end - r2_len + lcp, s1_end, s2_end - m1_len + lcp, s2_end,
-                allele)
+            rightHgvs, rightRv = DNA_description(s1, s2,
+                s1_end - r2_len + lcp, s1_end, s2_end - m1_len + lcp, s2_end)
+            rightVariant = ";%s" % rightHgvs
         #if
 
         partial = "%s%i_%iinv%s" % (leftVariant, s1_start + r1_len + 1,
             s1_end - r2_len, rightVariant)
+        partialRv = leftRv + [RawVar(start = s1_start + r1_len + 1,
+            end = s1_end - r2_len, type = "inv", hgvs = hgvs)] + rightRv
     #if
 
     # Compound variant.
@@ -237,17 +234,18 @@ def DNA_description(s1, s2, s1_start, s1_end, s2_start, s2_end, allele) :
         m1_len = s2_end_f - lcs_f_len
         m2_len = s2_end - s2_start - s2_end_f
 
-        partial = "%s;%s" % (
-            DNA_description(s1, s2,
-                s1_start, s1_start + r1_len, s2_start, s2_start + m1_len, 
-                allele),
-            DNA_description(s1, s2,
-                s1_end - r2_len, s1_end, s2_end - m2_len, s2_end, allele))
+        leftHgvs, leftRv = DNA_description(s1, s2,
+                s1_start, s1_start + r1_len, s2_start, s2_start + m1_len)
+        rightHgvs, rightRv = DNA_description(s1, s2,
+                s1_end - r2_len, s1_end, s2_end - m2_len, s2_end)
+        partial = "%s;%s" % (leftHgvs, rightHgvs)
+        partialRv = leftRv + rightRv
     #else
 
     if len(partial) <= len(default) :
-        return partial
-    return default
+        return partial, partialRv
+    return default, [RawVar(start = s1_start + 1, end = s1_end,
+        inserted = s2[s2_start:s2_end], type = "delins", hgvs = default)]
 #DNA_description
 
 def describe(description) :
@@ -271,10 +269,11 @@ def describe(description) :
         #for
     print(result.genomicDescription)
 
-    allele = []
-    print(DNA_description(s1, s2, lcp, s1_end, lcp, s2_end, allele))
+    hgvs, allele = DNA_description(s1, s2, lcp, s1_end, lcp, s2_end)
+    print(hgvs)
     for i in allele :
-        print i.hgvs
+        print(i.type)
+    print(';'.join(map(lambda x : x.hgvs, allele)))
 #describe
 
 def main() :
-- 
GitLab