From 8ef4a68a21e029f0dbdef5475b160d14ce54833a Mon Sep 17 00:00:00 2001
From: "J.F.J. Laros" <j.f.j.laros@lumc.nl>
Date: Thu, 14 Aug 2014 15:01:15 +0200
Subject: [PATCH] Added weights for variants.

---
 mutalyzer/describe.py | 64 +++++++++++++++++++++++++++++--------------
 mutalyzer/variant.py  | 23 ++++++++++++++--
 2 files changed, 64 insertions(+), 23 deletions(-)

diff --git a/mutalyzer/describe.py b/mutalyzer/describe.py
index 91d072f0..520322ba 100644
--- a/mutalyzer/describe.py
+++ b/mutalyzer/describe.py
@@ -181,16 +181,28 @@ def find_fs(peptide, alternative_peptide, fs):
 #find_fs
 
 
-def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar):
+def var_to_rawvar(s1, s2, var, weight_position, seq_list=[], container=DNAVar):
     """
     """
     # Unknown.
     if s1 == '?' or s2 == '?':
         return [container(type="unknown")]
 
+
+    ins_length = var.sample_end - var.sample_start
+    weight = 0
+
+    if seq_list:
+        inserted = seq_list
+        weight = seq_list.weight()
+    else:
+        inserted = ISeqList([ISeq(
+            sequence=s2[var.sample_start:var.sample_end],
+            weight=ins_length)])
+
+
     # Insertion / Duplication.
     if var.reference_start == var.reference_end:
-        ins_length = var.sample_end - var.sample_start
         shift5, shift3 = roll(s2, var.sample_start + 1, var.sample_end)
         shift = shift5 + shift3
 
@@ -199,6 +211,7 @@ def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar):
         var.sample_start += shift3
         var.sample_end += shift3
 
+        # FIXME: range can be a duplication.
         if not seq_list and (var.sample_start - ins_length >= 0 and
             s1[var.reference_start - ins_length:var.reference_start] ==
             s2[var.sample_start:var.sample_end]):
@@ -207,14 +220,16 @@ def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar):
                 end=var.reference_end, type="dup", shift=shift,
                 sample_start=var.sample_start + 1, sample_end=var.sample_end,
                 inserted=ISeqList([ISeq(sequence=s2[
-                var.sample_start:var.sample_end])]))
+                var.sample_start:var.sample_end])]), weight=var.weight)
         #if
+
+        weight += (2 * weight_position + extractor.WEIGHT_SEPARATOR +
+            extractor.WEIGHT_INSERTION)
+
         return container(start=var.reference_start,
-            end=var.reference_start + 1,
-            inserted=seq_list or
-            ISeqList([ISeq(sequence=s2[var.sample_start:var.sample_end])]),
-            type="ins", shift=shift, sample_start=var.sample_start + 1,
-            sample_end=var.sample_end)
+            end=var.reference_start + 1, inserted=inserted, type="ins",
+            shift=shift, sample_start=var.sample_start + 1,
+            sample_end=var.sample_end, weight=weight)
     #if
 
     # Deletion.
@@ -228,7 +243,7 @@ def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar):
         return container(start=var.reference_start + 1,
             end=var.reference_end, type="del", shift=shift,
             sample_start=var.sample_start, sample_end=var.sample_end + 1,
-            deleted=ISeqList([ISeq(sequence=s1[
+            weight=var.weight, deleted=ISeqList([ISeq(sequence=s1[
                 var.reference_start:var.reference_end])]))
     #if
 
@@ -238,7 +253,7 @@ def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar):
 
         return container(start=var.reference_start + 1,
             end=var.reference_end, sample_start=var.sample_start + 1,
-            sample_end=var.sample_end, type="subst",
+            sample_end=var.sample_end, type="subst", weight=var.weight,
             deleted=ISeqList([ISeq(sequence=s1[var.reference_start])]),
             inserted=ISeqList([ISeq(sequence=s2[var.sample_start])]))
     #if
@@ -258,16 +273,19 @@ def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar):
             deleted=ISeqList([ISeq(sequence=s1[
                 var.reference_start:var.reference_end])]),
             inserted=ISeqList([ISeq(sequence=s2[
-                var.sample_start:var.reference_end])]))
+                var.sample_start:var.reference_end])]), weight=var.weight)
     #if
 
     # InDel.
+    weight += weight_position + extractor.WEIGHT_DELETION_INSERTION
+    if var.reference_start + 1 == var.reference_end:
+        weight += weight_position + extractor.WEIGHT_SEPARATOR
+    # Hand over the `inserted` list and the `weight` accumulated above.
     return container(start=var.reference_start + 1,
         end=var.reference_end, deleted=ISeqList([ISeq(sequence=s1[
-                var.reference_start:var.reference_end])]), inserted=seq_list or
-        ISeqList([ISeq(sequence=s2[var.sample_start:var.sample_end])]),
+            var.reference_start:var.reference_end])]), inserted=inserted,
         type="delins", sample_start=var.sample_start + 1,
-        sample_end=var.sample_end)
+        sample_end=var.sample_end, weight=weight)
 #var_to_rawvar
 
 def describe_dna(s1, s2):
@@ -285,7 +303,8 @@ def describe_dna(s1, s2):
     description = Allele()
     in_transposition = 0
 
-    for variant in extractor.extract(str(s1), len(s1), str(s2), len(s2), 0):
+    variant_extract = extractor.extract(str(s1), len(s1), str(s2), len(s2), 0)
+    for variant in variant_extract.variants:
         print (variant.type, variant.reference_start,
             variant.reference_end, variant.sample_start,
             variant.sample_end, variant.transposition_start,
@@ -302,22 +321,27 @@ def describe_dna(s1, s2):
         if in_transposition:
             if variant.type & extractor.IDENTITY:
                 seq_list.append(ISeq(start=variant.transposition_start + 1,
-                    end=variant.transposition_end, reverse=False))
+                    end=variant.transposition_end, weight=variant.weight,
+                    reverse=False))
             elif variant.type & extractor.REVERSE_COMPLEMENT:
                 seq_list.append(ISeq(start=variant.transposition_start + 1,
-                    end=variant.transposition_end, reverse=True))
+                    end=variant.transposition_end, weight=variant.weight,
+                    reverse=True))
             else:
                 seq_list.append(ISeq(
-                    sequence=s2[variant.sample_start:variant.sample_end]))
+                    sequence=s2[variant.sample_start:variant.sample_end],
+                    weight=variant.weight))
         #if
         elif not (variant.type & extractor.IDENTITY):
-            description.append(var_to_rawvar(s1, s2, variant))
+            description.append(var_to_rawvar(s1, s2, variant,
+                variant_extract.weight_position))
 
         if variant.type & extractor.TRANSPOSITION_CLOSE:
             in_transposition -= 1
 
             if not in_transposition:
-                description.append(var_to_rawvar(s1, s2, variant, seq_list))
+                description.append(var_to_rawvar(s1, s2, variant,
+                    variant_extract.weight_position, seq_list))
         #if
     #for
 
diff --git a/mutalyzer/variant.py b/mutalyzer/variant.py
index 5db00642..de8187d5 100644
--- a/mutalyzer/variant.py
+++ b/mutalyzer/variant.py
@@ -1,6 +1,7 @@
 """
 """
 
+from extractor.extractor import WEIGHT_SEPARATOR
 from mutalyzer import models
 
 class HGVSList(list):
@@ -12,6 +13,13 @@ class HGVSList(list):
             return "[{}]".format(';'.join(map(str, self)))
         return str(self[0])
     #__str__
+
+    def weight(self):
+        """Sum item weights; add len + 1 separator weights if several."""
+        total = sum(item.weight for item in self)
+        if len(self) <= 1:
+            return total
+        return total + (len(self) + 1) * WEIGHT_SEPARATOR
 #HGVSList
 
 class Allele(HGVSList):
@@ -24,7 +32,7 @@ class ISeq(object):
     """
     Container for an inserted sequence.
     """
-    def __init__(self, sequence="", start=0, end=0, reverse=False):
+    def __init__(self, sequence="", start=0, end=0, weight=0, reverse=False):
         """
         :arg sequence: Literal inserted sequence.
         :type sequence: str
@@ -32,12 +40,15 @@ class ISeq(object):
         :type start: int
         :arg end: End position for a transposed sequence.
         :type end: int
+        :arg weight: Weight of the variant (normalised length).
+        :type weight: int
         :arg reverse: Inverted transposed sequence.
         :type reverse: bool
         """
         self.sequence = sequence
         self.start = start
         self.end = end
+        self.weight = weight
         self.reverse = reverse
 
         self.type = "trans"
@@ -64,7 +75,7 @@ class DNAVar(models.DNAVar):
     def __init__(self, start=0, start_offset=0, end=0, end_offset=0,
             sample_start=0, sample_start_offset=0, sample_end=0,
             sample_end_offset=0, type="none", deleted=ISeqList([ISeq()]),
-            inserted=ISeqList([ISeq()]), shift=0):
+            inserted=ISeqList([ISeq()]), weight=0, shift=0):
         """
         Initialise the class with the appropriate values.
 
@@ -90,6 +101,8 @@ class DNAVar(models.DNAVar):
         :type deleted: str
         :arg inserted: Inserted part.
         :type inserted: object
+        :arg weight: Weight of the variant (normalised length).
+        :type weight: int
         :arg shift: Amount of freedom.
         :type shift: int
         """
@@ -106,6 +119,7 @@ class DNAVar(models.DNAVar):
         self.type = type
         self.deleted = deleted
         self.inserted = inserted
+        self.weight = weight
         self.shift = shift
     #__init__
 
@@ -144,7 +158,7 @@ class ProteinVar(models.ProteinVar):
     """
     def __init__(self, start=0, end=0, sample_start=0, sample_end=0,
             type="none", deleted=ISeqList([ISeq()]),
-            inserted=ISeqList([ISeq()]), shift=0, term=0):
+            inserted=ISeqList([ISeq()]), weight=0, shift=0, term=0):
         """
         Initialise the class with the appropriate values.
 
@@ -162,6 +176,8 @@ class ProteinVar(models.ProteinVar):
         :type deleted: str
         :arg inserted: Inserted part.
         :type inserted: object
+        :arg weight: Weight of the variant (normalised length).
+        :type weight: int
         :arg shift: Amount of freedom.
         :type shift: int
         :arg term:
@@ -174,6 +190,7 @@ class ProteinVar(models.ProteinVar):
         self.type = type
         self.deleted = deleted
         self.inserted = inserted
+        self.weight = weight
         self.shift = shift
         self.term = term
     #__init__
-- 
GitLab