Skip to content
Snippets Groups Projects
Commit 8ef4a68a authored by Laros, committed by Vermaat
Browse files

Added weights for variants.

parent 7f1b9642
No related branches found
No related tags found
No related merge requests found
......@@ -181,16 +181,28 @@ def find_fs(peptide, alternative_peptide, fs):
#find_fs
def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar):
def var_to_rawvar(s1, s2, var, weight_position, seq_list=[], container=DNAVar):
"""
"""
# Unknown.
if s1 == '?' or s2 == '?':
return [container(type="unknown")]
ins_length = var.sample_end - var.sample_start
weight = 0
if seq_list:
inserted = seq_list
weight = seq_list.weight()
else:
inserted = ISeqList([ISeq(
sequence=s2[var.sample_start:var.sample_end],
weight=ins_length)])
# Insertion / Duplication.
if var.reference_start == var.reference_end:
ins_length = var.sample_end - var.sample_start
shift5, shift3 = roll(s2, var.sample_start + 1, var.sample_end)
shift = shift5 + shift3
......@@ -199,6 +211,7 @@ def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar):
var.sample_start += shift3
var.sample_end += shift3
# FIXME: range can be a duplication.
if not seq_list and (var.sample_start - ins_length >= 0 and
s1[var.reference_start - ins_length:var.reference_start] ==
s2[var.sample_start:var.sample_end]):
......@@ -207,14 +220,16 @@ def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar):
end=var.reference_end, type="dup", shift=shift,
sample_start=var.sample_start + 1, sample_end=var.sample_end,
inserted=ISeqList([ISeq(sequence=s2[
var.sample_start:var.sample_end])]))
var.sample_start:var.sample_end])]), weight=var.weight)
#if
weight += (2 * weight_position + extractor.WEIGHT_SEPARATOR +
extractor.WEIGHT_INSERTION)
return container(start=var.reference_start,
end=var.reference_start + 1,
inserted=seq_list or
ISeqList([ISeq(sequence=s2[var.sample_start:var.sample_end])]),
type="ins", shift=shift, sample_start=var.sample_start + 1,
sample_end=var.sample_end)
end=var.reference_start + 1, inserted=inserted, type="ins",
shift=shift, sample_start=var.sample_start + 1,
sample_end=var.sample_end, weight=weight)
#if
# Deletion.
......@@ -228,7 +243,7 @@ def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar):
return container(start=var.reference_start + 1,
end=var.reference_end, type="del", shift=shift,
sample_start=var.sample_start, sample_end=var.sample_end + 1,
deleted=ISeqList([ISeq(sequence=s1[
weight=var.weight, deleted=ISeqList([ISeq(sequence=s1[
var.reference_start:var.reference_end])]))
#if
......@@ -238,7 +253,7 @@ def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar):
return container(start=var.reference_start + 1,
end=var.reference_end, sample_start=var.sample_start + 1,
sample_end=var.sample_end, type="subst",
sample_end=var.sample_end, type="subst", weight=var.weight,
deleted=ISeqList([ISeq(sequence=s1[var.reference_start])]),
inserted=ISeqList([ISeq(sequence=s2[var.sample_start])]))
#if
......@@ -258,16 +273,19 @@ def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar):
deleted=ISeqList([ISeq(sequence=s1[
var.reference_start:var.reference_end])]),
inserted=ISeqList([ISeq(sequence=s2[
var.sample_start:var.reference_end])]))
var.sample_start:var.reference_end])]), weight=var.weight)
#if
# InDel.
weight += weight_position + extractor.WEIGHT_DELETION_INSERTION
if var.reference_start + 1 == var.reference_end:
weight += weight_position + extractor.WEIGHT_SEPARATOR
return container(start=var.reference_start + 1,
end=var.reference_end, deleted=ISeqList([ISeq(sequence=s1[
var.reference_start:var.reference_end])]), inserted=seq_list or
ISeqList([ISeq(sequence=s2[var.sample_start:var.sample_end])]),
var.reference_start:var.reference_end])]), inserted=seq_list,
type="delins", sample_start=var.sample_start + 1,
sample_end=var.sample_end)
sample_end=var.sample_end, weight=var.weight)
#var_to_rawvar
def describe_dna(s1, s2):
......@@ -285,7 +303,8 @@ def describe_dna(s1, s2):
description = Allele()
in_transposition = 0
for variant in extractor.extract(str(s1), len(s1), str(s2), len(s2), 0):
variant_extract = extractor.extract(str(s1), len(s1), str(s2), len(s2), 0)
for variant in variant_extract.variants:
print (variant.type, variant.reference_start,
variant.reference_end, variant.sample_start,
variant.sample_end, variant.transposition_start,
......@@ -302,22 +321,27 @@ def describe_dna(s1, s2):
if in_transposition:
if variant.type & extractor.IDENTITY:
seq_list.append(ISeq(start=variant.transposition_start + 1,
end=variant.transposition_end, reverse=False))
end=variant.transposition_end, weight=variant.weight,
reverse=False))
elif variant.type & extractor.REVERSE_COMPLEMENT:
seq_list.append(ISeq(start=variant.transposition_start + 1,
end=variant.transposition_end, reverse=True))
end=variant.transposition_end, weight=variant.weight,
reverse=True))
else:
seq_list.append(ISeq(
sequence=s2[variant.sample_start:variant.sample_end]))
sequence=s2[variant.sample_start:variant.sample_end],
weight=variant.weight))
#if
elif not (variant.type & extractor.IDENTITY):
description.append(var_to_rawvar(s1, s2, variant))
description.append(var_to_rawvar(s1, s2, variant,
variant_extract.weight_position))
if variant.type & extractor.TRANSPOSITION_CLOSE:
in_transposition -= 1
if not in_transposition:
description.append(var_to_rawvar(s1, s2, variant, seq_list))
description.append(var_to_rawvar(s1, s2, variant,
variant_extract.weight_position, seq_list))
#if
#for
......
"""
"""
from extractor.extractor import WEIGHT_SEPARATOR
from mutalyzer import models
class HGVSList(list):
......@@ -12,6 +13,13 @@ class HGVSList(list):
return "[{}]".format(';'.join(map(str, self)))
return str(self[0])
#__str__
def weight(self):
    """
    Return the combined weight of all items in this list.

    The weights of the individual items are summed. When the list holds
    more than one item, an additional `len(self) + 1` times
    WEIGHT_SEPARATOR is added on top of that sum.

    :returns: Total weight of the list.
    :rtype: int
    """
    total = sum(item.weight for item in self)
    count = len(self)

    # An empty or single-item list carries no separator overhead.
    if count <= 1:
        return total

    # NOTE(review): presumably `count + 1` covers the two brackets plus the
    # `count - 1` semicolons used when rendering a multi-item list; confirm.
    return total + (count + 1) * WEIGHT_SEPARATOR
#weight
#HGVSList
class Allele(HGVSList):
......@@ -24,7 +32,7 @@ class ISeq(object):
"""
Container for an inserted sequence.
"""
def __init__(self, sequence="", start=0, end=0, reverse=False):
def __init__(self, sequence="", start=0, end=0, weight=0, reverse=False):
"""
:arg sequence: Literal inserted sequence.
:type sequence: str
......@@ -32,12 +40,15 @@ class ISeq(object):
:type start: int
:arg end: End position for a transposed sequence.
:type end: int
:arg weight: Weight of the variant (normalised length).
:type weight: int
:arg reverse: Inverted transposed sequence.
:type reverse: bool
"""
self.sequence = sequence
self.start = start
self.end = end
self.weight = weight
self.reverse = reverse
self.type = "trans"
......@@ -64,7 +75,7 @@ class DNAVar(models.DNAVar):
def __init__(self, start=0, start_offset=0, end=0, end_offset=0,
sample_start=0, sample_start_offset=0, sample_end=0,
sample_end_offset=0, type="none", deleted=ISeqList([ISeq()]),
inserted=ISeqList([ISeq()]), shift=0):
inserted=ISeqList([ISeq()]), weight=0, shift=0):
"""
Initialise the class with the appropriate values.
......@@ -90,6 +101,8 @@ class DNAVar(models.DNAVar):
:type deleted: str
:arg inserted: Inserted part.
:type inserted: object
:arg weight: Weight of the variant (normalised length).
:type weight: int
:arg shift: Amount of freedom.
:type shift: int
"""
......@@ -106,6 +119,7 @@ class DNAVar(models.DNAVar):
self.type = type
self.deleted = deleted
self.inserted = inserted
self.weight = weight
self.shift = shift
#__init__
......@@ -144,7 +158,7 @@ class ProteinVar(models.ProteinVar):
"""
def __init__(self, start=0, end=0, sample_start=0, sample_end=0,
type="none", deleted=ISeqList([ISeq()]),
inserted=ISeqList([ISeq()]), shift=0, term=0):
inserted=ISeqList([ISeq()]), weight=0, shift=0, term=0):
"""
Initialise the class with the appropriate values.
......@@ -162,6 +176,8 @@ class ProteinVar(models.ProteinVar):
:type deleted: str
:arg inserted: Inserted part.
:type inserted: object
:arg weight: Weight of the variant (normalised length).
:type weight: int
:arg shift: Amount of freedom.
:type shift: int
:arg term:
......@@ -174,6 +190,7 @@ class ProteinVar(models.ProteinVar):
self.type = type
self.deleted = deleted
self.inserted = inserted
self.weight = weight
self.shift = shift
self.term = term
#__init__
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment