Skip to content
Snippets Groups Projects
Commit ebca9a78 authored by Vermaat
Browse files

Unicode fixes

parent 216146bb
No related branches found
No related tags found
No related merge requests found
......@@ -10,7 +10,6 @@ from __future__ import unicode_literals
import collections
from Bio.SeqUtils import seq3
from Bio.Data import CodonTable
from mutalyzer.util import palinsnoop, roll
......@@ -302,7 +301,8 @@ def describe_dna(s1, s2):
description = Allele()
in_transposition = 0
extracted = extractor.extract(unicode(s1), len(s1), unicode(s2), len(s2), 0)
extracted = extractor.extract(s1.encode('utf-8'), len(s1),
s2.encode('utf-8'), len(s2), 0)
for variant in extracted.variants:
# print (variant.type, variant.reference_start,
# variant.reference_end, variant.sample_start,
......@@ -354,9 +354,9 @@ def describe_protein(s1, s2):
Give an allele description of the change from {s1} to {s2}.
:arg s1: Sequence 1.
:type s1: str
:type s1: unicode
:arg s2: Sequence 2.
:type s2: str
:type s2: unicode
:returns: A list of RawVar objects, representing the allele.
:rtype: list(RawVar)
......@@ -384,8 +384,8 @@ def describe_protein(s1, s2):
s1_part = s1
s2_part = s2
for variant in extractor.extract(str(s1_part), len(s1_part),
str(s2_part), len(s2_part), 1):
for variant in extractor.extract(s1_part.encode('utf-8'), len(s1_part),
s2_part.encode('utf-8'), len(s2_part), 1):
description.append(var_to_rawvar(s1, s2, variant,
container=ProteinVar))
......
......@@ -19,7 +19,7 @@ from .. import variantchecker
class MyEncoder(json.JSONEncoder):
    """
    JSON encoder for objects that have no native JSON representation.

    Such an object is serialised as its attribute dictionary, extended with
    its unicode representation under "hgvs" and the result of its weight()
    method under "weight".
    """
    def default(self, o):
        """
        Build the JSON-serialisable dictionary for {o}.

        :arg o: Object to serialise; must provide a weight() method.
        :type o: object

        :returns: Copy of the attributes of {o} plus "hgvs" and "weight".
        :rtype: dict
        """
        # Work on a copy. Updating o.__dict__ in place would store "hgvs" and
        # "weight" as instance attributes of {o}; the latter shadows the
        # weight() method, so encoding the same object again would fail.
        json_object = dict(o.__dict__)
        json_object.update({"hgvs": unicode(o), "weight": o.weight()})
        return json_object
    #default
#MyEncoder
......
#!/usr/bin/env python
from __future__ import unicode_literals
import sys
import json
......
#!/usr/bin/env python
import json
import describe
class MyEncoder(json.JSONEncoder):
    """
    JSON encoder that serialises otherwise unsupported objects through their
    attribute dictionary.
    """
    def default(self, o):
        """
        Give the attribute dictionary of {o} as its JSON representation.

        :arg o: Object the standard encoder could not serialise.
        :type o: object

        :returns: The attribute dictionary of {o} (not a copy).
        :rtype: dict
        """
        attributes = o.__dict__
        return attributes
    #default
#MyEncoder
def main():
    """
    Describe the change between two fixed example sequences.

    The description returned by describe.describe_dna() is printed first,
    followed by a JSON document holding both sequences and the description.
    """
    reference = "ACGTCGATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGATATTTTT"
    sample = "ACGTCGGTTCGCTAGCTTCGGGGGATAGATAGATATATAGAGATATTTTT"

    extracted_allele = describe.describe_dna(reference, sample)
    report = {"reference_sequence": reference, "sample_sequence": sample,
              "allele_description": extracted_allele}

    # With a single argument, the parenthesised form prints exactly what the
    # Python 2 print statement prints.
    print(extracted_allele)
    print(json.dumps(report, cls=MyEncoder))
#main


if __name__ == "__main__":
    main()
#!/usr/bin/env python
from __future__ import unicode_literals
import json
import describe
class MyEncoder(json.JSONEncoder):
    """
    JSON encoder that falls back to the attribute dictionary of any object
    the standard encoder cannot handle.
    """
    def default(self, o):
        """
        Represent {o} in JSON by its attribute dictionary.

        :arg o: Object the standard encoder could not serialise.
        :type o: object

        :returns: The attribute dictionary of {o} (not a copy).
        :rtype: dict
        """
        attributes = o.__dict__
        return attributes
    #default
#MyEncoder
def main():
    """
    Describe the change between two fixed example sequences.

    The description returned by describe.describe_dna() is printed first,
    followed by a JSON document holding both sequences and the description.
    """
    reference = "ACGTCGATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGATATTTTT"
    sample = "ACGTCGGTTCGCTAGCTTCGGGGGATAGATAGATATATAGAGATATTTTT"

    extracted_allele = describe.describe_dna(reference, sample)
    report = {"reference_sequence": reference, "sample_sequence": sample,
              "allele_description": extracted_allele}

    # With a single argument, the parenthesised form prints exactly what the
    # Python 2 print statement prints.
    print(extracted_allele)
    print(json.dumps(report, cls=MyEncoder))
#main


if __name__ == "__main__":
    main()
......@@ -434,7 +434,7 @@ def longest_common_suffix(s1, s2):
@type s2: unicode
@return: The longest common suffix of s1 and s2.
@rtype: string
@rtype: unicode
"""
return longest_common_prefix(s1[::-1], s2[::-1])[::-1]
#longest_common_suffix
......@@ -680,7 +680,7 @@ def visualise_sequence(sequence, max_length=25, flank_size=6):
@type flank_size: int
@return: Either the original sequence, or an abbreviation of it.
@rtype: str
@rtype: unicode
"""
if len(sequence) > max_length:
return '%s [%ibp] %s' % (sequence[:flank_size],
......
"""
"""
from __future__ import unicode_literals
from Bio.SeqUtils import seq3
from extractor import extractor
from mutalyzer import models
......@@ -18,11 +22,11 @@ class HGVSList(list):
"""
Container for a list of sequences or variants.
"""
def __str__(self):
def __unicode__(self):
if len(self) > 1:
return "[{}]".format(';'.join(map(str, self)))
return str(self[0])
#__str__
return "[{}]".format(';'.join(map(unicode, self)))
return unicode(self[0])
#__unicode__
def weight(self):
weight = sum(map(lambda x: x.weight(), self))
......@@ -47,7 +51,7 @@ class ISeq(object):
weight_position=1):
"""
:arg sequence: Literal inserted sequence.
:type sequence: str
:type sequence: unicode
:arg start: Start position for a transposed sequence.
:type start: int
:arg end: End position for a transposed sequence.
......@@ -66,7 +70,7 @@ class ISeq(object):
self.type = "ins"
#__init__
def __str__(self):
def __unicode__(self):
if self.type == "ins":
return self.sequence
......@@ -75,7 +79,7 @@ class ISeq(object):
inverted = "inv" if self.reverse else ""
return "{}_{}{}".format(self.start, self.end, inverted)
#__str__
#__unicode__
def __nonzero__(self):
return bool(self.sequence)
......@@ -118,9 +122,9 @@ class DNAVar(models.DNAVar):
:arg sample_end_offset:
:type sample_end_offset: int
:arg type: Variant type.
:type type: str
:type type: unicode
:arg deleted: Deleted part of the reference sequence.
:type deleted: str
:type deleted: unicode
:arg inserted: Inserted part.
:type inserted: object
:arg shift: Amount of freedom.
......@@ -143,12 +147,12 @@ class DNAVar(models.DNAVar):
self.shift = shift
#__init__
def __str__(self):
def __unicode__(self):
"""
Give the HGVS description of the raw variant stored in this class.
:returns: The HGVS description of the raw variant stored in this class.
:rtype: str
:rtype: unicode
"""
if self.type == "unknown":
return "?"
......@@ -169,7 +173,7 @@ class DNAVar(models.DNAVar):
#if
return description + "{}>{}".format(self.deleted, self.inserted)
#__str__
#__unicode__
def weight(self):
if self.type == "unknown":
......@@ -204,9 +208,9 @@ class ProteinVar(models.ProteinVar):
:arg sample_end: End position.
:type sample_end: int
:arg type: Variant type.
:type type: str
:type type: unicode
:arg deleted: Deleted part of the reference sequence.
:type deleted: str
:type deleted: unicode
:arg inserted: Inserted part.
:type inserted: object
:arg shift: Amount of freedom.
......@@ -225,7 +229,7 @@ class ProteinVar(models.ProteinVar):
self.term = term
#__init__
def __str__(self):
def __unicode__(self):
"""
Give the HGVS description of the raw variant stored in this class.
......@@ -233,7 +237,7 @@ class ProteinVar(models.ProteinVar):
correct description. Also see the comment in the class definition.
:returns: The HGVS description of the raw variant stored in this class.
:rtype: str
:rtype: unicode
"""
if self.type == "unknown":
return "?"
......@@ -262,5 +266,5 @@ class ProteinVar(models.ProteinVar):
if self.term:
return description + "fs*{}".format(self.term)
return description
#__str__
#__unicode__
#ProteinVar
......@@ -278,9 +278,9 @@ def name_checker():
extracted = extractedProt = '(skipped)'
if allele:
extracted = str(allele) #describe.allele_description(allele)
extracted = unicode(allele) #describe.allele_description(allele)
if prot_allele:
extractedProt = str(prot_allele) #describe.allele_description(prot_allele)
extractedProt = unicode(prot_allele) #describe.allele_description(prot_allele)
else:
extracted = extractedProt = ''
......@@ -700,7 +700,7 @@ def description_extractor():
'Variant sequence is not DNA.')
raw_vars = describe.describe_dna(reference_sequence, variant_sequence)
description = str(raw_vars) #describe.allele_description(raw_vars)
description = unicode(raw_vars) #describe.allele_description(raw_vars)
errors, warnings, summary = output.Summary()
messages = map(util.message_info, output.getMessages())
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment