diff --git a/mutalyzer/describe.py b/mutalyzer/describe.py index 82c8d6c46e963fe0f5bd8c5d72f6deb0bf67377b..09c133d885509d2db7de41115eabb2f2d18d1c1e 100644 --- a/mutalyzer/describe.py +++ b/mutalyzer/describe.py @@ -10,7 +10,6 @@ from __future__ import unicode_literals import collections -from Bio.SeqUtils import seq3 from Bio.Data import CodonTable from mutalyzer.util import palinsnoop, roll @@ -302,7 +301,8 @@ def describe_dna(s1, s2): description = Allele() in_transposition = 0 - extracted = extractor.extract(unicode(s1), len(s1), unicode(s2), len(s2), 0) + extracted = extractor.extract(s1.encode('utf-8'), len(s1), + s2.encode('utf-8'), len(s2), 0) for variant in extracted.variants: # print (variant.type, variant.reference_start, # variant.reference_end, variant.sample_start, @@ -354,9 +354,9 @@ def describe_protein(s1, s2): Give an allele description of the change from {s1} to {s2}. :arg s1: Sequence 1. - :type s1: str + :type s1: unicode :arg s2: Sequence 2. - :type s2: str + :type s2: unicode :returns: A list of RawVar objects, representing the allele. :rtype: list(RawVar) @@ -384,8 +384,8 @@ def describe_protein(s1, s2): s1_part = s1 s2_part = s2 - for variant in extractor.extract(str(s1_part), len(s1_part), - str(s2_part), len(s2_part), 1): + for variant in extractor.extract(s1_part.encode('utf-8'), len(s1_part), + s2_part.encode('utf-8'), len(s2_part), 1): description.append(var_to_rawvar(s1, s2, variant, container=ProteinVar)) diff --git a/mutalyzer/entrypoints/mutalyzer.py b/mutalyzer/entrypoints/mutalyzer.py index a548376e074909401d3512a4f32a720bf8139946..08864546e84539502e4ce8a66875e2da56b78152 100644 --- a/mutalyzer/entrypoints/mutalyzer.py +++ b/mutalyzer/entrypoints/mutalyzer.py @@ -19,7 +19,7 @@ from .. import variantchecker class MyEncoder(json.JSONEncoder): def default(self, o): json_object = o.__dict__ - json_object.update({"hgvs": str(o), "weight": o.weight()}) + json_object.update({"hgvs": unicode(o), "weight": o.weight()}) return json_object #default diff --git a/mutalyzer/extractor_loader.py b/mutalyzer/extractor_loader.py index 81f43ccead409525b3c1b32b4112350e56ffc8d9..ee8b097e50518f008daafbde2bd7357da648bf37 100644 --- a/mutalyzer/extractor_loader.py +++ b/mutalyzer/extractor_loader.py @@ -1,5 +1,7 @@ #!/usr/bin/env python +from __future__ import unicode_literals + import sys import json diff --git a/mutalyzer/test.py b/mutalyzer/test.py index acdca95dd7b58c417a84951d49c5ea5844c9932a..f733aec814afeda4f4c22ef9b8cabfaecb1b2221 100644 --- a/mutalyzer/test.py +++ b/mutalyzer/test.py @@ -1,23 +1,25 @@ -#!/usr/bin/env python - -import json - -import describe - -class MyEncoder(json.JSONEncoder): - def default(self, o): - return o.__dict__ - -def main(): - ref = "ACGTCGATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGATATTTTT" - alt = "ACGTCGGTTCGCTAGCTTCGGGGGATAGATAGATATATAGAGATATTTTT" - - extracted_allele = describe.describe_dna(ref, alt) - - print extracted_allele - print json.dumps({"reference_sequence": ref, "sample_sequence": alt, - "allele_description": extracted_allele}, cls=MyEncoder) -#main - -if __name__ == "__main__": - main() +#!/usr/bin/env python + +from __future__ import unicode_literals + +import json + +import describe + +class MyEncoder(json.JSONEncoder): + def default(self, o): + return o.__dict__ + +def main(): + ref = "ACGTCGATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGATATTTTT" + alt = "ACGTCGGTTCGCTAGCTTCGGGGGATAGATAGATATATAGAGATATTTTT" + + extracted_allele = describe.describe_dna(ref, alt) + + print extracted_allele + print json.dumps({"reference_sequence": ref, "sample_sequence": alt, + "allele_description": extracted_allele}, cls=MyEncoder) +#main + +if __name__ == "__main__": + main() diff --git a/mutalyzer/util.py b/mutalyzer/util.py index 6b7987b31c8f9a7bed62507572f0c417589d6c4a..63f916d39fde7e22e8a9202aaab245ecc1803d97 100644 --- a/mutalyzer/util.py +++ b/mutalyzer/util.py @@ -434,7 +434,7 @@ def longest_common_suffix(s1, s2): @type s2: unicode @return: The longest common suffix of s1 and s2. - @rtype: string + @rtype: unicode """ return longest_common_prefix(s1[::-1], s2[::-1])[::-1] #longest_common_suffix @@ -680,7 +680,7 @@ def visualise_sequence(sequence, max_length=25, flank_size=6): @type flank_size: int @return: Either the original sequence, or an abbreviation of it. - @rtype: str + @rtype: unicode """ if len(sequence) > max_length: return '%s [%ibp] %s' % (sequence[:flank_size], diff --git a/mutalyzer/variant.py b/mutalyzer/variant.py index a8e512a255dbad2319f0617d744d27c031652916..ab7a5e85ced3ee713f3e0b5bd43a56ad447ad8ee 100644 --- a/mutalyzer/variant.py +++ b/mutalyzer/variant.py @@ -1,6 +1,10 @@ """ """ +from __future__ import unicode_literals + +from Bio.SeqUtils import seq3 + from extractor import extractor from mutalyzer import models @@ -18,11 +22,11 @@ class HGVSList(list): """ Container for a list of sequences or variants. """ - def __str__(self): + def __unicode__(self): if len(self) > 1: - return "[{}]".format(';'.join(map(str, self))) - return str(self[0]) - #__str__ + return "[{}]".format(';'.join(map(unicode, self))) + return unicode(self[0]) + #__unicode__ def weight(self): weight = sum(map(lambda x: x.weight(), self)) @@ -47,7 +51,7 @@ class ISeq(object): weight_position=1): """ :arg sequence: Literal inserted sequence. - :type sequence: str + :type sequence: unicode :arg start: Start position for a transposed sequence. :type start: int :arg end: End position for a transposed sequence. @@ -66,7 +70,7 @@ class ISeq(object): self.type = "ins" #__init__ - def __str__(self): + def __unicode__(self): if self.type == "ins": return self.sequence @@ -75,7 +79,7 @@ class ISeq(object): inverted = "inv" if self.reverse else "" return "{}_{}{}".format(self.start, self.end, inverted) - #__str__ + #__unicode__ def __nonzero__(self): return bool(self.sequence) @@ -118,9 +122,9 @@ class DNAVar(models.DNAVar): :arg sample_end_offset: :type sample_end_offset: int :arg type: Variant type. - :type type: str + :type type: unicode :arg deleted: Deleted part of the reference sequence. - :type deleted: str + :type deleted: unicode :arg inserted: Inserted part. :type inserted: object :arg shift: Amount of freedom. @@ -143,12 +147,12 @@ class DNAVar(models.DNAVar): self.shift = shift #__init__ - def __str__(self): + def __unicode__(self): """ Give the HGVS description of the raw variant stored in this class. :returns: The HGVS description of the raw variant stored in this class. - :rtype: str + :rtype: unicode """ if self.type == "unknown": return "?" @@ -169,7 +173,7 @@ class DNAVar(models.DNAVar): #if return description + "{}>{}".format(self.deleted, self.inserted) - #__str__ + #__unicode__ def weight(self): if self.type == "unknown": @@ -204,9 +208,9 @@ class ProteinVar(models.ProteinVar): :arg sample_end: End position. :type sample_end: int :arg type: Variant type. - :type type: str + :type type: unicode :arg deleted: Deleted part of the reference sequence. - :type deleted: str + :type deleted: unicode :arg inserted: Inserted part. :type inserted: object :arg shift: Amount of freedom. @@ -225,7 +229,7 @@ class ProteinVar(models.ProteinVar): self.term = term #__init__ - def __str__(self): + def __unicode__(self): """ Give the HGVS description of the raw variant stored in this class. @@ -233,7 +237,7 @@ class ProteinVar(models.ProteinVar): correct description. Also see the comment in the class definition. :returns: The HGVS description of the raw variant stored in this class. - :rtype: str + :rtype: unicode """ if self.type == "unknown": return "?" @@ -262,5 +266,5 @@ class ProteinVar(models.ProteinVar): if self.term: return description + "fs*{}".format(self.term) return description - #__str__ + #__unicode__ #ProteinVar diff --git a/mutalyzer/website/views.py b/mutalyzer/website/views.py index 5bb2ae3576c049e40ad7968aebc1fe8550041f87..41db5354ab97fb9ffc41d495c879635cc8c6daa4 100644 --- a/mutalyzer/website/views.py +++ b/mutalyzer/website/views.py @@ -278,9 +278,9 @@ def name_checker(): extracted = extractedProt = '(skipped)' if allele: - extracted = str(allele) #describe.allele_description(allele) + extracted = unicode(allele) #describe.allele_description(allele) if prot_allele: - extractedProt = str(prot_allele) #describe.allele_description(prot_allele) + extractedProt = unicode(prot_allele) #describe.allele_description(prot_allele) else: extracted = extractedProt = '' @@ -700,7 +700,7 @@ def description_extractor(): 'Variant sequence is not DNA.') raw_vars = describe.describe_dna(reference_sequence, variant_sequence) - description = str(raw_vars) #describe.allele_description(raw_vars) + description = unicode(raw_vars) #describe.allele_description(raw_vars) errors, warnings, summary = output.Summary() messages = map(util.message_info, output.getMessages())