diff --git a/mutalyzer/describe.py b/mutalyzer/describe.py
index 44de257d0ab5d47fd1f01b2893e9cf76c89d5a13..555894661475b0ed114568f06dbf4d72b50e6c90 100644
--- a/mutalyzer/describe.py
+++ b/mutalyzer/describe.py
@@ -212,15 +212,15 @@ class Seq(object):
 
 class SeqList(list):
     def __str__(self):
-        representation = ';'.join(map(str, self))
         if len(self) > 1:
-            return "[{}]".format(representation)
-        return representation
+            return "[{}]".format(';'.join(map(str, self)))
+        return str(self[0])
     #__str__
 #SeqList
 
 
 class HGVSVar(object):
+    # NOTE: This may be obsolete, but check the JSON generation.
     def update(self):
         self.hgvs = str(self)
         self.hgvs_length = len(self)
@@ -321,6 +321,7 @@ class DNAVar(models.DNAVar, HGVSVar):
             variant stored in this class.
         :rtype: int
         """
+        # NOTE: Obsolete?
         if self.type in ("none", "unknown"): # `=' or `?'
             return 1
 
@@ -434,6 +435,7 @@ class ProteinVar(models.ProteinVar, HGVSVar):
             variant stored in this class.
         :rtype: int
         """
+        # NOTE: Obsolete?
         if not self.start: # =
             return 1
 
@@ -477,6 +479,7 @@ class Allele(list):
         :rtype: int
         """
         # NOTE: Do we need to count the ; and [] ?
+        # NOTE: Obsolete?
         return sum(map(lambda x: x.hgvs_length, self))
     #length
 #Allele
@@ -571,7 +574,7 @@ def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar):
         sample_end=var.sample_end)
 #var_to_rawvar
 
-def describe(s1, s2, dna=True):
+def describe_dna(s1, s2):
     """
     Give an allele description of the change from {s1} to {s2}.
 
@@ -584,84 +587,91 @@ def describe(s1, s2, dna=True):
     :rtype: list(RawVar)
     """
     description = Allele()
+    in_transposition = 0
+
+    for variant in extractor.extract(str(s1), len(s1), str(s2), len(s2), 0):
+        print (variant.type, variant.reference_start,
+            variant.reference_end, variant.sample_start,
+            variant.sample_end, variant.transposition_start,
+            variant.transposition_end)
+        print (variant.type & extractor.TRANSPOSITION_OPEN, variant.type &
+            extractor.TRANSPOSITION_CLOSE)
+
+        if variant.type & extractor.TRANSPOSITION_OPEN:
+            if not in_transposition:
+                seq_list = SeqList()
+            in_transposition += 1
+        #if
 
-    if not dna:
-        fs1, fs2 = make_fs_tables(1)
-        longest_fs_f = max(find_fs(s1, s2, fs1), find_fs(s1, s2, fs2))
-        longest_fs_r = max(find_fs(s2, s1, fs1), find_fs(s2, s1, fs2))
-
-        if longest_fs_f > longest_fs_r:
-            print s1[:longest_fs_f[1]], s1[longest_fs_f[1]:]
-            print s2[:len(s2) - longest_fs_f[0]], \
-                s2[len(s2) - longest_fs_f[0]:]
-            s1_part = s1[:longest_fs_f[1]]
-            s2_part = s2[:len(s2) - longest_fs_f[0]]
-            term = longest_fs_f[0]
+        if in_transposition:
+            if variant.type & extractor.IDENTITY:
+                seq_list.append(Seq(start=variant.transposition_start + 1,
+                    end=variant.transposition_end, reverse=False))
+            elif variant.type & extractor.REVERSE_COMPLEMENT:
+                seq_list.append(Seq(start=variant.transposition_start + 1,
+                    end=variant.transposition_end, reverse=True))
+            else:
+                seq_list.append(Seq(
+                    sequence=s2[variant.sample_start:variant.sample_end]))
         #if
-        else:
-            print s1[:len(s1) - longest_fs_r[0]], \
-                s1[len(s1) - longest_fs_r[0]:]
-            print s2[:longest_fs_r[1]], s2[longest_fs_r[1]:]
-            s1_part = s1[:len(s1) - longest_fs_r[0]]
-            s2_part = s2[:longest_fs_r[1]]
-            term = len(s2) - longest_fs_r[1]
-        #else
-
-        s1_part = s1
-        s2_part = s2
-        for variant in extractor.extract(unicode(s1_part), len(s1_part),
-            unicode(s2_part), len(s2_part), 1):
-            description.append(var_to_rawvar(s1, s2, variant, container=ProteinVar))
-
-        if description:
-            description[-1].term = term + 2
-            description[-1].update()
+        elif not (variant.type & extractor.IDENTITY):
+            description.append(var_to_rawvar(s1, s2, variant))
+
+        if variant.type & extractor.TRANSPOSITION_CLOSE:
+            in_transposition -= 1
+
+            if not in_transposition:
+                description.append(var_to_rawvar(s1, s2, variant, seq_list))
         #if
+    #for
+
+    return description
+#describe_dna
+
+def describe_protein(s1, s2):
+    """
+    Give an allele description of the change from {s1} to {s2}.
+
+    :arg s1: Sequence 1.
+    :type s1: str
+    :arg s2: Sequence 2.
+    :type s2: str
+
+    :returns: A list of RawVar objects, representing the allele.
+    :rtype: list(RawVar)
+    """
+    description = Allele()
+
+    fs1, fs2 = make_fs_tables(1)
+    longest_fs_f = max(find_fs(s1, s2, fs1), find_fs(s1, s2, fs2))
+    longest_fs_r = max(find_fs(s2, s1, fs1), find_fs(s2, s1, fs2))
+
+    if longest_fs_f > longest_fs_r:
+        print s1[:longest_fs_f[1]], s1[longest_fs_f[1]:]
+        print s2[:len(s2) - longest_fs_f[0]], s2[len(s2) - longest_fs_f[0]:]
+        s1_part = s1[:longest_fs_f[1]]
+        s2_part = s2[:len(s2) - longest_fs_f[0]]
+        term = longest_fs_f[0]
     #if
-    else: # DNA description extraction, the only thing that `works'.
-        in_transposition = 0
-
-        for variant in extractor.extract(unicode(s1), len(s1), unicode(s2), len(s2),
-            0):
-            print variant.type, variant.reference_start, variant.reference_end, variant.sample_start, variant.sample_end, variant.transposition_start, variant.transposition_end
-            print variant.type & extractor.TRANSPOSITION_OPEN, variant.type & extractor.TRANSPOSITION_CLOSE
-
-            if variant.type & extractor.TRANSPOSITION_OPEN:
-                if not in_transposition:
-                    seq_list = SeqList()
-                in_transposition += 1
-            #if
-
-            if in_transposition:
-                if variant.type & extractor.IDENTITY:
-                    seq_list.append(Seq(#reference=s1,
-                        start=variant.transposition_start + 1,
-                        end=variant.transposition_end, reverse=False))
-                elif variant.type & extractor.REVERSE_COMPLEMENT:
-                    seq_list.append(Seq(#reference=s1,
-                        start=variant.transposition_start + 1,
-                        end=variant.transposition_end, reverse=True))
-                else:
-                    seq_list.append(Seq(
-                        sequence=s2[variant.sample_start:variant.sample_end]))
-                #if
-            elif not (variant.type & extractor.IDENTITY):
-                description.append(var_to_rawvar(s1, s2, variant))
-
-            if variant.type & extractor.TRANSPOSITION_CLOSE:
-                in_transposition -= 1
-
-                if not in_transposition:
-                    description.append(var_to_rawvar(s1, s2, variant, seq_list))
-                    #for i in seq_list:
-                    #    print i.dump()
-            #if
-        #for
+    else:
+        print s1[:len(s1) - longest_fs_r[0]], s1[len(s1) - longest_fs_r[0]:]
+        print s2[:longest_fs_r[1]], s2[longest_fs_r[1]:]
+        s1_part = s1[:len(s1) - longest_fs_r[0]]
+        s2_part = s2[:longest_fs_r[1]]
+        term = len(s2) - longest_fs_r[1]
     #else
 
-    # Nothing happened.
-    if not description:
-        return Allele(RawVar())
+    s1_part = s1
+    s2_part = s2
+    for variant in extractor.extract(str(s1_part), len(s1_part),
+        str(s2_part), len(s2_part), 1):
+        description.append(var_to_rawvar(s1, s2, variant,
+            container=ProteinVar))
+
+    if description:
+        description[-1].term = term + 2
+        description[-1].update()
+    #if
 
     return description
-#describe
+#describe_protein
diff --git a/mutalyzer/entrypoints/mutalyzer.py b/mutalyzer/entrypoints/mutalyzer.py
index f2a4b10de57f038cd2f6342b6067a1ecbbc4db69..dc1ea00880440ac85000a77d20481fd5acbcf3bd 100644
--- a/mutalyzer/entrypoints/mutalyzer.py
+++ b/mutalyzer/entrypoints/mutalyzer.py
@@ -100,28 +100,27 @@ def check_name(description):
     reference_sequence = O.getIndexedOutput("original", 0)
     sample_sequence = O.getIndexedOutput("mutated", 0)
 
-    extracted_allele = describe.describe(reference_sequence,
+    described_allele = describe.describe_dna(reference_sequence,
         sample_sequence)
-    #extracted_protein_allele = describe.describe(
+    #described_protein_allele = describe.describe_protein(
     #    O.getIndexedOutput("oldprotein", 0),
-    #    O.getIndexedOutput("newprotein", 0, default=""),
-    #    DNA=False)
-    extracted_protein_allele = ""
+    #    O.getIndexedOutput("newprotein", 0, default=""))
+    described_protein_allele = ""
 
-    extracted = extracted_protein = '(skipped)'
+    described = described_protein = '(skipped)'
 
-    if extracted_allele:
-        extracted = extracted_allele
-    if extracted_protein_allele:
-        extracted_protein = extracted_protein_allele
+    if described_allele:
+        described = described_allele
+    if described_protein_allele:
+        described_protein = described_protein_allele
 
     print "\nExperimental services:"
-    print extracted
-    print extracted_protein
+    print described
+    print described_protein
     #print "+++ %s" % O.getOutput("myTranscriptDescription")
     print json.dumps({"reference_sequence": reference_sequence,
         "sample_sequence": sample_sequence, "allele_description":
-        extracted_allele}, cls=MyEncoder)
+        described_allele}, cls=MyEncoder)
 
 
 def main():