From 6c64e5eeb9eaa9aadfb49492dca12b64ef870b9f Mon Sep 17 00:00:00 2001 From: "Jeroen F.J. Laros" <jlaros@fixedpoint.nl> Date: Sun, 19 Apr 2015 18:22:00 +0200 Subject: [PATCH] Moved describe functionality to the extractor package. --- mutalyzer/describe.py | 233 ------------------------- mutalyzer/entrypoints/mutalyzer.py | 3 +- mutalyzer/services/rpc.py | 3 +- mutalyzer/util.py | 142 +-------------- mutalyzer/variant.py | 270 ----------------------------- mutalyzer/website/views.py | 4 +- requirements.txt | 2 +- tests/test_describe.py | 187 -------------------- 8 files changed, 9 insertions(+), 835 deletions(-) delete mode 100644 mutalyzer/describe.py delete mode 100644 mutalyzer/variant.py delete mode 100644 tests/test_describe.py diff --git a/mutalyzer/describe.py b/mutalyzer/describe.py deleted file mode 100644 index f17114ef..00000000 --- a/mutalyzer/describe.py +++ /dev/null @@ -1,233 +0,0 @@ -""" -Generate a HGVS description of the variant(s) leading from one sequence to an -other. -""" - - -from __future__ import unicode_literals - -import collections - -from Bio.Data import CodonTable - -from mutalyzer.util import palinsnoop, roll -from mutalyzer.variant import ISeq, ISeqList, DNAVar, ProteinVar, Allele - -from extractor import extractor - - -def printpos(s, start, end, fill=0): - """ - For debugging purposes. - """ - # TODO: See if this can partially replace or be merged with the - # visualisation in the _visualise() function of mutator.py - fs = 10 # Flank size. - - return '{} {}{} {}'.format(s[start - fs:start], s[start:end], '-' * fill, - s[end:end + fs]) - - -def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar, - weight_position=1): - """ - Convert a variant from the extractor module to one of the RawVar - subclasses. - - :arg unicode s1: Reference sequence. - :arg unicode s2: Sample sequence. - :arg str var: Variant from the extractor module. - :arg str seq_list: Container for an inserted sequence. - :arg str container: Destination container. - :arg str weight_position: Weight of a position. - """ - # Unknown. - if s1 == '?' or s2 == '?': - return [container(type='unknown', weight_position=weight_position)] - - # Insertion / Duplication. - if var.reference_start == var.reference_end: - ins_length = var.sample_end - var.sample_start - shift5, shift3 = roll(s2, var.sample_start + 1, var.sample_end) - shift = shift5 + shift3 - - var.reference_start += shift3 - var.reference_end += shift3 - var.sample_start += shift3 - var.sample_end += shift3 - - if (var.sample_start - ins_length >= 0 and - s1[var.reference_start - ins_length:var.reference_start] == - s2[var.sample_start:var.sample_end]): - # NOTE: We may want to omit the inserted / deleted sequence and - # use the ranges instead. - return container(start=var.reference_start - ins_length + 1, - end=var.reference_end, type='dup', shift=shift, - sample_start=var.sample_start + 1, sample_end=var.sample_end, - inserted=ISeqList([ISeq(sequence=s2[ - var.sample_start:var.sample_end], - weight_position=weight_position)]), - weight_position=weight_position) - - return container(start=var.reference_start, - end=var.reference_start + 1, - inserted=seq_list or - ISeqList([ISeq(sequence=s2[var.sample_start:var.sample_end], - weight_position=weight_position)]), - type='ins', shift=shift, sample_start=var.sample_start + 1, - sample_end=var.sample_end, weight_position=weight_position) - - # Deletion. - if var.sample_start == var.sample_end: - shift5, shift3 = roll(s1, var.reference_start + 1, var.reference_end) - shift = shift5 + shift3 - - var.reference_start += shift3 - var.reference_end += shift3 - - return container(start=var.reference_start + 1, - end=var.reference_end, type='del', shift=shift, - sample_start=var.sample_start, sample_end=var.sample_end + 1, - deleted=ISeqList([ISeq(sequence=s1[ - var.reference_start:var.reference_end], - weight_position=weight_position)]), - weight_position=weight_position) - - # Substitution. - if (var.reference_start + 1 == var.reference_end and - var.sample_start + 1 == var.sample_end): - return container(start=var.reference_start + 1, - end=var.reference_end, sample_start=var.sample_start + 1, - sample_end=var.sample_end, type='subst', - deleted=ISeqList([ISeq(sequence=s1[var.reference_start], - weight_position=weight_position)]), - inserted=ISeqList([ISeq(sequence=s2[var.sample_start], - weight_position=weight_position)]), - weight_position=weight_position) - - # Inversion. - if var.type & extractor.REVERSE_COMPLEMENT: - trim = palinsnoop(s1[var.reference_start:var.reference_end]) - - if trim > 0: # Partial palindrome. - var.reference_end -= trim - var.sample_end -= trim - - return container(start=var.reference_start + 1, - end=var.reference_end, type='inv', - sample_start=var.sample_start + 1, sample_end=var.sample_end, - deleted=ISeqList([ISeq(sequence=s1[ - var.reference_start:var.reference_end], - weight_position=weight_position)]), - inserted=ISeqList([ISeq(sequence=s2[ - var.sample_start:var.reference_end], - weight_position=weight_position)]), - weight_position=weight_position) - - # InDel. - return container(start=var.reference_start + 1, - end=var.reference_end, deleted=ISeqList([ISeq(sequence=s1[ - var.reference_start:var.reference_end], - weight_position=weight_position)]), - inserted=seq_list or - ISeqList([ISeq(sequence=s2[var.sample_start:var.sample_end], - weight_position=weight_position)]), - type='delins', sample_start=var.sample_start + 1, - sample_end=var.sample_end, weight_position=weight_position) - - -def describe_dna(s1, s2): - """ - Give an allele description of the change from {s1} to {s2}. - - :arg unicode s1: Sequence 1. - :arg unicode s2: Sequence 2. - - :returns list(RawVar): A list of RawVar objects, representing the allele. - """ - description = Allele() - in_transposition = 0 - - extracted = extractor.extract(s1.encode('utf-8'), len(s1), - s2.encode('utf-8'), len(s2), 0) - for variant in extracted.variants: - # print (variant.type, variant.reference_start, - # variant.reference_end, variant.sample_start, - # variant.sample_end, variant.transposition_start, - # variant.transposition_end) - # print (variant.type & extractor.TRANSPOSITION_OPEN, variant.type & - # extractor.TRANSPOSITION_CLOSE) - - if variant.type & extractor.TRANSPOSITION_OPEN: - if not in_transposition: - seq_list = ISeqList() - in_transposition += 1 - - if in_transposition: - if variant.type & extractor.IDENTITY: - seq_list.append(ISeq(start=variant.transposition_start + 1, - end=variant.transposition_end, reverse=False, - weight_position=extracted.weight_position)) - elif variant.type & extractor.REVERSE_COMPLEMENT: - seq_list.append(ISeq(start=variant.transposition_start + 1, - end=variant.transposition_end, reverse=True, - weight_position=extracted.weight_position)) - else: - seq_list.append(ISeq( - sequence=s2[variant.sample_start:variant.sample_end], - weight_position=extracted.weight_position)) - elif not (variant.type & extractor.IDENTITY): - description.append(var_to_rawvar(s1, s2, variant, - weight_position=extracted.weight_position)) - - if variant.type & extractor.TRANSPOSITION_CLOSE: - in_transposition -= 1 - - if not in_transposition: - description.append(var_to_rawvar(s1, s2, variant, seq_list, - weight_position=extracted.weight_position)) - - if not description: - return Allele([DNAVar()]) - return description - - -def describe_protein(s1, s2): - """ - Give an allele description of the change from {s1} to {s2}. - - :arg unicode s1: Sequence 1. - :arg unicode s2: Sequence 2. - - :returns list(RawVar): A list of RawVar objects, representing the allele. - """ - description = Allele() - - fs1, fs2 = make_fs_tables(1) - longest_fs_f = max(find_fs(s1, s2, fs1), find_fs(s1, s2, fs2)) - longest_fs_r = max(find_fs(s2, s1, fs1), find_fs(s2, s1, fs2)) - - if longest_fs_f > longest_fs_r: - print s1[:longest_fs_f[1]], s1[longest_fs_f[1]:] - print s2[:len(s2) - longest_fs_f[0]], s2[len(s2) - longest_fs_f[0]:] - s1_part = s1[:longest_fs_f[1]] - s2_part = s2[:len(s2) - longest_fs_f[0]] - term = longest_fs_f[0] - else: - print s1[:len(s1) - longest_fs_r[0]], s1[len(s1) - longest_fs_r[0]:] - print s2[:longest_fs_r[1]], s2[longest_fs_r[1]:] - s1_part = s1[:len(s1) - longest_fs_r[0]] - s2_part = s2[:longest_fs_r[1]] - term = len(s2) - longest_fs_r[1] - - s1_part = s1 - s2_part = s2 - for variant in extractor.extract(s1_part.encode('utf-8'), len(s1_part), - s2_part.encode('utf-8'), len(s2_part), 1): - description.append(var_to_rawvar(s1, s2, variant, - container=ProteinVar)) - - if description: - description[-1].term = term + 2 - - return description diff --git a/mutalyzer/entrypoints/mutalyzer.py b/mutalyzer/entrypoints/mutalyzer.py index 08864546..18cb0293 100644 --- a/mutalyzer/entrypoints/mutalyzer.py +++ b/mutalyzer/entrypoints/mutalyzer.py @@ -10,8 +10,9 @@ from __future__ import unicode_literals import argparse import json +from extractor import describe + from . import _cli_string -from .. import describe from .. import output from .. import variantchecker diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py index c296ee6d..16337a2d 100644 --- a/mutalyzer/services/rpc.py +++ b/mutalyzer/services/rpc.py @@ -23,6 +23,8 @@ from operator import attrgetter from sqlalchemy.orm.exc import NoResultFound from sqlalchemy.sql import func +from extractor import describe + import mutalyzer from mutalyzer.config import settings from mutalyzer.db import session @@ -40,7 +42,6 @@ from mutalyzer import Retriever from mutalyzer import GenRecord from mutalyzer import Scheduler from mutalyzer.models import * -from mutalyzer import describe def create_rpc_fault(output): diff --git a/mutalyzer/util.py b/mutalyzer/util.py index 63f916d3..9e0bf8a7 100644 --- a/mutalyzer/util.py +++ b/mutalyzer/util.py @@ -31,66 +31,7 @@ import time from Bio.SeqUtils import seq3 - -# Taken from BioPython. -AMBIGUOUS_DNA_COMPLEMENT = { - 'A': 'T', - 'C': 'G', - 'G': 'C', - 'T': 'A', - 'M': 'K', - 'R': 'Y', - 'W': 'W', - 'S': 'S', - 'Y': 'R', - 'K': 'M', - 'V': 'B', - 'H': 'D', - 'D': 'H', - 'B': 'V', - 'X': 'X', - 'N': 'N'} -AMBIGUOUS_RNA_COMPLEMENT = { - 'A': 'U', - 'C': 'G', - 'G': 'C', - 'U': 'A', - 'M': 'K', - 'R': 'Y', - 'W': 'W', - 'S': 'S', - 'Y': 'R', - 'K': 'M', - 'V': 'B', - 'H': 'D', - 'D': 'H', - 'B': 'V', - 'X': 'X', - 'N': 'N'} - - -def _make_translation_table(complement_mapping): - before = complement_mapping.keys() - before += [b.lower() for b in before] - after = complement_mapping.values() - after += [b.lower() for b in after] - return {ord(k): v for k, v in zip(before, after)} - - -_dna_complement_table = _make_translation_table(AMBIGUOUS_DNA_COMPLEMENT) -_rna_complement_table = _make_translation_table(AMBIGUOUS_RNA_COMPLEMENT) - - -def reverse_complement(sequence): - """ - Reverse complement of a sequence represented as unicode string. - """ - if 'U' in sequence or 'u' in sequence: - table = _rna_complement_table - else: - table = _dna_complement_table - - return ''.join(reversed(sequence.translate(table))) +from extractor.describe import reverse_complement, palinsnoop, roll def is_utf8_alias(encoding): @@ -309,87 +250,6 @@ def roll_(s, start, end) : #roll -def roll(s, first, last): - """ - Determine the variability of a variant by looking at cyclic - permutations. Not all cyclic permutations are tested at each time, it - is sufficient to check ``aW'' if ``Wa'' matches (with ``a'' a letter, - ``W'' a word) when rolling to the left for example. - - >>> roll('abbabbabbabb', 4, 6) - (3, 6) - >>> roll('abbabbabbabb', 5, 5) - (0, 1) - >>> roll('abcccccde', 4, 4) - (1, 3) - - @arg s: A reference sequence. - @type s: any sequence type - @arg first: First position of the pattern in the reference sequence. - @type first: int - @arg last: Last position of the pattern in the reference sequence. - @type last: int - - @return: tuple: - - left ; Amount of positions that the pattern can be shifted to - the left. - - right ; Amount of positions that the pattern can be shifted to - the right. - @rtype: tuple(int, int) - """ - pattern = s[first - 1:last] # Extract the pattern - pattern_length = len(pattern) - - # Keep rolling to the left as long as a cyclic permutation matches. - minimum = first - 2 - j = pattern_length - 1 - while minimum > -1 and s[minimum] == pattern[j % pattern_length]: - j -= 1 - minimum -= 1 - - # Keep rolling to the right as long as a cyclic permutation matches. - maximum = last - j = 0 - while maximum < len(s) and s[maximum] == pattern[j % pattern_length]: - j += 1 - maximum += 1 - - return first - minimum - 2, maximum - last -#roll - - -def palinsnoop(s): - """ - Check a sequence for a reverse-complement-palindromic prefix (and - suffix). If one is detected, return the length of this prefix. If the - string equals its reverse complement, return -1. - - >>> palinsnoop('TACGCTA') - 2 - >>> palinsnoop('TACGTA') - -1 - >>> palinsnoop('TACGCTT') - 0 - - @arg s: A nucleotide sequence. - @type s: unicode - - @return: The number of elements that are palindromic or -1 if the string - is a 'palindrome'. - @rtype: int - """ - s_revcomp = reverse_complement(s) - - for i in range(int(math.ceil(len(s) / 2.0))): - if s[i] != s_revcomp[i]: - # The first i elements are 'palindromic'. - return i - - # Perfect 'palindrome'. - return -1 -#palinsnoop - - def longest_common_prefix(s1, s2): """ Calculate the longest common prefix of two strings. diff --git a/mutalyzer/variant.py b/mutalyzer/variant.py deleted file mode 100644 index 2ea2d393..00000000 --- a/mutalyzer/variant.py +++ /dev/null @@ -1,270 +0,0 @@ -""" -Models for the description extractor. -""" - -from __future__ import unicode_literals - -from Bio.SeqUtils import seq3 - -from extractor import extractor - - -WEIGHTS = { - 'subst': extractor.WEIGHT_SUBSTITUTION, - 'del': extractor.WEIGHT_DELETION, - 'ins': extractor.WEIGHT_INSERTION, - 'dup': extractor.WEIGHT_INSERTION, - 'inv': extractor.WEIGHT_INVERSION, - 'delins': extractor.WEIGHT_DELETION_INSERTION -} - - -class HGVSList(object): - """ - Container for a list of sequences or variants. - """ - def __init__(self, items=[]): - self.items = list(items) - - - def __getitem__(self, index): - return self.items[index] - - - def __bool__(self): - return bool(len(self.items) > 0) - - - def __nonzero__(self): # Python 2.x compatibility. - return self.__bool__() - - - def __unicode__(self): - if len(self.items) > 1: - return '[{}]'.format(';'.join(map(unicode, self.items))) - return unicode(self.items[0]) - - - def append(self, item): - self.items.append(item) - - - def weight(self): - weight = sum(map(lambda x: x.weight(), self.items)) - - if len(self.items) > 1: - return weight + (len(self.items) + 1) * extractor.WEIGHT_SEPARATOR - return weight - - -class Allele(HGVSList): - pass - - -class ISeqList(HGVSList): - pass - - -class ISeq(object): - """ - Container for an inserted sequence. - """ - def __init__(self, sequence='', start=0, end=0, reverse=False, - weight_position=1): - """ - Initialise the class with the appropriate values. - - :arg unicode sequence: Literal inserted sequence. - :arg int start: Start position for a transposed sequence. - :arg int end: End position for a transposed sequence. - :arg bool reverse: Inverted transposed sequence. - """ - self.sequence = sequence - self.start = start - self.end = end - self.reverse = reverse - self.weight_position = weight_position - - self.type = 'trans' - if self.sequence: - self.type = 'ins' - - - def __unicode__(self): - if self.type == 'ins': - return self.sequence - - if not (self.start or self.end): - return '' - - inverted = 'inv' if self.reverse else '' - return '{}_{}{}'.format(self.start, self.end, inverted) - - - def __bool__(self): - return bool(self.sequence) - - - def __nonzero__(self): # Python 2.x compatibility. - return self.__bool__() - - - def weight(self): - if self.type == 'ins': - return len(self.sequence) * extractor.WEIGHT_BASE - - inverse_weight = WEIGHTS['inv'] if self.reverse else 0 - return (self.weight_position * 2 + extractor.WEIGHT_SEPARATOR + - inverse_weight) - - -class DNAVar(object): - """ - Container for a DNA variant. - """ - def __init__(self, start=0, start_offset=0, end=0, end_offset=0, - sample_start=0, sample_start_offset=0, sample_end=0, - sample_end_offset=0, type='none', deleted=ISeqList([ISeq()]), - inserted=ISeqList([ISeq()]), shift=0, weight_position=1): - """ - Initialise the class with the appropriate values. - - :arg int start: Start position. - :arg int start_offset: - :arg int end: End position. - :arg int end_offset: - :arg int sample_start: Start position. - :arg int sample_start_offset: - :arg int sample_end: End position. - :arg int sample_end_offset: - :arg unicode type: Variant type. - :arg unicode deleted: Deleted part of the reference sequence. - :arg ISeqList inserted: Inserted part. - :arg int shift: Amount of freedom. - """ - # TODO: Will this container be used for all variants, or only genomic? - # start_offset and end_offset may be never used. - self.start = start - self.start_offset = start_offset - self.end = end - self.end_offset = end_offset - self.sample_start = sample_start - self.sample_start_offset = sample_start_offset - self.sample_end = sample_end - self.sample_end_offset = sample_end_offset - self.type = type - self.deleted = deleted - self.inserted = inserted - self.weight_position = weight_position - self.shift = shift - - - def __unicode__(self): - """ - Give the HGVS description of the raw variant stored in this class. - - :returns unicode: The HGVS description of the raw variant stored in - this class. - """ - if self.type == 'unknown': - return '?' - if self.type == 'none': - return '=' - - description = '{}'.format(self.start) - - if self.start != self.end: - description += '_{}'.format(self.end) - - if self.type != 'subst': - description += '{}'.format(self.type) - - if self.type in ('ins', 'delins'): - return description + '{}'.format(self.inserted) - return description - - return description + '{}>{}'.format(self.deleted, self.inserted) - - - def weight(self): - if self.type == 'unknown': - return -1 - if self.type == 'none': - return 0 - - weight = self.weight_position - if self.start != self.end: - weight += self.weight_position + extractor.WEIGHT_SEPARATOR - - return weight + WEIGHTS[self.type] + self.inserted.weight() - - -class ProteinVar(object): - """ - Container for a protein variant. - - """ - #NOTE: This is experimental code. It is not used at the moment. - def __init__(self, start=0, end=0, sample_start=0, sample_end=0, - type='none', deleted=ISeqList([ISeq()]), - inserted=ISeqList([ISeq()]), shift=0, term=0): - """ - Initialise the class with the appropriate values. - - :arg int start: Start position. - :arg int end: End position. - :arg int sample_start: Start position. - :arg int sample_end: End position. - :arg unicode type: Variant type. - :arg unicode deleted: Deleted part of the reference sequence. - :arg ISeqList inserted: Inserted part. - :arg int shift: Amount of freedom. - :arg int term: Number of positions until stop codon. - """ - self.start = start - self.end = end - self.sample_start = sample_start - self.sample_end = sample_end - self.type = type - self.deleted = deleted - self.inserted = inserted - self.shift = shift - self.term = term - - - def __unicode__(self): - """ - Give the HGVS description of the raw variant stored in this class. - - Note that this function relies on the absence of values to make the - correct description. The method used in the DNAVar is better. - - :returns unicode: The HGVS description of the raw variant stored in - this class. - """ - if self.type == 'unknown': - return '?' - if self.type == 'none': - return '=' - - description = '' - if not self.deleted: - if self.type == 'ext': - description += '*' - else: - description += '{}'.format(seq3(self.start_aa)) - else: - description += '{}'.format(seq3(self.deleted)) - description += '{}'.format(self.start) - if self.end: - description += '_{}{}'.format(seq3(self.end_aa), self.end) - if self.type not in ('subst', 'stop', 'ext', 'fs'): # fs is not a type - description += self.type - if self.inserted: - description += '{}'.format(seq3(self.inserted)) - - if self.type == 'stop': - return description + '*' - if self.term: - return description + 'fs*{}'.format(self.term) - return description diff --git a/mutalyzer/website/views.py b/mutalyzer/website/views.py index 41db5354..f2ae4793 100644 --- a/mutalyzer/website/views.py +++ b/mutalyzer/website/views.py @@ -19,8 +19,10 @@ from lxml import etree from spyne.server.http import HttpBase from sqlalchemy.orm.exc import NoResultFound +from extractor import describe + import mutalyzer -from mutalyzer import (announce, describe, File, Retriever, Scheduler, stats, +from mutalyzer import (announce, File, Retriever, Scheduler, stats, util, variantchecker) from mutalyzer.config import settings from mutalyzer.db.models import BATCH_JOB_TYPES diff --git a/requirements.txt b/requirements.txt index a3cb8eb2..39429208 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -e git+https://github.com/LUMC/magic-python.git#egg=Magic_file_extensions --e git+https://github.com/LUMC/extractor.git@3148fad6b081c36e9675494f4896ba60f14707a9#egg=extractor +-e git+https://github.com/LUMC/extractor.git@f270ef101e909895d58e47420935f2bbfdb28a3b#egg=extractor Flask==0.10.1 Jinja2==2.7.3 MySQL-python==1.2.5 diff --git a/tests/test_describe.py b/tests/test_describe.py deleted file mode 100644 index 484d824d..00000000 --- a/tests/test_describe.py +++ /dev/null @@ -1,187 +0,0 @@ -""" -Tests for the mutalyzer.describe module. -""" - - -from __future__ import unicode_literals - -#import logging; logging.basicConfig() -import os - -import mutalyzer -from mutalyzer import describe - -from utils import MutalyzerTest - - -class TestDescribe(MutalyzerTest): - """ - Test the mutalyzer.describe module. - """ - def _single_variant(self, sample, expected): - """ - General single variant test. - """ - reference = 'ACGTCGATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT' - - result = describe.describe_dna(reference, sample) - assert result[0].type == expected[0] - assert result[0].start == expected[1] - assert result[0].end == expected[2] - assert result[0].sample_start == expected[3] - assert result[0].sample_end == expected[4] - assert result[0].deleted[0].sequence == expected[5] - assert result[0].inserted[0].sequence == expected[6] - assert unicode(result[0]) == expected[7] - - - def test1(self): - """ - Test 1. - """ - result = describe.describe_dna( - 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', - 'ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA') - assert unicode(result) == '[5_6insTT;17del;26A>C;35dup]' - - - def test2(self): - """ - Test 2. - """ - result = describe.describe_dna( - 'TAAGCACCAGGAGTCCATGAAGAAGATGGCTCCTGCCATGGAATCCCCTACTCTACTGTG', - 'TAAGCACCAGGAGTCCATGAAGAAGCTGGATCCTCCCATGGAATCCCCTACTCTACTGTG') - assert unicode(result) == '[26A>C;30C>A;35G>C]' - - - def test3(self): - """ - Test 3. - """ - result = describe.describe_dna( - 'TAAGCACCAGGAGTCCATGAAGAAGATGGCTCCTGCCATGGAATCCCCTACTCTA', - 'TAAGCACCAGGAGTCCATGAAGAAGCCATGTCCTGCCATGGAATCCCCTACTCTA') - assert unicode(result) == '[26_29inv;30C>G]' - - - def test4(self): - """ - Test 4. - """ - result = describe.describe_dna( - 'TAAGCACCAGGAGTCCATGAAGAAGATGGCTCCTGCCATGGAATCCCCTACTCTA', - 'TAAGCACCAGGAGTCCATGAAGAAGCCATGTCCTGCCATGAATCCCCTACTCTA') - assert unicode(result) == '[26_29inv;30C>G;41del]' - - - def test5(self): - """ - Test 5. - """ - self._single_variant('ACGTCGATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT', - ('none', 0, 0, 0, 0, '', '', '=')) - - - def test6(self): - """ - Test 6. - """ - self._single_variant('ACGTCGGTTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT', - ('subst', 7, 7, 7, 7, 'A', 'G', '7A>G')) - - - def test7(self): - """ - Test 7. - """ - self._single_variant('ACGTCGTTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT', - ('del', 7, 7, 6, 7, 'A', '', '7del')) - - - def test8(self): - """ - Test 8. - """ - self._single_variant('ACGTCGTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT', - ('del', 7, 8, 6, 7, 'AT', '', '7_8del')) - - - def test9(self): - """ - Test 9. - """ - self._single_variant('ACGTCGCATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT', - ('ins', 6, 7, 7, 7, '', 'C', '6_7insC')) - - - def test10(self): - """ - Test 10. - """ - self._single_variant('ACGTCGCCATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT', - ('ins', 6, 7, 7, 8, '', 'CC', '6_7insCC')) - - - def test11(self): - """ - Test 11. - """ - self._single_variant('ACGTCGAATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT', - ('dup', 7, 7, 8, 8, '', 'A', '7dup')) - - - def test12(self): - """ - Test 12. - """ - self._single_variant('ACGTCGAGATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT', - ('dup', 6, 7, 8, 9, '', 'GA', '6_7dup')) - - - def test13(self): - """ - Test 13. - """ - self._single_variant('ACGTCGACGATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT', - ('dup', 5, 7, 8, 10, '', 'CGA', '5_7dup')) - - - def test14(self): - """ - Test 14. - """ - self._single_variant('ACGTCGCGAATCTAGCTTCGGGGGATAGATAGAGATATAGAGAT', - ('inv', 7, 11, 7, 11, 'ATTCG', 'CGAAT', '7_11inv')) - - - def test15(self): - """ - Test 15. - """ - self._single_variant('ACGTCGCCTTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT', - ('delins', 7, 7, 7, 8, 'A', 'CC', '7delinsCC')) - - - def test16(self): - """ - Test 16. - """ - self._single_variant('ACGTCGATTCGCTAGCTTCGTTTTGATAGATAGAGATATAGAGAT', - ('delins', 21, 23, 21, 24, 'GGG', 'TTTT', '21_23delinsTTTT')) - - - def test17(self): - """ - Test 17. - """ - self._single_variant('ACGTCTCTTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT', - ('inv', 6, 7, 6, 7, 'GA', 'TC', '6_7inv')) - - - def test18(self): - """ - Test 18. - """ - self._single_variant('ACGTCGTCTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT', - ('delins', 7, 8, 7, 8, 'AT', 'TC', '7_8delinsTC')) -- GitLab