From 6c64e5eeb9eaa9aadfb49492dca12b64ef870b9f Mon Sep 17 00:00:00 2001
From: "Jeroen F.J. Laros" <jlaros@fixedpoint.nl>
Date: Sun, 19 Apr 2015 18:22:00 +0200
Subject: [PATCH] Moved describe functionality to the extractor package.

---
 mutalyzer/describe.py              | 233 -------------------------
 mutalyzer/entrypoints/mutalyzer.py |   3 +-
 mutalyzer/services/rpc.py          |   3 +-
 mutalyzer/util.py                  | 142 +--------------
 mutalyzer/variant.py               | 270 -----------------------------
 mutalyzer/website/views.py         |   4 +-
 requirements.txt                   |   2 +-
 tests/test_describe.py             | 187 --------------------
 8 files changed, 9 insertions(+), 835 deletions(-)
 delete mode 100644 mutalyzer/describe.py
 delete mode 100644 mutalyzer/variant.py
 delete mode 100644 tests/test_describe.py

diff --git a/mutalyzer/describe.py b/mutalyzer/describe.py
deleted file mode 100644
index f17114ef..00000000
--- a/mutalyzer/describe.py
+++ /dev/null
@@ -1,233 +0,0 @@
-"""
-Generate a HGVS description of the variant(s) leading from one sequence to an
-other.
-"""
-
-
-from __future__ import unicode_literals
-
-import collections
-
-from Bio.Data import CodonTable
-
-from mutalyzer.util import palinsnoop, roll
-from mutalyzer.variant import ISeq, ISeqList, DNAVar, ProteinVar, Allele
-
-from extractor import extractor
-
-
-def printpos(s, start, end, fill=0):
-    """
-    For debugging purposes.
-    """
-    # TODO: See if this can partially replace or be merged with the
-    #       visualisation in the _visualise() function of mutator.py
-    fs = 10 # Flank size.
-
-    return '{} {}{} {}'.format(s[start - fs:start], s[start:end], '-' * fill,
-        s[end:end + fs])
-
-
-def var_to_rawvar(s1, s2, var, seq_list=[], container=DNAVar,
-        weight_position=1):
-    """
-    Convert a variant from the extractor module to one of the RawVar
-    subclasses.
-
-    :arg unicode s1: Reference sequence.
-    :arg unicode s2: Sample sequence.
-    :arg str var: Variant from the extractor module.
-    :arg str seq_list: Container for an inserted sequence.
-    :arg str container: Destination container.
-    :arg str weight_position: Weight of a position.
-    """
-    # Unknown.
-    if s1 == '?' or s2 == '?':
-        return [container(type='unknown', weight_position=weight_position)]
-
-    # Insertion / Duplication.
-    if var.reference_start == var.reference_end:
-        ins_length = var.sample_end - var.sample_start
-        shift5, shift3 = roll(s2, var.sample_start + 1, var.sample_end)
-        shift = shift5 + shift3
-
-        var.reference_start += shift3
-        var.reference_end += shift3
-        var.sample_start += shift3
-        var.sample_end += shift3
-
-        if (var.sample_start - ins_length >= 0 and
-                s1[var.reference_start - ins_length:var.reference_start] ==
-                s2[var.sample_start:var.sample_end]):
-            # NOTE: We may want to omit the inserted / deleted sequence and
-            # use the ranges instead.
-            return container(start=var.reference_start - ins_length + 1,
-                end=var.reference_end, type='dup', shift=shift,
-                sample_start=var.sample_start + 1, sample_end=var.sample_end,
-                inserted=ISeqList([ISeq(sequence=s2[
-                var.sample_start:var.sample_end],
-                    weight_position=weight_position)]),
-                weight_position=weight_position)
-
-        return container(start=var.reference_start,
-            end=var.reference_start + 1,
-            inserted=seq_list or
-            ISeqList([ISeq(sequence=s2[var.sample_start:var.sample_end],
-                weight_position=weight_position)]),
-            type='ins', shift=shift, sample_start=var.sample_start + 1,
-            sample_end=var.sample_end, weight_position=weight_position)
-
-    # Deletion.
-    if var.sample_start == var.sample_end:
-        shift5, shift3 = roll(s1, var.reference_start + 1, var.reference_end)
-        shift = shift5 + shift3
-
-        var.reference_start += shift3
-        var.reference_end += shift3
-
-        return container(start=var.reference_start + 1,
-            end=var.reference_end, type='del', shift=shift,
-            sample_start=var.sample_start, sample_end=var.sample_end + 1,
-            deleted=ISeqList([ISeq(sequence=s1[
-                var.reference_start:var.reference_end],
-                weight_position=weight_position)]),
-            weight_position=weight_position)
-
-    # Substitution.
-    if (var.reference_start + 1 == var.reference_end and
-            var.sample_start + 1 == var.sample_end):
-        return container(start=var.reference_start + 1,
-            end=var.reference_end, sample_start=var.sample_start + 1,
-            sample_end=var.sample_end, type='subst',
-            deleted=ISeqList([ISeq(sequence=s1[var.reference_start],
-                weight_position=weight_position)]),
-            inserted=ISeqList([ISeq(sequence=s2[var.sample_start],
-                weight_position=weight_position)]),
-            weight_position=weight_position)
-
-    # Inversion.
-    if var.type & extractor.REVERSE_COMPLEMENT:
-        trim = palinsnoop(s1[var.reference_start:var.reference_end])
-
-        if trim > 0: # Partial palindrome.
-            var.reference_end -= trim
-            var.sample_end -= trim
-
-        return container(start=var.reference_start + 1,
-            end=var.reference_end, type='inv',
-            sample_start=var.sample_start + 1, sample_end=var.sample_end,
-            deleted=ISeqList([ISeq(sequence=s1[
-                var.reference_start:var.reference_end],
-                weight_position=weight_position)]),
-            inserted=ISeqList([ISeq(sequence=s2[
-                var.sample_start:var.reference_end],
-                weight_position=weight_position)]),
-            weight_position=weight_position)
-
-    # InDel.
-    return container(start=var.reference_start + 1,
-        end=var.reference_end, deleted=ISeqList([ISeq(sequence=s1[
-                var.reference_start:var.reference_end],
-                weight_position=weight_position)]),
-        inserted=seq_list or
-        ISeqList([ISeq(sequence=s2[var.sample_start:var.sample_end],
-            weight_position=weight_position)]),
-        type='delins', sample_start=var.sample_start + 1,
-        sample_end=var.sample_end, weight_position=weight_position)
-
-
-def describe_dna(s1, s2):
-    """
-    Give an allele description of the change from {s1} to {s2}.
-
-    :arg unicode s1: Sequence 1.
-    :arg unicode s2: Sequence 2.
-
-    :returns list(RawVar): A list of RawVar objects, representing the allele.
-    """
-    description = Allele()
-    in_transposition = 0
-
-    extracted = extractor.extract(s1.encode('utf-8'), len(s1),
-        s2.encode('utf-8'), len(s2), 0)
-    for variant in extracted.variants:
-       # print (variant.type, variant.reference_start,
-       #     variant.reference_end, variant.sample_start,
-       #     variant.sample_end, variant.transposition_start,
-       #     variant.transposition_end)
-       # print (variant.type & extractor.TRANSPOSITION_OPEN, variant.type &
-       #     extractor.TRANSPOSITION_CLOSE)
-
-        if variant.type & extractor.TRANSPOSITION_OPEN:
-            if not in_transposition:
-                seq_list = ISeqList()
-            in_transposition += 1
-
-        if in_transposition:
-            if variant.type & extractor.IDENTITY:
-                seq_list.append(ISeq(start=variant.transposition_start + 1,
-                    end=variant.transposition_end, reverse=False,
-                    weight_position=extracted.weight_position))
-            elif variant.type & extractor.REVERSE_COMPLEMENT:
-                seq_list.append(ISeq(start=variant.transposition_start + 1,
-                    end=variant.transposition_end, reverse=True,
-                    weight_position=extracted.weight_position))
-            else:
-                seq_list.append(ISeq(
-                    sequence=s2[variant.sample_start:variant.sample_end],
-                    weight_position=extracted.weight_position))
-        elif not (variant.type & extractor.IDENTITY):
-            description.append(var_to_rawvar(s1, s2, variant,
-                weight_position=extracted.weight_position))
-
-        if variant.type & extractor.TRANSPOSITION_CLOSE:
-            in_transposition -= 1
-
-            if not in_transposition:
-                description.append(var_to_rawvar(s1, s2, variant, seq_list,
-                    weight_position=extracted.weight_position))
-
-    if not description:
-        return Allele([DNAVar()])
-    return description
-
-
-def describe_protein(s1, s2):
-    """
-    Give an allele description of the change from {s1} to {s2}.
-
-    :arg unicode s1: Sequence 1.
-    :arg unicode s2: Sequence 2.
-
-    :returns list(RawVar): A list of RawVar objects, representing the allele.
-    """
-    description = Allele()
-
-    fs1, fs2 = make_fs_tables(1)
-    longest_fs_f = max(find_fs(s1, s2, fs1), find_fs(s1, s2, fs2))
-    longest_fs_r = max(find_fs(s2, s1, fs1), find_fs(s2, s1, fs2))
-
-    if longest_fs_f > longest_fs_r:
-        print s1[:longest_fs_f[1]], s1[longest_fs_f[1]:]
-        print s2[:len(s2) - longest_fs_f[0]], s2[len(s2) - longest_fs_f[0]:]
-        s1_part = s1[:longest_fs_f[1]]
-        s2_part = s2[:len(s2) - longest_fs_f[0]]
-        term = longest_fs_f[0]
-    else:
-        print s1[:len(s1) - longest_fs_r[0]], s1[len(s1) - longest_fs_r[0]:]
-        print s2[:longest_fs_r[1]], s2[longest_fs_r[1]:]
-        s1_part = s1[:len(s1) - longest_fs_r[0]]
-        s2_part = s2[:longest_fs_r[1]]
-        term = len(s2) - longest_fs_r[1]
-
-    s1_part = s1
-    s2_part = s2
-    for variant in extractor.extract(s1_part.encode('utf-8'), len(s1_part),
-            s2_part.encode('utf-8'), len(s2_part), 1):
-        description.append(var_to_rawvar(s1, s2, variant,
-            container=ProteinVar))
-
-    if description:
-        description[-1].term = term + 2
-
-    return description
diff --git a/mutalyzer/entrypoints/mutalyzer.py b/mutalyzer/entrypoints/mutalyzer.py
index 08864546..18cb0293 100644
--- a/mutalyzer/entrypoints/mutalyzer.py
+++ b/mutalyzer/entrypoints/mutalyzer.py
@@ -10,8 +10,9 @@ from __future__ import unicode_literals
 import argparse
 import json
 
+from extractor import describe
+
 from . import _cli_string
-from .. import describe
 from .. import output
 from .. import variantchecker
 
diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py
index c296ee6d..16337a2d 100644
--- a/mutalyzer/services/rpc.py
+++ b/mutalyzer/services/rpc.py
@@ -23,6 +23,8 @@ from operator import attrgetter
 from sqlalchemy.orm.exc import NoResultFound
 from sqlalchemy.sql import func
 
+from extractor import describe
+
 import mutalyzer
 from mutalyzer.config import settings
 from mutalyzer.db import session
@@ -40,7 +42,6 @@ from mutalyzer import Retriever
 from mutalyzer import GenRecord
 from mutalyzer import Scheduler
 from mutalyzer.models import *
-from mutalyzer import describe
 
 
 def create_rpc_fault(output):
diff --git a/mutalyzer/util.py b/mutalyzer/util.py
index 63f916d3..9e0bf8a7 100644
--- a/mutalyzer/util.py
+++ b/mutalyzer/util.py
@@ -31,66 +31,7 @@ import time
 
 from Bio.SeqUtils import seq3
 
-
-# Taken from BioPython.
-AMBIGUOUS_DNA_COMPLEMENT = {
-    'A': 'T',
-    'C': 'G',
-    'G': 'C',
-    'T': 'A',
-    'M': 'K',
-    'R': 'Y',
-    'W': 'W',
-    'S': 'S',
-    'Y': 'R',
-    'K': 'M',
-    'V': 'B',
-    'H': 'D',
-    'D': 'H',
-    'B': 'V',
-    'X': 'X',
-    'N': 'N'}
-AMBIGUOUS_RNA_COMPLEMENT = {
-    'A': 'U',
-    'C': 'G',
-    'G': 'C',
-    'U': 'A',
-    'M': 'K',
-    'R': 'Y',
-    'W': 'W',
-    'S': 'S',
-    'Y': 'R',
-    'K': 'M',
-    'V': 'B',
-    'H': 'D',
-    'D': 'H',
-    'B': 'V',
-    'X': 'X',
-    'N': 'N'}
-
-
-def _make_translation_table(complement_mapping):
-    before = complement_mapping.keys()
-    before += [b.lower() for b in before]
-    after = complement_mapping.values()
-    after += [b.lower() for b in after]
-    return {ord(k): v for k, v in zip(before, after)}
-
-
-_dna_complement_table = _make_translation_table(AMBIGUOUS_DNA_COMPLEMENT)
-_rna_complement_table = _make_translation_table(AMBIGUOUS_RNA_COMPLEMENT)
-
-
-def reverse_complement(sequence):
-    """
-    Reverse complement of a sequence represented as unicode string.
-    """
-    if 'U' in sequence or 'u' in sequence:
-        table = _rna_complement_table
-    else:
-        table = _dna_complement_table
-
-    return ''.join(reversed(sequence.translate(table)))
+from extractor.describe import reverse_complement, palinsnoop, roll
 
 
 def is_utf8_alias(encoding):
@@ -309,87 +250,6 @@ def roll_(s, start, end) :
 #roll
 
 
-def roll(s, first, last):
-    """
-    Determine the variability of a variant by looking at cyclic
-    permutations. Not all cyclic permutations are tested at each time, it
-    is sufficient to check ``aW'' if ``Wa'' matches (with ``a'' a letter,
-    ``W'' a word) when rolling to the left for example.
-
-        >>> roll('abbabbabbabb', 4, 6)
-        (3, 6)
-        >>> roll('abbabbabbabb', 5, 5)
-        (0, 1)
-        >>> roll('abcccccde', 4, 4)
-        (1, 3)
-
-    @arg s: A reference sequence.
-    @type s: any sequence type
-    @arg first: First position of the pattern in the reference sequence.
-    @type first: int
-    @arg last: Last position of the pattern in the reference sequence.
-    @type last: int
-
-    @return: tuple:
-        - left  ; Amount of positions that the pattern can be shifted to
-                  the left.
-        - right ; Amount of positions that the pattern can be shifted to
-                  the right.
-    @rtype: tuple(int, int)
-    """
-    pattern = s[first - 1:last]   # Extract the pattern
-    pattern_length = len(pattern)
-
-    # Keep rolling to the left as long as a cyclic permutation matches.
-    minimum = first - 2
-    j = pattern_length - 1
-    while minimum > -1 and s[minimum] == pattern[j % pattern_length]:
-        j -= 1
-        minimum -= 1
-
-    # Keep rolling to the right as long as a cyclic permutation matches.
-    maximum = last
-    j = 0
-    while maximum < len(s) and s[maximum] == pattern[j % pattern_length]:
-        j += 1
-        maximum += 1
-
-    return first - minimum - 2, maximum - last
-#roll
-
-
-def palinsnoop(s):
-    """
-    Check a sequence for a reverse-complement-palindromic prefix (and
-    suffix). If one is detected, return the length of this prefix. If the
-    string equals its reverse complement, return -1.
-
-        >>> palinsnoop('TACGCTA')
-        2
-        >>> palinsnoop('TACGTA')
-        -1
-        >>> palinsnoop('TACGCTT')
-        0
-
-    @arg s: A nucleotide sequence.
-    @type s: unicode
-
-    @return: The number of elements that are palindromic or -1 if the string
-             is a 'palindrome'.
-    @rtype: int
-    """
-    s_revcomp = reverse_complement(s)
-
-    for i in range(int(math.ceil(len(s) / 2.0))):
-        if s[i] != s_revcomp[i]:
-            # The first i elements are 'palindromic'.
-            return i
-
-    # Perfect 'palindrome'.
-    return -1
-#palinsnoop
-
-
 def longest_common_prefix(s1, s2):
     """
     Calculate the longest common prefix of two strings.
diff --git a/mutalyzer/variant.py b/mutalyzer/variant.py
deleted file mode 100644
index 2ea2d393..00000000
--- a/mutalyzer/variant.py
+++ /dev/null
@@ -1,270 +0,0 @@
-"""
-Models for the description extractor.
-"""
-
-from __future__ import unicode_literals
-
-from Bio.SeqUtils import seq3
-
-from extractor import extractor
-
-
-WEIGHTS = {
-    'subst': extractor.WEIGHT_SUBSTITUTION,
-    'del': extractor.WEIGHT_DELETION,
-    'ins': extractor.WEIGHT_INSERTION,
-    'dup': extractor.WEIGHT_INSERTION,
-    'inv': extractor.WEIGHT_INVERSION,
-    'delins': extractor.WEIGHT_DELETION_INSERTION
-}
-
-
-class HGVSList(object):
-    """
-    Container for a list of sequences or variants.
-    """
-    def __init__(self, items=[]):
-        self.items = list(items)
-
-
-    def __getitem__(self, index):
-        return self.items[index]
-
-
-    def __bool__(self):
-        return bool(len(self.items) > 0)
-
-
-    def __nonzero__(self): # Python 2.x compatibility.
-        return self.__bool__()
-
-
-    def __unicode__(self):
-        if len(self.items) > 1:
-            return '[{}]'.format(';'.join(map(unicode, self.items)))
-        return unicode(self.items[0])
-
-
-    def append(self, item):
-        self.items.append(item)
-
-
-    def weight(self):
-        weight = sum(map(lambda x: x.weight(), self.items))
-
-        if len(self.items) > 1:
-            return weight + (len(self.items) + 1) * extractor.WEIGHT_SEPARATOR
-        return weight
-
-
-class Allele(HGVSList):
-    pass
-
-
-class ISeqList(HGVSList):
-    pass
-
-
-class ISeq(object):
-    """
-    Container for an inserted sequence.
-    """
-    def __init__(self, sequence='', start=0, end=0, reverse=False,
-            weight_position=1):
-        """
-        Initialise the class with the appropriate values.
-
-        :arg unicode sequence: Literal inserted sequence.
-        :arg int start: Start position for a transposed sequence.
-        :arg int end: End position for a transposed sequence.
-        :arg bool reverse: Inverted transposed sequence.
-        """
-        self.sequence = sequence
-        self.start = start
-        self.end = end
-        self.reverse = reverse
-        self.weight_position = weight_position
-
-        self.type = 'trans'
-        if self.sequence:
-            self.type = 'ins'
-
-
-    def __unicode__(self):
-        if self.type == 'ins':
-            return self.sequence
-
-        if not (self.start or self.end):
-            return ''
-
-        inverted = 'inv' if self.reverse else ''
-        return '{}_{}{}'.format(self.start, self.end, inverted)
-
-
-    def __bool__(self):
-         return bool(self.sequence)
-
-
-    def __nonzero__(self): # Python 2.x compatibility.
-        return self.__bool__()
-
-
-    def weight(self):
-        if self.type == 'ins':
-            return len(self.sequence) * extractor.WEIGHT_BASE
-
-        inverse_weight = WEIGHTS['inv'] if self.reverse else 0
-        return (self.weight_position * 2 + extractor.WEIGHT_SEPARATOR +
-            inverse_weight)
-
-
-class DNAVar(object):
-    """
-    Container for a DNA variant.
-    """
-    def __init__(self, start=0, start_offset=0, end=0, end_offset=0,
-            sample_start=0, sample_start_offset=0, sample_end=0,
-            sample_end_offset=0, type='none', deleted=ISeqList([ISeq()]),
-            inserted=ISeqList([ISeq()]), shift=0, weight_position=1):
-        """
-        Initialise the class with the appropriate values.
-
-        :arg int start: Start position.
-        :arg int start_offset:
-        :arg int end: End position.
-        :arg int end_offset:
-        :arg int sample_start: Start position.
-        :arg int sample_start_offset:
-        :arg int sample_end: End position.
-        :arg int sample_end_offset:
-        :arg unicode type: Variant type.
-        :arg unicode deleted: Deleted part of the reference sequence.
-        :arg ISeqList inserted: Inserted part.
-        :arg int shift: Amount of freedom.
-        """
-        # TODO: Will this container be used for all variants, or only genomic?
-        #       start_offset and end_offset may be never used.
-        self.start = start
-        self.start_offset = start_offset
-        self.end = end
-        self.end_offset = end_offset
-        self.sample_start = sample_start
-        self.sample_start_offset = sample_start_offset
-        self.sample_end = sample_end
-        self.sample_end_offset = sample_end_offset
-        self.type = type
-        self.deleted = deleted
-        self.inserted = inserted
-        self.weight_position = weight_position
-        self.shift = shift
-
-
-    def __unicode__(self):
-        """
-        Give the HGVS description of the raw variant stored in this class.
-
-        :returns unicode: The HGVS description of the raw variant stored in
-            this class.
-        """
-        if self.type == 'unknown':
-            return '?'
-        if self.type == 'none':
-            return '='
-
-        description = '{}'.format(self.start)
-
-        if self.start != self.end:
-            description += '_{}'.format(self.end)
-
-        if self.type != 'subst':
-            description += '{}'.format(self.type)
-
-            if self.type in ('ins', 'delins'):
-                return description + '{}'.format(self.inserted)
-            return description
-
-        return description + '{}>{}'.format(self.deleted, self.inserted)
-
-
-    def weight(self):
-        if self.type == 'unknown':
-            return -1
-        if self.type == 'none':
-            return 0
-
-        weight = self.weight_position
-        if self.start != self.end:
-            weight += self.weight_position + extractor.WEIGHT_SEPARATOR
-
-        return weight + WEIGHTS[self.type] + self.inserted.weight()
-
-
-class ProteinVar(object):
-    """
-    Container for a protein variant.
-
-    """
-    #NOTE: This is experimental code. It is not used at the moment.
-    def __init__(self, start=0, end=0, sample_start=0, sample_end=0,
-            type='none', deleted=ISeqList([ISeq()]),
-            inserted=ISeqList([ISeq()]), shift=0, term=0):
-        """
-        Initialise the class with the appropriate values.
-
-        :arg int start: Start position.
-        :arg int end: End position.
-        :arg int sample_start: Start position.
-        :arg int sample_end: End position.
-        :arg unicode type: Variant type.
-        :arg unicode deleted: Deleted part of the reference sequence.
-        :arg ISeqList inserted: Inserted part.
-        :arg int shift: Amount of freedom.
-        :arg int term: Number of positions until stop codon.
-        """
-        self.start = start
-        self.end = end
-        self.sample_start = sample_start
-        self.sample_end = sample_end
-        self.type = type
-        self.deleted = deleted
-        self.inserted = inserted
-        self.shift = shift
-        self.term = term
-
-
-    def __unicode__(self):
-        """
-        Give the HGVS description of the raw variant stored in this class.
-
-        Note that this function relies on the absence of values to make the
-        correct description. The method used in the DNAVar is better.
-
-        :returns unicode: The HGVS description of the raw variant stored in
-            this class.
-        """
-        if self.type == 'unknown':
-            return '?'
-        if self.type == 'none':
-            return '='
-
-        description = ''
-        if not self.deleted:
-            if self.type == 'ext':
-                description += '*'
-            else:
-                description += '{}'.format(seq3(self.start_aa))
-        else:
-            description += '{}'.format(seq3(self.deleted))
-        description += '{}'.format(self.start)
-        if self.end:
-            description += '_{}{}'.format(seq3(self.end_aa), self.end)
-        if self.type not in ('subst', 'stop', 'ext', 'fs'): # fs is not a type
-            description += self.type
-        if self.inserted:
-            description += '{}'.format(seq3(self.inserted))
-
-        if self.type == 'stop':
-            return description + '*'
-        if self.term:
-            return description + 'fs*{}'.format(self.term)
-        return description
diff --git a/mutalyzer/website/views.py b/mutalyzer/website/views.py
index 41db5354..f2ae4793 100644
--- a/mutalyzer/website/views.py
+++ b/mutalyzer/website/views.py
@@ -19,8 +19,10 @@ from lxml import etree
 from spyne.server.http import HttpBase
 from sqlalchemy.orm.exc import NoResultFound
 
+from extractor import describe
+
 import mutalyzer
-from mutalyzer import (announce, describe, File, Retriever, Scheduler, stats,
+from mutalyzer import (announce, File, Retriever, Scheduler, stats,
                        util, variantchecker)
 from mutalyzer.config import settings
 from mutalyzer.db.models import BATCH_JOB_TYPES
diff --git a/requirements.txt b/requirements.txt
index a3cb8eb2..39429208 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 -e git+https://github.com/LUMC/magic-python.git#egg=Magic_file_extensions
--e git+https://github.com/LUMC/extractor.git@3148fad6b081c36e9675494f4896ba60f14707a9#egg=extractor
+-e git+https://github.com/LUMC/extractor.git@f270ef101e909895d58e47420935f2bbfdb28a3b#egg=extractor
 Flask==0.10.1
 Jinja2==2.7.3
 MySQL-python==1.2.5
diff --git a/tests/test_describe.py b/tests/test_describe.py
deleted file mode 100644
index 484d824d..00000000
--- a/tests/test_describe.py
+++ /dev/null
@@ -1,187 +0,0 @@
-"""
-Tests for the mutalyzer.describe module.
-"""
-
-
-from __future__ import unicode_literals
-
-#import logging; logging.basicConfig()
-import os
-
-import mutalyzer
-from mutalyzer import describe
-
-from utils import MutalyzerTest
-
-
-class TestDescribe(MutalyzerTest):
-    """
-    Test the mutalyzer.describe module.
-    """
-    def _single_variant(self, sample, expected):
-        """
-        General single variant test.
-        """
-        reference = 'ACGTCGATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT'
-
-        result = describe.describe_dna(reference, sample)
-        assert result[0].type == expected[0]
-        assert result[0].start == expected[1]
-        assert result[0].end == expected[2]
-        assert result[0].sample_start == expected[3]
-        assert result[0].sample_end == expected[4]
-        assert result[0].deleted[0].sequence == expected[5]
-        assert result[0].inserted[0].sequence == expected[6]
-        assert unicode(result[0]) == expected[7]
-
-
-    def test1(self):
-        """
-        Test 1.
-        """
-        result = describe.describe_dna(
-            'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA',
-            'ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA')
-        assert unicode(result) == '[5_6insTT;17del;26A>C;35dup]'
-
-
-    def test2(self):
-        """
-        Test 2.
-        """
-        result = describe.describe_dna(
-            'TAAGCACCAGGAGTCCATGAAGAAGATGGCTCCTGCCATGGAATCCCCTACTCTACTGTG',
-            'TAAGCACCAGGAGTCCATGAAGAAGCTGGATCCTCCCATGGAATCCCCTACTCTACTGTG')
-        assert unicode(result) == '[26A>C;30C>A;35G>C]'
-
-
-    def test3(self):
-        """
-        Test 3.
-        """
-        result = describe.describe_dna(
-            'TAAGCACCAGGAGTCCATGAAGAAGATGGCTCCTGCCATGGAATCCCCTACTCTA',
-            'TAAGCACCAGGAGTCCATGAAGAAGCCATGTCCTGCCATGGAATCCCCTACTCTA')
-        assert unicode(result) == '[26_29inv;30C>G]'
-
-
-    def test4(self):
-        """
-        Test 4.
-        """
-        result = describe.describe_dna(
-            'TAAGCACCAGGAGTCCATGAAGAAGATGGCTCCTGCCATGGAATCCCCTACTCTA',
-            'TAAGCACCAGGAGTCCATGAAGAAGCCATGTCCTGCCATGAATCCCCTACTCTA')
-        assert unicode(result) == '[26_29inv;30C>G;41del]'
-
-
-    def test5(self):
-        """
-        Test 5.
-        """
-        self._single_variant('ACGTCGATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT',
-            ('none', 0, 0, 0, 0, '', '', '='))
-
-
-    def test6(self):
-        """
-        Test 6.
-        """
-        self._single_variant('ACGTCGGTTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT',
-            ('subst', 7, 7, 7, 7, 'A', 'G', '7A>G'))
-
-
-    def test7(self):
-        """
-        Test 7.
-        """
-        self._single_variant('ACGTCGTTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT',
-            ('del', 7, 7, 6, 7, 'A', '', '7del'))
-
-
-    def test8(self):
-        """
-        Test 8.
-        """
-        self._single_variant('ACGTCGTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT',
-            ('del', 7, 8, 6, 7, 'AT', '', '7_8del'))
-
-
-    def test9(self):
-        """
-        Test 9.
-        """
-        self._single_variant('ACGTCGCATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT',
-            ('ins', 6, 7, 7, 7, '', 'C', '6_7insC'))
-
-
-    def test10(self):
-        """
-        Test 10.
-        """
-        self._single_variant('ACGTCGCCATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT',
-            ('ins', 6, 7, 7, 8, '', 'CC', '6_7insCC'))
-
-
-    def test11(self):
-        """
-        Test 11.
-        """
-        self._single_variant('ACGTCGAATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT',
-            ('dup', 7, 7, 8, 8, '', 'A', '7dup'))
-
-
-    def test12(self):
-        """
-        Test 12.
-        """
-        self._single_variant('ACGTCGAGATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT',
-            ('dup', 6, 7, 8, 9, '', 'GA', '6_7dup'))
-
-
-    def test13(self):
-        """
-        Test 13.
-        """
-        self._single_variant('ACGTCGACGATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT',
-            ('dup', 5, 7, 8, 10, '', 'CGA', '5_7dup'))
-
-
-    def test14(self):
-        """
-        Test 14.
-        """
-        self._single_variant('ACGTCGCGAATCTAGCTTCGGGGGATAGATAGAGATATAGAGAT',
-            ('inv', 7, 11, 7, 11, 'ATTCG', 'CGAAT', '7_11inv'))
-
-
-    def test15(self):
-        """
-        Test 15.
-        """
-        self._single_variant('ACGTCGCCTTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT',
-            ('delins', 7, 7, 7, 8, 'A', 'CC', '7delinsCC'))
-
-
-    def test16(self):
-        """
-        Test 16.
-        """
-        self._single_variant('ACGTCGATTCGCTAGCTTCGTTTTGATAGATAGAGATATAGAGAT',
-            ('delins', 21, 23, 21, 24, 'GGG', 'TTTT', '21_23delinsTTTT'))
-
-
-    def test17(self):
-        """
-        Test 17.
-        """
-        self._single_variant('ACGTCTCTTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT',
-            ('inv', 6, 7, 6, 7, 'GA', 'TC', '6_7inv'))
-
-
-    def test18(self):
-        """
-        Test 18.
-        """
-        self._single_variant('ACGTCGTCTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT',
-            ('delins', 7, 8, 7, 8, 'AT', 'TC', '7_8delinsTC'))
-- 
GitLab