"""
Tests for the mutalyzer.parsers.genbank module.
"""


from __future__ import unicode_literals

import os

import pytest

from mutalyzer.parsers.genbank import GBparser

from fixtures import with_references


@pytest.fixture
def parser():
    """
    Provide a newly constructed GBparser instance to the requesting test.
    """
    gb_parser = GBparser()
    return gb_parser


@pytest.mark.parametrize(
    'products,expected',
    [
        (['a b c d e', 'a b C D e', 'a b c d e'], (2, 1)),
        (['a b c d e', 'a b C d e', 'a B c d e'], (1, 2)),
        (['a c d a', 'a b a', 'a a', 'a'], (1, 1)),
        ([''], (-1, -1)),
        (['', ''], (-1, -1)),
        (['a', 'a'], (-1, -1)),
        (['a', 'b'], (0, 0)),
        (['a b c', 'a b c'], (-1, -1)),
        (['a b c d a b', 'a b'], (2, 2)),
    ]
)
def test_product_lists_mismatch(parser, products, expected):
    """
    Check the result of GBparser._find_mismatch for several product lists.

    Every case pairs a list of space-separated product strings with the
    2-tuple that _find_mismatch must return for it. Lists whose entries
    are all identical (or empty) are expected to yield (-1, -1).
    """
    found = parser._find_mismatch(products)
    assert found == expected


@with_references('A1BG')
def test_only_complete_genes_included(settings, references, parser):
    """
    Only complete genes from the reference file end up in the record.

    The reference used here contains A1BG (complete) next to A1BG-AS1,
    ZNF497 and LOC100419840 (all incomplete), so A1BG must be the only
    gene in the parsed record.
    """
    reference = references[0]
    basename = '%s.gb.bz2' % reference.accession
    path = os.path.join(settings.CACHE_DIR, basename)

    gene_names = []
    for gene in parser.create_record(path).geneList:
        gene_names.append(gene.name)

    assert gene_names == ['A1BG']

@with_references('ADAC')
def test_no_version(settings, references, parser):
    """
    Parse a genbank file that lacks the 'version' field.

    Without that field, the BioPython record.id is just the accession
    number (no version suffix), which used to crash our parser. The test
    passes as long as creating the record does not raise.

    This genbank file was contributed by Gerard Schaafsma (original
    source unknown).
    """
    reference = references[0]
    basename = '%s.gb.bz2' % reference.accession
    path = os.path.join(settings.CACHE_DIR, basename)
    # No assertion on the result: not crashing is the behaviour under test.
    parser.create_record(path)