From d94f20cf326b4c9116e8ea672298621e6f96d36b Mon Sep 17 00:00:00 2001 From: Martijn Vermaat <martijn@vermaat.name> Date: Sun, 11 Oct 2015 00:55:51 +0200 Subject: [PATCH] Refactor unit tests using common py.test layout and fixtures --- .gitignore | 1 + .travis.yml | 27 +- doc/testing.rst | 15 +- mutalyzer/db/__init__.py | 14 + requirements.txt | 3 +- tests/conftest.py | 31 + tests/data/references.yml | 268 +++ tests/fixtures.py | 1191 ++++++------- tests/test_crossmap.py | 722 ++++---- tests/test_db_queries.py | 98 +- tests/test_grammar.py | 288 ++-- tests/test_mapping.py | 644 ++++--- tests/test_migrations.py | 9 +- tests/test_mutator.py | 2568 ++++++++++++++-------------- tests/test_parsers_genbank.py | 76 +- tests/test_scheduler.py | 700 ++++---- tests/test_services_json.py | 591 +++---- tests/test_services_soap.py | 1583 ++++++++--------- tests/test_variantchecker.py | 3026 +++++++++++++++++---------------- tests/test_website.py | 1889 ++++++++++---------- tests/utils.py | 86 - 21 files changed, 7083 insertions(+), 6747 deletions(-) create mode 100644 tests/conftest.py create mode 100644 tests/data/references.yml delete mode 100644 tests/utils.py diff --git a/.gitignore b/.gitignore index 3c4834e9..b82c3e99 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.pyc +.cache /build /dist /mutalyzer.egg-info diff --git a/.travis.yml b/.travis.yml index 698cb61c..4d927751 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,28 +1,17 @@ # Validate this file using http://lint.travis-ci.org/ language: python +python: "2.7" addons: postgresql: "9.4" -services: - - redis-server -python: - - "2.7" -env: - global: - - MUTALYZER_TEST_REDIS_URI=redis://localhost - matrix: - - MUTALYZER_TEST_DATABASE_URI=sqlite:// - - MUTALYZER_TEST_DATABASE_URI=postgres://postgres@127.0.0.1/mutalyzer_test - - MUTALYZER_TEST_DATABASE_URI=mysql://travis@127.0.0.1/mutalyzer_test?charset=utf8 +services: redis-server before_install: - sudo apt-get update -qq - sudo apt-get install -y swig - pip 
install -r requirements.txt - - bash -c "if [[ '$MUTALYZER_TEST_DATABASE_URI' =~ 'postgres' ]]; then pip install psycopg2; fi" -install: - - pip install . + - pip install psycopg2 +install: pip install . before_script: - - bash -c "if [[ '$MUTALYZER_TEST_DATABASE_URI' =~ 'postgres' ]]; then psql -c 'DROP DATABASE IF EXISTS mutalyzer_test;' -U postgres; fi" - - bash -c "if [[ '$MUTALYZER_TEST_DATABASE_URI' =~ 'postgres' ]]; then psql -c 'CREATE DATABASE mutalyzer_test;' -U postgres; fi" - - bash -c "if [[ '$MUTALYZER_TEST_DATABASE_URI' =~ 'mysql' ]]; then mysql -e 'CREATE DATABASE IF NOT EXISTS mutalyzer_test;'; fi" -script: - - py.test + - psql -c 'DROP DATABASE IF EXISTS mutalyzer_test;' -U postgres + - psql -c 'CREATE DATABASE mutalyzer_test;' -U postgres + - mysql -e 'CREATE DATABASE IF NOT EXISTS mutalyzer_test;' +script: py.test --redis-uri redis://localhost --database-uri sqlite:// --database-uri postgres://postgres@127.0.0.1/mutalyzer_test --database-uri mysql://travis@127.0.0.1/mutalyzer_test?charset=utf8 diff --git a/doc/testing.rst b/doc/testing.rst index e7bf32f0..75565645 100644 --- a/doc/testing.rst +++ b/doc/testing.rst @@ -16,23 +16,26 @@ the Mutalyzer source directory. tests. By default, the tests use an in-memory SQLite database. This can be customized -by setting the `MUTALYZER_TEST_DATABASE_URI` to a valid `SQLAlchemy connection -URI +with the ``--database-uri` command line argument and a valid `SQLAlchemy +connection URI <http://docs.sqlalchemy.org/en/rel_1_0/core/engines.html#database-urls>`_ (obviously, the contents of this database will be lost). 
For example, to use an SQLite database on the filesystem:: - $ MUTALYZER_TEST_DATABASE_URI=sqlite:////tmp/mutalyzer.sql py.test + $ py.test --database-uri sqlite:////tmp/mutalyzer.sql Or, using `pg_virtualenv <https://alioth.debian.org/scm/loggerhead/pkg-postgresql/postgresql-common/trunk/view/head:/pg_virtualenv>`_ (included with the Debian PostgreSQL packages), to run the tests with PostgreSQL:: - $ pg_virtualenv bash -c 'MUTALYZER_TEST_DATABASE_URI=postgres://${PGUSER}:${PGPASSWORD}@${PGHOST}:${PGPORT}/${PGDATABASE} py.test' + $ pg_virtualenv bash -c 'py.test --database-uri postgres://${PGUSER}:${PGPASSWORD}@${PGHOST}:${PGPORT}/${PGDATABASE}' -Similarly, the `MUTALYZER_TEST_REDIS_URI` specifies a Redis server to use for -testing. If unspecified, a mock Redis server is used. +Multiple ``--database-uri`` arguments are allowed. Tests using the database +will be run once for every database specified. + +Similarly, ``--redis-uri`` (only one allowed) specifies a Redis server to use +for testing. If unspecified, a mock Redis server is used. Tests are `run automatically on Travis CI <https://travis-ci.org/mutalyzer/mutalyzer>`_ with SQLite, PostgreSQL, and diff --git a/mutalyzer/db/__init__.py b/mutalyzer/db/__init__.py index 1655baa6..8e66ddbe 100644 --- a/mutalyzer/db/__init__.py +++ b/mutalyzer/db/__init__.py @@ -30,6 +30,20 @@ def create_engine(): """ Create an SQLAlchemy connection engine from the current configuration. """ + if not settings.DATABASE_URI: + # Just return silently when no database is configured (this function + # may still be called via the configuration settings hook). Of course + # actually using the database will fail. + return + + # TODO: Remove when refactoring tests is complete. We should check that + # this is never reached with 'sqlite://' from the default_settings or with + # a value from MUTALYZER_SETTINGS during tests. The uri(s) configured in + # the tests should be used instead. 
+ # This also makes sure that tests can never accidentally use the database + # without having it declared as a needed fixture. + print '!!!!!!!!!!!!! Creating session for', settings.DATABASE_URI + url = make_url(settings.DATABASE_URI) options = {} diff --git a/requirements.txt b/requirements.txt index da0098a2..32d7ba52 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ description-extractor==2.2.1 Flask==0.10.1 Jinja2==2.7.3 MySQL-python==1.2.5 +PyYAML==3.11 SQLAlchemy==0.9.8 Sphinx==1.2.3 Werkzeug==0.9.6 @@ -14,7 +15,7 @@ lxml==3.4.0 mock==1.0.1 mockredispy==2.9.0.9 pyparsing==2.0.3 -pytest==2.6.3 +pytest==2.8.2 pytz==2014.7 redis==2.10.3 requests==2.4.3 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..79c9344c --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,31 @@ +""" +Test configuration. +""" + + +from __future__ import unicode_literals + +from fixtures import * # noqa + + +DEFAULT_DATABASE_URIS = ['sqlite://'] +DEFAULT_REDIS_URI = None + + +def pytest_addoption(parser): + parser.addoption( + '--database-uri', metavar='URI', dest='database_uris', default=[], + action='append', + help='Database connection, multiple allowed (default: in-memory ' + 'SQLite database)') + parser.addoption( + '--redis-uri', metavar='URI', dest='redis_uri', + default=DEFAULT_REDIS_URI, + help='Redis connection (default: mock Redis server)') + + +def pytest_generate_tests(metafunc): + if 'database_uri' in metafunc.fixturenames: + metafunc.parametrize( + 'database_uri', + metafunc.config.option.database_uris or DEFAULT_DATABASE_URIS) diff --git a/tests/data/references.yml b/tests/data/references.yml new file mode 100644 index 00000000..17afd29a --- /dev/null +++ b/tests/data/references.yml @@ -0,0 +1,268 @@ +A1BG: + accession: UD_139015218717 + checksum: e179de8b248806815394c4f7496ba872 + filename: UD_139015218717.gb.bz2 + links: + - - NM_001207009 + - NP_001193938 + - - NM_198458 + - NP_940860 + - - XM_005258578 + - 
XP_005258635 + - - XM_005258577 + - XP_005258634 + - - NR_015380 + - null + - - NM_130786 + - NP_570602 + - - XM_005258393 + - XP_005258450 +AA010203.1: + checksum: 57cee03becb77ce68a225b9c844afb24 + filename: AA010203.1.gb.bz2 + geninfo_id: '1471230' +AB026906.1: + checksum: 29b003d5a71af74dc61a92d2ef5cd5d9 + filename: AB026906.1.gb.bz2 + geninfo_id: '5295993' +AF230870.1: + checksum: 9fefa34f40d94910edb5de34a3f98910 + filename: AF230870.1.gb.bz2 + geninfo_id: '7739657' +AL449423.14: + checksum: 00a014242818a3b003b4c077af9e10e0 + filename: AL449423.14.gb.bz2 + geninfo_id: '16944057' +COL1A1: + accession: UD_139022298843 + checksum: 815517e36fb380b52842ace6a6e78637 + filename: UD_139022298843.gb.bz2 + links: + - - XM_005257059 + - XP_005257116 + - - XM_005257058 + - XP_005257115 + - - NM_000088 + - NP_000079 +DMD: + accession: UD_139262478721 + checksum: d41d8cd98f00b204e9800998ecf8427e + filename: UD_139262478721.gb.bz2 + links: + - - XM_006724469 + - XP_006724532 + - - XM_006724472 + - XP_006724535 + - - XM_006724473 + - XP_006724536 + - - XM_006724474 + - XP_006724537 + - - XM_006724475 + - XP_006724538 + - - XM_006724471 + - XP_006724534 + - - XM_006724470 + - XP_006724533 + - - XR_430491 + - null + - - XM_006724468 + - XP_006724531 + - - XM_006724476 + - XP_006724539 + - - NM_000109 + - NP_000100 + - - NM_004006 + - NP_003997 + - - NM_004009 + - NP_004000 + - - NM_004010 + - NP_004001 + - - NM_004011 + - NP_004002 + - - NM_004012 + - NP_004003 + - - NM_004023 + - NP_004014 + - - NM_004020 + - NP_004011 + - - NM_004022 + - NP_004013 + - - NM_004021 + - NP_004012 + - - NM_004013 + - NP_004004 + - - NM_004014 + - NP_004005 + - - NM_004018 + - NP_004009 + - - NM_004017 + - NP_004008 + - - NM_004016 + - NP_004007 + - - NM_004015 + - NP_004006 + - - NM_004019 + - NP_004010 +DPYD: + accession: UD_139015208095 + checksum: b2b9d402a6e43f80ce1e9bbb72a3c0c6 + filename: UD_139015208095.gb.bz2 + links: + - - NR_046590 + - null + - - XM_005270562 + - XP_005270619 + - - 
NM_000110 + - NP_000101 + - - XM_005270561 + - XP_005270618 + - - XM_005270563 + - XP_005270620 + - - XM_005270564 + - XP_005270621 + - - NM_001160301 + - NP_001153773 +L41870.1: + checksum: 91b1e539a053f731f95d230a06710897 + filename: L41870.1.gb.bz2 + geninfo_id: '793994' +LRG_1: + checksum: 5b8f5a39fcd9e3005688eddffd482746 + filename: LRG_1.xml.bz2 +MARK1: + accession: UD_139015213982 + checksum: 0d63a8fe5beddeb793940f6ae194b985 + filename: UD_139015213982.gb.bz2 + links: + - - NM_018650 + - NP_061120 + - - XM_005273133 + - null + - - XM_005273134 + - XP_005273191 + - - XM_005273135 + - null + - - XM_005273136 + - null +NG_008939.1: + checksum: 114a03e16ad2f63531d796c2fb0d7039 + filename: NG_008939.1.gb.bz2 + geninfo_id: '211938431' + links: + - - NM_000532 + - NP_000523 +NG_009105.1: + checksum: f2579e6c4a8ead4566e485aad493ef7e + filename: NG_009105.1.gb.bz2 + geninfo_id: '216548283' + links: + - - NM_020061 + - NP_064445 +NG_012337.1: + checksum: ad712f4f225398d2b11b4f08110c70e6 + filename: NG_012337.1.gb.bz2 + geninfo_id: '254039638' + links: + - - NM_018195 + - NP_060665 + - - NM_001082969 + - NP_001076438 + - - NM_001082970 + - NP_001076439 + - - NM_003002 + - NP_002993 + - - NM_012459 + - NP_036591 +NG_012772.1: + checksum: 163881f00c9c26516d52a4ddb34f941f + filename: NG_012772.1.gb.bz2 + geninfo_id: '256574794' + links: + - - NM_052818 + - NP_438169 + - - NM_001079691 + - NP_001073159 + - - NM_000059 + - NP_000050 + - - NM_001136571 + - NP_001130043 +NM_000059.3: + checksum: f93216b3a596adab279ebd7903258548 + filename: NM_000059.3.gb.bz2 + geninfo_id: '119395733' +NM_000088.3: + checksum: 5d1f23e3c1799bdb5586c6786b5d5744 + filename: NM_000088.3.gb.bz2 + geninfo_id: '110349771' +NM_000132.3: + checksum: 94569bee76d7c8b1168e17df4fe1dcb4 + filename: NM_000132.3.gb.bz2 + geninfo_id: '192448441' +NM_000143.3: + checksum: c91799f40fdc0466bf7702af14cf070a + filename: NM_000143.3.gb.bz2 + geninfo_id: '299758401' +NM_000193.2: + checksum: 
86d03e1cf38c1387d90116539ea0678f + filename: NM_000193.2.gb.bz2 + geninfo_id: '21071042' +NM_001199.3: + checksum: e750b6dcead66b8bb953ce445bcd3093 + filename: NM_001199.3.gb.bz2 +NM_002001.2: + checksum: 7fd5aa4fe864fd5193f224fca8cea70d + filename: NM_002001.2.gb.bz2 + geninfo_id: '31317229' +NM_003002.2: + checksum: 990aa672364937335365609617df3050 + filename: NM_003002.2.gb.bz2 + geninfo_id: '222352156' +NM_004006.1: + checksum: be8fea2905e146bfe096e25fbfda2eef + filename: NM_004006.1.gb.bz2 + geninfo_id: '5032282' +NM_004006.2: + checksum: ee2090536af19a13ac1d6faa46d0b12e + filename: NM_004006.2.gb.bz2 + geninfo_id: '238018044' +NM_024426.4: + checksum: 830a3beb9b7af3c6ba3e8a15b1bd0f54 + filename: NM_024426.4.gb.bz2 + geninfo_id: '309951095' +NM_203473.1: + checksum: ec8fbdeda11ef8ec953e4ed39e9a84e5 + filename: NM_203473.1 + geninfo_id: '45439330' +NP_064445.1: + checksum: 33ea9315882b4a9d8c33018a201be2fa + filename: NP_064445.1.gb.bz2 + geninfo_id: '9910526' +chr9_reverse: + accession: UD_139015349377 + checksum: d21f92d09116c4831ce8d3ef832aa281 + filename: UD_139015349377.gb.bz2 + links: + - - NM_001195250 + - NP_001182179 + - - NR_036576 + - null + - - NR_036577 + - null + - - NM_001195252 + - NP_001182181 + - - NM_001195248 + - NP_001182177 + - - NM_175069 + - NP_778239 + - - NM_175073 + - NP_778243 + - - NM_001195251 + - NP_001182180 + - - NM_001195254 + - NP_001182183 + - - NR_036578 + - null + - - NR_036579 + - null + - - NM_001195249 + - NP_001182178 diff --git a/tests/fixtures.py b/tests/fixtures.py index c71f315f..25d29812 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1,9 +1,5 @@ """ -Fixtures for unit tests. - -Each fixture is defined by a function which when called sets up the fixture. -The order of calling can be important (e.g., fixtures using the database such -as :func:`hg19` must be called after the :func:`database` fixture). +Test fixtures. 
""" @@ -12,666 +8,555 @@ from __future__ import unicode_literals import os import shutil -from mutalyzer.config import settings -from mutalyzer.db import Base, session +import pytest +import yaml + +from mutalyzer.config import settings as _settings +from mutalyzer.output import Output +from mutalyzer.redisclient import client as redis from mutalyzer.db.models import (Assembly, Chromosome, Reference, TranscriptMapping, TranscriptProteinLink) +from mutalyzer import db as _db -#: Reference definitions for use with the :func:`cache` fixture. -REFERENCES = { - 'AB026906.1': {'filename': 'AB026906.1.gb.bz2', - 'checksum': '29b003d5a71af74dc61a92d2ef5cd5d9', - 'geninfo_id': '5295993'}, - 'AL449423.14': {'filename': 'AL449423.14.gb.bz2', - 'checksum': '00a014242818a3b003b4c077af9e10e0', - 'geninfo_id': '16944057'}, - 'NM_000059.3': {'filename': 'NM_000059.3.gb.bz2', - 'checksum': 'f93216b3a596adab279ebd7903258548', - 'geninfo_id': '119395733'}, - 'NM_003002.2': {'filename': 'NM_003002.2.gb.bz2', - 'checksum': '990aa672364937335365609617df3050', - 'geninfo_id': '222352156'}, - 'NG_012772.1': {'filename': 'NG_012772.1.gb.bz2', - 'checksum': '163881f00c9c26516d52a4ddb34f941f', - 'geninfo_id': '256574794', - 'links': [('NM_052818', 'NP_438169'), - ('NM_001079691', 'NP_001073159'), - ('NM_000059', 'NP_000050'), - ('NM_001136571', 'NP_001130043')]}, - 'AA010203.1': {'filename': 'AA010203.1.gb.bz2', - 'checksum': '57cee03becb77ce68a225b9c844afb24', - 'geninfo_id': '1471230'}, - 'NM_000088.3': {'filename': 'NM_000088.3.gb.bz2', - 'checksum': '5d1f23e3c1799bdb5586c6786b5d5744', - 'geninfo_id': '110349771'}, - 'NM_000143.3': {'filename': 'NM_000143.3.gb.bz2', - 'checksum': 'c91799f40fdc0466bf7702af14cf070a', - 'geninfo_id': '299758401'}, - 'NM_002001.2': {'filename': 'NM_002001.2.gb.bz2', - 'checksum': '7fd5aa4fe864fd5193f224fca8cea70d', - 'geninfo_id': '31317229'}, - 'NM_001199.3': {'filename': 'NM_001199.3.gb.bz2', - 'checksum': 'e750b6dcead66b8bb953ce445bcd3093'}, - 
'NG_008939.1': {'filename': 'NG_008939.1.gb.bz2', - 'checksum': '114a03e16ad2f63531d796c2fb0d7039', - 'geninfo_id': '211938431', - 'links': [('NM_000532', 'NP_000523')]}, - 'NM_000193.2': {'filename': 'NM_000193.2.gb.bz2', - 'checksum': '86d03e1cf38c1387d90116539ea0678f', - 'geninfo_id': '21071042'}, - 'NM_024426.4': {'filename': 'NM_024426.4.gb.bz2', - 'checksum': '830a3beb9b7af3c6ba3e8a15b1bd0f54', - 'geninfo_id': '309951095'}, - 'NP_064445.1': {'filename': 'NP_064445.1.gb.bz2', - 'checksum': '33ea9315882b4a9d8c33018a201be2fa', - 'geninfo_id': '9910526'}, - 'L41870.1': {'filename': 'L41870.1.gb.bz2', - 'checksum': '91b1e539a053f731f95d230a06710897', - 'geninfo_id': '793994'}, - 'NG_009105.1': {'filename': 'NG_009105.1.gb.bz2', - 'checksum': 'f2579e6c4a8ead4566e485aad493ef7e', - 'geninfo_id': '216548283', - 'links': [('NM_020061', 'NP_064445')]}, - 'AF230870.1': {'filename': 'AF230870.1.gb.bz2', - 'checksum': '9fefa34f40d94910edb5de34a3f98910', - 'geninfo_id': '7739657'}, - 'NG_012337.1': {'filename': 'NG_012337.1.gb.bz2', - 'checksum': 'ad712f4f225398d2b11b4f08110c70e6', - 'geninfo_id': '254039638', - 'links': [('NM_018195', 'NP_060665'), - ('NM_001082969', 'NP_001076438'), - ('NM_001082970', 'NP_001076439'), - ('NM_003002', 'NP_002993'), - ('NM_012459', 'NP_036591')]}, - 'NM_203473.1': {'filename': 'NM_203473.1', - 'checksum': 'ec8fbdeda11ef8ec953e4ed39e9a84e5', - 'geninfo_id': '45439330'}, - 'NM_000132.3': {'filename': 'NM_000132.3.gb.bz2', - 'checksum': '94569bee76d7c8b1168e17df4fe1dcb4', - 'geninfo_id': '192448441'}, - 'NM_004006.1': {'filename': 'NM_004006.1.gb.bz2', - 'checksum': 'be8fea2905e146bfe096e25fbfda2eef', - 'geninfo_id': '5032282'}, - 'NM_004006.2': {'filename': 'NM_004006.2.gb.bz2', - 'checksum': 'ee2090536af19a13ac1d6faa46d0b12e', - 'geninfo_id': '238018044'}, - 'LRG_1': {'filename': 'LRG_1.xml.bz2', - 'checksum': '5b8f5a39fcd9e3005688eddffd482746'}, - 'DMD': {'accession': 'UD_139262478721', - 'filename': 'UD_139262478721.gb.bz2', - 'checksum': 
'd41d8cd98f00b204e9800998ecf8427e', - 'links': [('XM_006724469', 'XP_006724532'), - ('XM_006724472', 'XP_006724535'), - ('XM_006724473', 'XP_006724536'), - ('XM_006724474', 'XP_006724537'), - ('XM_006724475', 'XP_006724538'), - ('XM_006724471', 'XP_006724534'), - ('XM_006724470', 'XP_006724533'), - ('XR_430491', None), - ('XM_006724468', 'XP_006724531'), - ('XM_006724476', 'XP_006724539'), - ('NM_000109', 'NP_000100'), - ('NM_004006', 'NP_003997'), - ('NM_004009', 'NP_004000'), - ('NM_004010', 'NP_004001'), - ('NM_004011', 'NP_004002'), - ('NM_004012', 'NP_004003'), - ('NM_004023', 'NP_004014'), - ('NM_004020', 'NP_004011'), - ('NM_004022', 'NP_004013'), - ('NM_004021', 'NP_004012'), - ('NM_004013', 'NP_004004'), - ('NM_004014', 'NP_004005'), - ('NM_004018', 'NP_004009'), - ('NM_004017', 'NP_004008'), - ('NM_004016', 'NP_004007'), - ('NM_004015', 'NP_004006'), - ('NM_004019', 'NP_004010')]}, - 'DPYD': {'accession': 'UD_139015208095', - 'filename': 'UD_139015208095.gb.bz2', - 'checksum': 'b2b9d402a6e43f80ce1e9bbb72a3c0c6', - 'links': [('NR_046590', None), - ('XM_005270562', 'XP_005270619'), - ('NM_000110', 'NP_000101'), - ('XM_005270561', 'XP_005270618'), - ('XM_005270563', 'XP_005270620'), - ('XM_005270564', 'XP_005270621'), - ('NM_001160301', 'NP_001153773')]}, - 'MARK1': {'accession': 'UD_139015213982', - 'filename': 'UD_139015213982.gb.bz2', - 'checksum': '0d63a8fe5beddeb793940f6ae194b985', - 'links': [('NM_018650', 'NP_061120'), - ('XM_005273133', None), - ('XM_005273134', 'XP_005273191'), - ('XM_005273135', None), - ('XM_005273136', None)]}, - 'A1BG': {'accession': 'UD_139015218717', - 'filename': 'UD_139015218717.gb.bz2', - 'checksum': 'e179de8b248806815394c4f7496ba872', - 'links': [('NM_001207009', 'NP_001193938'), - ('NM_198458', 'NP_940860'), - ('XM_005258578', 'XP_005258635'), - ('XM_005258577', 'XP_005258634'), - ('NR_015380', None), - ('NM_130786', 'NP_570602'), - ('XM_005258393', 'XP_005258450')]}, - 'chr9_reverse': {'accession': 'UD_139015349377', - 
'filename': 'UD_139015349377.gb.bz2', - 'checksum': 'd21f92d09116c4831ce8d3ef832aa281', - 'links': [('NM_001195250', 'NP_001182179'), - ('NR_036576', None), - ('NR_036577', None), - ('NM_001195252', 'NP_001182181'), - ('NM_001195248', 'NP_001182177'), - ('NM_175069', 'NP_778239'), - ('NM_175073', 'NP_778243'), - ('NM_001195251', 'NP_001182180'), - ('NM_001195254', 'NP_001182183'), - ('NR_036578', None), - ('NR_036579', None), - ('NM_001195249', 'NP_001182178')]}, - 'COL1A1': {'accession': 'UD_139022298843', - 'filename': 'UD_139022298843.gb.bz2', - 'checksum': '815517e36fb380b52842ace6a6e78637', - 'links': [('XM_005257059', 'XP_005257116'), - ('XM_005257058', 'XP_005257115'), - ('NM_000088', 'NP_000079')]}} - - -def database(): - """ - Fixture for database table definitions. - """ - Base.metadata.create_all(session.get_bind()) +@pytest.fixture(autouse=True) +def settings(request, tmpdir): + # This fixture should always be loaded, otherwise we have the risk that + # `MUTALYZER_SETTINGS` will be used. + cache_dir = unicode(tmpdir.mkdir('cache')) + log_file = unicode(tmpdir.join('log').ensure()) + + redis_uri = request.config.option.redis_uri + + _settings.configure({ + 'DEBUG': False, + 'TESTING': True, + 'CACHE_DIR': cache_dir, + 'LOG_FILE': log_file, + 'DATABASE_URI': None, + 'REDIS_URI': redis_uri + }) + + if redis_uri is not None: + redis.flushdb() + + return _settings + + +@pytest.fixture +def output(settings): + return Output('test') + + +@pytest.fixture +def db(request, settings, database_uri): + settings.configure({ + 'DATABASE_URI': database_uri + }) + + # Mutalyzer create tables automatically if we're using an SQLite + # in-memory database. 
+ if database_uri != 'sqlite://': + _db.Base.metadata.drop_all(_db.session.get_bind()) + _db.Base.metadata.create_all(_db.session.get_bind()) + + request.addfinalizer(_db.session.remove) + + return _db + + +@pytest.fixture(scope='session') +def available_references(): + filename = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', 'references.yml') + with open(filename) as f: + return yaml.safe_load(f) + +@pytest.fixture +def references(request, settings, db, available_references): + try: + keys = request.param + except AttributeError: + return [] -def hg19(): + references = [] + + for key in keys: + entry = available_references[key] + try: + accession = entry['accession'] + except KeyError: + accession = key + geninfo_id = entry.get('geninfo_id') + + # TODO: use pytest basepath or something? + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + entry['filename']) + shutil.copy(path, settings.CACHE_DIR) + + reference = Reference( + accession, entry['checksum'], geninfo_identifier=geninfo_id) + db.session.add(reference) + + for transcript, protein in entry.get('links', []): + db.session.add(TranscriptProteinLink(transcript, protein)) + + references.append(reference) + + db.session.commit() + + return references + + +@pytest.fixture +def hg19(db): """ Fixture for GRCh37/hg19 genome assembly with chromosomes. 
""" assembly = Assembly('GRCh37', 9606, 'Homo sapiens', alias='hg19') - session.add(assembly) - - session.add_all(Chromosome(assembly, name, accession, organelle) - for accession, name, organelle in [ - ('NC_000001.10', 'chr1', 'nucleus'), - ('NC_000002.11', 'chr2', 'nucleus'), - ('NC_000003.11', 'chr3', 'nucleus'), - ('NC_000004.11', 'chr4', 'nucleus'), - ('NC_000005.9', 'chr5', 'nucleus'), - ('NC_000006.11', 'chr6', 'nucleus'), - ('NC_000007.13', 'chr7', 'nucleus'), - ('NC_000008.10', 'chr8', 'nucleus'), - ('NC_000009.11', 'chr9', 'nucleus'), - ('NC_000010.10', 'chr10', 'nucleus'), - ('NC_000011.9', 'chr11', 'nucleus'), - ('NC_000012.11', 'chr12', 'nucleus'), - ('NC_000013.10', 'chr13', 'nucleus'), - ('NC_000014.8', 'chr14', 'nucleus'), - ('NC_000015.9', 'chr15', 'nucleus'), - ('NC_000016.9', 'chr16', 'nucleus'), - ('NC_000017.10', 'chr17', 'nucleus'), - ('NC_000018.9', 'chr18', 'nucleus'), - ('NC_000019.9', 'chr19', 'nucleus'), - ('NC_000020.10', 'chr20', 'nucleus'), - ('NC_000021.8', 'chr21', 'nucleus'), - ('NC_000022.10', 'chr22', 'nucleus'), - ('NC_000023.10', 'chrX', 'nucleus'), - ('NC_000024.9', 'chrY', 'nucleus'), - ('NT_167244.1', 'chr6_apd_hap1', 'nucleus'), - ('NT_113891.2', 'chr6_cox_hap2', 'nucleus'), - ('NT_167245.1', 'chr6_dbb_hap3', 'nucleus'), - ('NT_167246.1', 'chr6_mann_hap4', 'nucleus'), - ('NT_167247.1', 'chr6_mcf_hap5', 'nucleus'), - ('NT_167248.1', 'chr6_qbl_hap6', 'nucleus'), - ('NT_167249.1', 'chr6_ssto_hap7', 'nucleus'), - ('NT_167250.1', 'chr4_ctg9_hap1', 'nucleus'), - ('NT_167251.1', 'chr17_ctg5_hap1', 'nucleus'), - ('NC_012920.1', 'chrM', 'mitochondrion')]) - - session.commit() - - -def hg19_transcript_mappings(): - """ - Fixture for some selected transcript mappings in the GRCh37/hg19 genome - assembly. Depends on the :func:`hg19` fixture. 
- """ - chromosome_1 = Chromosome.query.filter_by(accession='NC_000001.10').one() - chromosome_3 = Chromosome.query.filter_by(accession='NC_000003.11').one() - chromosome_6 = Chromosome.query.filter_by(accession='NC_000006.11').one() - chromosome_7 = Chromosome.query.filter_by(accession='NC_000007.13').one() - chromosome_8 = Chromosome.query.filter_by(accession='NC_000008.10').one() - chromosome_11 = Chromosome.query.filter_by(accession='NC_000011.9').one() - chromosome_20 = Chromosome.query.filter_by(accession='NC_000020.10').one() - chromosome_22 = Chromosome.query.filter_by(accession='NC_000022.10').one() - chromosome_x = Chromosome.query.filter_by(accession='NC_000023.10').one() - chromosome_mt = Chromosome.query.filter_by(accession='NC_012920.1').one() - - session.add_all([chromosome_1, chromosome_6, chromosome_8, chromosome_11, - chromosome_20, chromosome_22, chromosome_mt]) - - session.add(TranscriptMapping( - chromosome_11, - 'refseq', - 'NM_003002', - 'SDHD', - 'forward', - 111957571, - 111966518, - [111957571, 111958581, 111959591, 111965529], - [111957683, 111958697, 111959735, 111966518], - 'ncbi', - transcript=1, - cds=(111957632, 111965694), - select_transcript=False, - version=2)) - session.add(TranscriptMapping( - chromosome_11, - 'refseq', - 'NM_012459', - 'TIMM8B', - 'reverse', - 111955524, - 111957522, - [111955524, 111957364], - [111956186, 111957522], - 'ncbi', - transcript=1, - cds=(111956019, 111957492), - select_transcript=False, - version=2)) - session.add(TranscriptMapping( - chromosome_11, - 'refseq', - 'NR_028383', - 'TIMM8B', - 'reverse', - 111955524, - 111957522, - [111955524, 111956702, 111957364], - [111956186, 111957034, 111957522], - 'ncbi', - transcript=1, - cds=None, - select_transcript=False, - version=1)) - session.add(TranscriptMapping( - chromosome_6, - 'refseq', - 'NM_000500', - 'CYP21A2', - 'forward', - 32006082, - 32009419, - [32006082, 32006499, 32006871, 32007133, 32007323, 32007526, - 32007782, 32008183, 32008445, 
32008646], - [32006401, 32006588, 32007025, 32007234, 32007424, 32007612, - 32007982, 32008361, 32008548, 32009419], - 'ncbi', - transcript=1, - cds=(32006200, 32008911), - select_transcript=False, - version=5)) - session.add(TranscriptMapping( - chromosome_22, - 'refseq', - 'NM_001145134', - 'CPT1B', - 'reverse', - 51007290, - 51017096, - [51007290, 51007765, 51008005, 51008722, 51009320, 51009587, - 51009804, 51010435, 51010632, 51011304, 51011949, 51012764, - 51012922, 51014464, 51014627, 51015286, 51015753, 51016204, - 51016978], - [51007510, 51007850, 51008097, 51008835, 51009472, 51009721, - 51009968, 51010551, 51010737, 51011489, 51012144, 51012848, - 51013029, 51014541, 51014764, 51015463, 51015892, 51016363, - 51017096], - 'ncbi', - transcript=1, - cds=(51007767, 51016344), - select_transcript=False, - version=1)) - session.add(TranscriptMapping( - chromosome_22, - 'refseq', - 'NR_021492', - 'LOC100144603', - 'forward', - 51021455, - 51022356, - [51021455, 51022027], - [51021752, 51022356], - 'ncbi', - transcript=1, - cds=None, - select_transcript=False, - version=1)) - session.add(TranscriptMapping( - chromosome_1, - 'refseq', - 'NM_001007553', - 'CSDE1', - 'reverse', - 115259538, - 115300624, - [115259538, 115261234, 115262200, 115263160, 115266504, 115267842, - 115268832, 115269604, 115272879, 115273129, 115275225, 115276353, - 115276610, 115277063, 115279379, 115280092, 115280584, 115282313, - 115292442, 115300546], - [115260837, 115261366, 115262363, 115263338, 115266623, 115267954, - 115269007, 115269711, 115273043, 115273269, 115275437, 115276478, - 115276738, 115277144, 115279476, 115280184, 115280693, 115282511, - 115292828, 115300624], - 'ncbi', - transcript=1, - cds=(115260790, 115282511), - select_transcript=False, - version=1)) - session.add(TranscriptMapping( - chromosome_1, - 'refseq', - 'NM_001130523', - 'CSDE1', - 'reverse', - 115259538, - 115300671, - [115259538, 115261234, 115262200, 115263160, 115266504, 115267842, - 115268832, 
115269604, 115272879, 115273129, 115275225, 115276353, - 115276610, 115277063, 115279379, 115280584, 115282313, 115284148, - 115292442, 115300546], - [115260837, 115261366, 115262363, 115263338, 115266623, 115267954, - 115269007, 115269711, 115273043, 115273269, 115275437, 115276478, - 115276738, 115277144, 115279476, 115280693, 115282511, 115284294, - 115292828, 115300671], - 'ncbi', - transcript=1, - cds=(115260790, 115284285), - select_transcript=False, - version=1)) - session.add(TranscriptMapping( - chromosome_1, - 'refseq', - 'NM_002241', - 'KCNJ10', - 'reverse', - 160007257, - 160040051, - [160007257, 160039812], - [160012322, 160040051], - 'ncbi', - transcript=1, - cds=(160011183, 160012322), - select_transcript=False, - version=4)) - session.add(TranscriptMapping( - chromosome_20, - 'refseq', - 'NM_001162505', - 'TMEM189', - 'reverse', - 48740274, - 48770335, - [48740274, 48744512, 48746083, 48747402, 48760039, 48770054], - [48741716, 48744724, 48746227, 48747484, 48760158, 48770335], - 'ncbi', - transcript=1, - cds=(48741595, 48770174), - select_transcript=False, - version=1)) - session.add(TranscriptMapping( - chromosome_8, - 'refseq', - 'NM_017780', - 'CHD7', - 'forward', - 61591339, - 61779465, - [61591339, 61653818, 61693559, 61707545, 61712947, 61714087, - 61720776, 61728946, 61732566, 61734349, 61734583, 61735062, - 61736399, 61741222, 61742881, 61748632, 61749376, 61750227, - 61750635, 61754203, 61754406, 61757423, 61757809, 61761074, - 61761610, 61763052, 61763591, 61763821, 61764578, 61765057, - 61765388, 61766922, 61768534, 61769004, 61773463, 61774755, - 61775107, 61777575], - [61591641, 61655656, 61693989, 61707686, 61713084, 61714152, - 61720831, 61729060, 61732649, 61734486, 61734704, 61735305, - 61736575, 61741365, 61743136, 61748842, 61749571, 61750394, - 61750814, 61754313, 61754611, 61757622, 61757968, 61761163, - 61761713, 61763181, 61763663, 61763878, 61764806, 61765265, - 61766059, 61767082, 61768761, 61769447, 61773684, 61774895, - 
61775211, 61779465], - 'ncbi', - transcript=1, - cds=(61653992, 61778492), - select_transcript=False, - version=2)) - session.add(TranscriptMapping( - chromosome_mt, - 'refseq', - 'NC_012920', - 'ND4', - 'forward', - 10760, - 12137, - [10760], - [12137], - 'reference', - transcript=1, - cds=(10760, 12137), - select_transcript=True, - version=1)) - session.add(TranscriptMapping( - chromosome_1, - 'refseq', - 'NM_002001', - 'FCER1A', - 'forward', - 159259504, - 159278014, - [159259504, 159272096, 159272644, 159273718, 159275778, 159277538], - [159259543, 159272209, 159272664, 159273972, 159276035, 159278014], - 'ncbi', - transcript=1, - cds=(159272155, 159277722), - select_transcript=False, - version=2)) - session.add(TranscriptMapping( - chromosome_7, - 'refseq', - 'XM_001715131', - 'LOC100132858', - 'reverse', - 19828, - 36378, - [19828, 20834, 31060, 32957, 35335, 36224], - [19895, 21029, 31437, 33107, 35541, 36378], - 'ncbi', - transcript=1, - cds=(19828, 36378), - select_transcript=False, - version=2)) - session.add(TranscriptMapping( - chromosome_x, - 'refseq', - 'NM_004011', - 'DMD', - 'reverse', - 31137345, - 32430371, - [31137345, 31144759, 31152219, 31164408, 31165392, 31187560, - 31190465, 31191656, 31196049, 31196786, 31198487, 31200855, - 31222078, 31224699, 31227615, 31241164, 31279072, 31341715, - 31366673, 31462598, 31496223, 31497100, 31514905, 31525398, - 31645790, 31676107, 31697492, 31747748, 31792077, 31838092, - 31854835, 31893305, 31947713, 31950197, 31986456, 32235033, - 32305646, 32328199, 32360217, 32361251, 32364060, 32366523, - 32380905, 32382699, 32383137, 32398627, 32404427, 32407618, - 32408188, 32429869, 32430279], - [31140047, 31144790, 31152311, 31164531, 31165635, 31187718, - 31190530, 31191721, 31196087, 31196922, 31198598, 31201021, - 31222235, 31224784, 31227816, 31241238, 31279133, 31341775, - 31366751, 31462744, 31496491, 31497220, 31515061, 31525570, - 31645979, 31676261, 31697703, 31747865, 31792309, 31838200, - 31854936, 
31893490, 31947862, 31950344, 31986631, 32235180, - 32305818, 32328393, 32360399, 32361403, 32364197, 32366645, - 32381075, 32382827, 32383316, 32398797, 32404582, 32407791, - 32408298, 32430030, 32430371], - 'ncbi', - transcript=1, - cds=(31140036, 32430326), - select_transcript=False, - version=3)) - session.add(TranscriptMapping( - chromosome_x, - 'refseq', - 'NM_004019', - 'DMD', - 'reverse', - 31196312, - 31285024, - [31196312, 31198487, 31200855, 31222078, 31224699, 31227615, - 31241164, 31279072, 31284927], - [31196922, 31198598, 31201021, 31222235, 31224784, 31227816, - 31241238, 31279133, 31285024], - 'ncbi', - transcript=1, - cds=(31196782, 31284946), - select_transcript=False, - version=2)) - session.add(TranscriptMapping( - chromosome_x, - 'refseq', - 'NM_004007', - 'DMD', - 'reverse', - 31137345, - 33038317, - [31137345, 31144759, 31152219, 31164408, 31165392, 31187560, - 31190465, 31191656, 31196049, 31196786, 31198487, 31200855, - 31222078, 31224699, 31227615, 31241164, 31279072, 31341715, - 31366673, 31462598, 31496223, 31497100, 31514905, 31525398, - 31645790, 31676107, 31697492, 31747748, 31792077, 31838092, - 31854835, 31893305, 31947713, 31950197, 31986456, 32235033, - 32305646, 32328199, 32360217, 32361251, 32364060, 32366523, - 32380905, 32382699, 32383137, 32398627, 32404427, 32407618, - 32408188, 32429869, 32456358, 32459297, 32466573, 32472779, - 32481556, 32482703, 32486615, 32490281, 32503036, 32509394, - 32519872, 32536125, 32563276, 32583819, 32591647, 32591862, - 32613874, 32632420, 32662249, 32663081, 32715987, 32717229, - 32827610, 32834585, 32841412, 32862900, 32867845, 33038256], - [31140047, 31144790, 31152311, 31164531, 31165635, 31187718, - 31190530, 31191721, 31196087, 31196922, 31198598, 31201021, - 31222235, 31224784, 31227816, 31241238, 31279133, 31341775, - 31366751, 31462744, 31496491, 31497220, 31515061, 31525570, - 31645979, 31676261, 31697703, 31747865, 31792309, 31838200, - 31854936, 31893490, 31947862, 31950344, 
31986631, 32235180, - 32305818, 32328393, 32360399, 32361403, 32364197, 32366645, - 32381075, 32382827, 32383316, 32398797, 32404582, 32407791, - 32408298, 32430030, 32456507, 32459431, 32466755, 32472949, - 32481711, 32482816, 32486827, 32490426, 32503216, 32509635, - 32519959, 32536248, 32563451, 32583998, 32591754, 32591963, - 32613993, 32632570, 32662430, 32663269, 32716115, 32717410, - 32827728, 32834757, 32841504, 32862977, 32867937, 33038317], - 'ncbi', - transcript=1, - cds=(31140036, 32834745), - select_transcript=False, - version=2)) - session.add(TranscriptMapping( - chromosome_x, - 'refseq', - 'NM_203473', - 'PORCN', - 'forward', - 48367371, - 48379202, - [48367371, 48368172, 48369683, 48370280, 48370714, 48370977, - 48371223, 48372628, 48372913, 48374105, 48374278, 48374449, - 48375571, 48378763], - [48367491, 48368344, 48369875, 48370323, 48370895, 48371107, - 48371240, 48372753, 48373013, 48374181, 48374341, 48374534, - 48375681, 48379202], - 'ncbi', - transcript=1, - cds=(48368209, 48378864), - select_transcript=False, - version=1)) - session.add(TranscriptMapping( - chromosome_x, - 'refseq', - 'NM_000132', - 'F8', - 'reverse', - 154064063, - 154250998, - [154064063, 154088707, 154089993, 154091358, 154124352, 154128141, - 154129646, 154130326, 154132181, 154132571, 154133086, 154134695, - 154156846, 154175973, 154182167, 154185232, 154189350, 154194245, - 154194701, 154197606, 154212962, 154215512, 154221211, 154225248, - 154227754, 154250685], - [154066027, 154088883, 154090141, 154091502, 154124507, 154128226, - 154129717, 154130442, 154132363, 154132799, 154133298, 154134848, - 154159951, 154176182, 154182317, 154185446, 154189443, 154194416, - 154194962, 154197827, 154213078, 154215580, 154221423, 154225370, - 154227875, 154250998], - 'ncbi', - transcript=1, - cds=(154065872, 154250827), - select_transcript=False, - version=3)) - session.add(TranscriptMapping( - chromosome_3, - 'refseq', - 'NM_000249', - 'MLH1', - 'forward', - 37034841, - 
37092337, - [37034841, 37038110, 37042446, 37045892, 37048482, 37050305, - 37053311, 37053502, 37055923, 37058997, 37061801, 37067128, - 37070275, 37081677, 37083759, 37089010, 37090008, 37090395, - 37091977], - [37035154, 37038200, 37042544, 37045965, 37048554, 37050396, - 37053353, 37053590, 37056035, 37059090, 37061954, 37067498, - 37070423, 37081785, 37083822, 37089174, 37090100, 37090508, - 37092337], - 'ncbi', - transcript=1, - cds=(37035039, 37092144), - select_transcript=False, - version=3)) - - session.commit() - - -def cache(*references): - """ - Returns a cache fixture for the given references. - """ - def cache_with_references(): - for reference in references: - entry = REFERENCES[reference] - try: - accession = entry['accession'] - except KeyError: - accession = reference - geninfo_id = entry.get('geninfo_id') + db.session.add(assembly) - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', - entry['filename']) - shutil.copy(path, settings.CACHE_DIR) + db.session.add_all(Chromosome(assembly, name, accession, organelle) + for accession, name, organelle in [ + ('NC_000001.10', 'chr1', 'nucleus'), + ('NC_000002.11', 'chr2', 'nucleus'), + ('NC_000003.11', 'chr3', 'nucleus'), + ('NC_000004.11', 'chr4', 'nucleus'), + ('NC_000005.9', 'chr5', 'nucleus'), + ('NC_000006.11', 'chr6', 'nucleus'), + ('NC_000007.13', 'chr7', 'nucleus'), + ('NC_000008.10', 'chr8', 'nucleus'), + ('NC_000009.11', 'chr9', 'nucleus'), + ('NC_000010.10', 'chr10', 'nucleus'), + ('NC_000011.9', 'chr11', 'nucleus'), + ('NC_000012.11', 'chr12', 'nucleus'), + ('NC_000013.10', 'chr13', 'nucleus'), + ('NC_000014.8', 'chr14', 'nucleus'), + ('NC_000015.9', 'chr15', 'nucleus'), + ('NC_000016.9', 'chr16', 'nucleus'), + ('NC_000017.10', 'chr17', 'nucleus'), + ('NC_000018.9', 'chr18', 'nucleus'), + ('NC_000019.9', 'chr19', 'nucleus'), + ('NC_000020.10', 'chr20', 'nucleus'), + ('NC_000021.8', 'chr21', 'nucleus'), + ('NC_000022.10', 'chr22', 'nucleus'), + ('NC_000023.10', 'chrX', 
'nucleus'), + ('NC_000024.9', 'chrY', 'nucleus'), + ('NT_167244.1', 'chr6_apd_hap1', 'nucleus'), + ('NT_113891.2', 'chr6_cox_hap2', 'nucleus'), + ('NT_167245.1', 'chr6_dbb_hap3', 'nucleus'), + ('NT_167246.1', 'chr6_mann_hap4', 'nucleus'), + ('NT_167247.1', 'chr6_mcf_hap5', 'nucleus'), + ('NT_167248.1', 'chr6_qbl_hap6', 'nucleus'), + ('NT_167249.1', 'chr6_ssto_hap7', 'nucleus'), + ('NT_167250.1', 'chr4_ctg9_hap1', 'nucleus'), + ('NT_167251.1', 'chr17_ctg5_hap1', 'nucleus'), + ('NC_012920.1', 'chrM', 'mitochondrion')]) - session.add(Reference(accession, entry['checksum'], - geninfo_identifier=geninfo_id)) + db.session.commit() - for transcript, protein in entry.get('links', []): - session.add(TranscriptProteinLink(transcript, protein)) + return assembly - session.commit() - return cache_with_references +@pytest.fixture +def hg19_transcript_mappings(db, hg19): + """ + Fixture for some selected transcript mappings in the GRCh37/hg19 genome + assembly. + """ + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr11').one(), + 'refseq', + 'NM_003002', + 'SDHD', + 'forward', + 111957571, + 111966518, + [111957571, 111958581, 111959591, 111965529], + [111957683, 111958697, 111959735, 111966518], + 'ncbi', + transcript=1, + cds=(111957632, 111965694), + select_transcript=False, + version=2)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr11').one(), + 'refseq', + 'NM_012459', + 'TIMM8B', + 'reverse', + 111955524, + 111957522, + [111955524, 111957364], + [111956186, 111957522], + 'ncbi', + transcript=1, + cds=(111956019, 111957492), + select_transcript=False, + version=2)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr11').one(), + 'refseq', + 'NR_028383', + 'TIMM8B', + 'reverse', + 111955524, + 111957522, + [111955524, 111956702, 111957364], + [111956186, 111957034, 111957522], + 'ncbi', + transcript=1, + cds=None, + select_transcript=False, + version=1)) + db.session.add(TranscriptMapping( + 
hg19.chromosomes.filter_by(name='chr6').one(), + 'refseq', + 'NM_000500', + 'CYP21A2', + 'forward', + 32006082, + 32009419, + [32006082, 32006499, 32006871, 32007133, 32007323, 32007526, + 32007782, 32008183, 32008445, 32008646], + [32006401, 32006588, 32007025, 32007234, 32007424, 32007612, + 32007982, 32008361, 32008548, 32009419], + 'ncbi', + transcript=1, + cds=(32006200, 32008911), + select_transcript=False, + version=5)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr22').one(), + 'refseq', + 'NM_001145134', + 'CPT1B', + 'reverse', + 51007290, + 51017096, + [51007290, 51007765, 51008005, 51008722, 51009320, 51009587, + 51009804, 51010435, 51010632, 51011304, 51011949, 51012764, + 51012922, 51014464, 51014627, 51015286, 51015753, 51016204, + 51016978], + [51007510, 51007850, 51008097, 51008835, 51009472, 51009721, + 51009968, 51010551, 51010737, 51011489, 51012144, 51012848, + 51013029, 51014541, 51014764, 51015463, 51015892, 51016363, + 51017096], + 'ncbi', + transcript=1, + cds=(51007767, 51016344), + select_transcript=False, + version=1)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr22').one(), + 'refseq', + 'NR_021492', + 'LOC100144603', + 'forward', + 51021455, + 51022356, + [51021455, 51022027], + [51021752, 51022356], + 'ncbi', + transcript=1, + cds=None, + select_transcript=False, + version=1)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr1').one(), + 'refseq', + 'NM_001007553', + 'CSDE1', + 'reverse', + 115259538, + 115300624, + [115259538, 115261234, 115262200, 115263160, 115266504, 115267842, + 115268832, 115269604, 115272879, 115273129, 115275225, 115276353, + 115276610, 115277063, 115279379, 115280092, 115280584, 115282313, + 115292442, 115300546], + [115260837, 115261366, 115262363, 115263338, 115266623, 115267954, + 115269007, 115269711, 115273043, 115273269, 115275437, 115276478, + 115276738, 115277144, 115279476, 115280184, 115280693, 115282511, + 115292828, 
115300624], + 'ncbi', + transcript=1, + cds=(115260790, 115282511), + select_transcript=False, + version=1)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr1').one(), + 'refseq', + 'NM_001130523', + 'CSDE1', + 'reverse', + 115259538, + 115300671, + [115259538, 115261234, 115262200, 115263160, 115266504, 115267842, + 115268832, 115269604, 115272879, 115273129, 115275225, 115276353, + 115276610, 115277063, 115279379, 115280584, 115282313, 115284148, + 115292442, 115300546], + [115260837, 115261366, 115262363, 115263338, 115266623, 115267954, + 115269007, 115269711, 115273043, 115273269, 115275437, 115276478, + 115276738, 115277144, 115279476, 115280693, 115282511, 115284294, + 115292828, 115300671], + 'ncbi', + transcript=1, + cds=(115260790, 115284285), + select_transcript=False, + version=1)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr1').one(), + 'refseq', + 'NM_002241', + 'KCNJ10', + 'reverse', + 160007257, + 160040051, + [160007257, 160039812], + [160012322, 160040051], + 'ncbi', + transcript=1, + cds=(160011183, 160012322), + select_transcript=False, + version=4)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr20').one(), + 'refseq', + 'NM_001162505', + 'TMEM189', + 'reverse', + 48740274, + 48770335, + [48740274, 48744512, 48746083, 48747402, 48760039, 48770054], + [48741716, 48744724, 48746227, 48747484, 48760158, 48770335], + 'ncbi', + transcript=1, + cds=(48741595, 48770174), + select_transcript=False, + version=1)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr8').one(), + 'refseq', + 'NM_017780', + 'CHD7', + 'forward', + 61591339, + 61779465, + [61591339, 61653818, 61693559, 61707545, 61712947, 61714087, + 61720776, 61728946, 61732566, 61734349, 61734583, 61735062, + 61736399, 61741222, 61742881, 61748632, 61749376, 61750227, + 61750635, 61754203, 61754406, 61757423, 61757809, 61761074, + 61761610, 61763052, 61763591, 61763821, 61764578, 
61765057, + 61765388, 61766922, 61768534, 61769004, 61773463, 61774755, + 61775107, 61777575], + [61591641, 61655656, 61693989, 61707686, 61713084, 61714152, + 61720831, 61729060, 61732649, 61734486, 61734704, 61735305, + 61736575, 61741365, 61743136, 61748842, 61749571, 61750394, + 61750814, 61754313, 61754611, 61757622, 61757968, 61761163, + 61761713, 61763181, 61763663, 61763878, 61764806, 61765265, + 61766059, 61767082, 61768761, 61769447, 61773684, 61774895, + 61775211, 61779465], + 'ncbi', + transcript=1, + cds=(61653992, 61778492), + select_transcript=False, + version=2)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chrM').one(), + 'refseq', + 'NC_012920', + 'ND4', + 'forward', + 10760, + 12137, + [10760], + [12137], + 'reference', + transcript=1, + cds=(10760, 12137), + select_transcript=True, + version=1)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr1').one(), + 'refseq', + 'NM_002001', + 'FCER1A', + 'forward', + 159259504, + 159278014, + [159259504, 159272096, 159272644, 159273718, 159275778, 159277538], + [159259543, 159272209, 159272664, 159273972, 159276035, 159278014], + 'ncbi', + transcript=1, + cds=(159272155, 159277722), + select_transcript=False, + version=2)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr7').one(), + 'refseq', + 'XM_001715131', + 'LOC100132858', + 'reverse', + 19828, + 36378, + [19828, 20834, 31060, 32957, 35335, 36224], + [19895, 21029, 31437, 33107, 35541, 36378], + 'ncbi', + transcript=1, + cds=(19828, 36378), + select_transcript=False, + version=2)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chrX').one(), + 'refseq', + 'NM_004011', + 'DMD', + 'reverse', + 31137345, + 32430371, + [31137345, 31144759, 31152219, 31164408, 31165392, 31187560, + 31190465, 31191656, 31196049, 31196786, 31198487, 31200855, + 31222078, 31224699, 31227615, 31241164, 31279072, 31341715, + 31366673, 31462598, 31496223, 31497100, 31514905, 
31525398, + 31645790, 31676107, 31697492, 31747748, 31792077, 31838092, + 31854835, 31893305, 31947713, 31950197, 31986456, 32235033, + 32305646, 32328199, 32360217, 32361251, 32364060, 32366523, + 32380905, 32382699, 32383137, 32398627, 32404427, 32407618, + 32408188, 32429869, 32430279], + [31140047, 31144790, 31152311, 31164531, 31165635, 31187718, + 31190530, 31191721, 31196087, 31196922, 31198598, 31201021, + 31222235, 31224784, 31227816, 31241238, 31279133, 31341775, + 31366751, 31462744, 31496491, 31497220, 31515061, 31525570, + 31645979, 31676261, 31697703, 31747865, 31792309, 31838200, + 31854936, 31893490, 31947862, 31950344, 31986631, 32235180, + 32305818, 32328393, 32360399, 32361403, 32364197, 32366645, + 32381075, 32382827, 32383316, 32398797, 32404582, 32407791, + 32408298, 32430030, 32430371], + 'ncbi', + transcript=1, + cds=(31140036, 32430326), + select_transcript=False, + version=3)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chrX').one(), + 'refseq', + 'NM_004019', + 'DMD', + 'reverse', + 31196312, + 31285024, + [31196312, 31198487, 31200855, 31222078, 31224699, 31227615, + 31241164, 31279072, 31284927], + [31196922, 31198598, 31201021, 31222235, 31224784, 31227816, + 31241238, 31279133, 31285024], + 'ncbi', + transcript=1, + cds=(31196782, 31284946), + select_transcript=False, + version=2)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chrX').one(), + 'refseq', + 'NM_004007', + 'DMD', + 'reverse', + 31137345, + 33038317, + [31137345, 31144759, 31152219, 31164408, 31165392, 31187560, + 31190465, 31191656, 31196049, 31196786, 31198487, 31200855, + 31222078, 31224699, 31227615, 31241164, 31279072, 31341715, + 31366673, 31462598, 31496223, 31497100, 31514905, 31525398, + 31645790, 31676107, 31697492, 31747748, 31792077, 31838092, + 31854835, 31893305, 31947713, 31950197, 31986456, 32235033, + 32305646, 32328199, 32360217, 32361251, 32364060, 32366523, + 32380905, 32382699, 32383137, 32398627, 
32404427, 32407618, + 32408188, 32429869, 32456358, 32459297, 32466573, 32472779, + 32481556, 32482703, 32486615, 32490281, 32503036, 32509394, + 32519872, 32536125, 32563276, 32583819, 32591647, 32591862, + 32613874, 32632420, 32662249, 32663081, 32715987, 32717229, + 32827610, 32834585, 32841412, 32862900, 32867845, 33038256], + [31140047, 31144790, 31152311, 31164531, 31165635, 31187718, + 31190530, 31191721, 31196087, 31196922, 31198598, 31201021, + 31222235, 31224784, 31227816, 31241238, 31279133, 31341775, + 31366751, 31462744, 31496491, 31497220, 31515061, 31525570, + 31645979, 31676261, 31697703, 31747865, 31792309, 31838200, + 31854936, 31893490, 31947862, 31950344, 31986631, 32235180, + 32305818, 32328393, 32360399, 32361403, 32364197, 32366645, + 32381075, 32382827, 32383316, 32398797, 32404582, 32407791, + 32408298, 32430030, 32456507, 32459431, 32466755, 32472949, + 32481711, 32482816, 32486827, 32490426, 32503216, 32509635, + 32519959, 32536248, 32563451, 32583998, 32591754, 32591963, + 32613993, 32632570, 32662430, 32663269, 32716115, 32717410, + 32827728, 32834757, 32841504, 32862977, 32867937, 33038317], + 'ncbi', + transcript=1, + cds=(31140036, 32834745), + select_transcript=False, + version=2)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chrX').one(), + 'refseq', + 'NM_203473', + 'PORCN', + 'forward', + 48367371, + 48379202, + [48367371, 48368172, 48369683, 48370280, 48370714, 48370977, + 48371223, 48372628, 48372913, 48374105, 48374278, 48374449, + 48375571, 48378763], + [48367491, 48368344, 48369875, 48370323, 48370895, 48371107, + 48371240, 48372753, 48373013, 48374181, 48374341, 48374534, + 48375681, 48379202], + 'ncbi', + transcript=1, + cds=(48368209, 48378864), + select_transcript=False, + version=1)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chrX').one(), + 'refseq', + 'NM_000132', + 'F8', + 'reverse', + 154064063, + 154250998, + [154064063, 154088707, 154089993, 154091358, 
154124352, 154128141, + 154129646, 154130326, 154132181, 154132571, 154133086, 154134695, + 154156846, 154175973, 154182167, 154185232, 154189350, 154194245, + 154194701, 154197606, 154212962, 154215512, 154221211, 154225248, + 154227754, 154250685], + [154066027, 154088883, 154090141, 154091502, 154124507, 154128226, + 154129717, 154130442, 154132363, 154132799, 154133298, 154134848, + 154159951, 154176182, 154182317, 154185446, 154189443, 154194416, + 154194962, 154197827, 154213078, 154215580, 154221423, 154225370, + 154227875, 154250998], + 'ncbi', + transcript=1, + cds=(154065872, 154250827), + select_transcript=False, + version=3)) + db.session.add(TranscriptMapping( + hg19.chromosomes.filter_by(name='chr3').one(), + 'refseq', + 'NM_000249', + 'MLH1', + 'forward', + 37034841, + 37092337, + [37034841, 37038110, 37042446, 37045892, 37048482, 37050305, + 37053311, 37053502, 37055923, 37058997, 37061801, 37067128, + 37070275, 37081677, 37083759, 37089010, 37090008, 37090395, + 37091977], + [37035154, 37038200, 37042544, 37045965, 37048554, 37050396, + 37053353, 37053590, 37056035, 37059090, 37061954, 37067498, + 37070423, 37081785, 37083822, 37089174, 37090100, 37090508, + 37092337], + 'ncbi', + transcript=1, + cds=(37035039, 37092144), + select_transcript=False, + version=3)) + + db.session.commit() diff --git a/tests/test_crossmap.py b/tests/test_crossmap.py index 990f93fe..af4cc963 100644 --- a/tests/test_crossmap.py +++ b/tests/test_crossmap.py @@ -1,362 +1,376 @@ """ -Tests for the Crossmap module. +Tests for the mutalyzer.Crossmap module. """ from __future__ import unicode_literals -#import logging; logging.basicConfig() - from mutalyzer.Crossmap import Crossmap -from utils import MutalyzerTest - - -class TestCrossmap(MutalyzerTest): - """ - Test the Crossmap class. - """ - def test_splice_sites(self): - """ - Check whether the gene on the forward strand has the right splice - sites in c. notation. 
- """ - rna = [5002, 5125, 27745, 27939, 58661, 58762, 74680, 74767, 103409, - 103528, 119465, 119537, 144687, 144810, 148418, 149215] - cds = [27925, 74736] - cm = Crossmap(rna, cds, 1) - assert (cm._Crossmap__crossmapping == - [-304, -181, -180, 15, 16, 117, 118, 205, 206, 325, 326, 398, - 399, 522, 523, 1320]) - - def test_splice_sites_reverse(self): - """ - Check whether the gene on the reverse strand has the right splice - sites in c. notation. - """ - rna = [2000, 2797, 6405, 6528, 31678, 31750, 47687, 47806, 76448, - 76535, 92453, 92554, 123276, 123470, 146090, 146213] - cds = [76479, 123290] - cm = Crossmap(rna, cds, -1) - assert (cm._Crossmap__crossmapping == - [1320, 523, 522, 399, 398, 326, 325, 206, 205, 118, 117, 16, - 15, -180, -181, -304]) - - def test_g2x(self): - """ - Do some g. to c. conversion checking for the gene on the forward - strand. - """ - rna = [5002, 5125, 27745, 27939, 58661, 58762, 74680, 74767, 103409, - 103528, 119465, 119537, 144687, 144810, 148418, 149215] - cds = [27925, 74736] - cm = Crossmap(rna, cds, 1) - # Fix for r536: disable the -u and +d convention. - #assert cm.tuple2string(cm.g2x(5001)) == '-304-u1' - assert cm.tuple2string(cm.g2x(5001)) == '-305' - assert cm.tuple2string(cm.g2x(5124)) == '-182' - assert cm.tuple2string(cm.g2x(5126)) == '-181+1' - assert cm.tuple2string(cm.g2x(27924)) == '-1' - assert cm.tuple2string(cm.g2x(27925)) == '1' - assert cm.tuple2string(cm.g2x(58660)) == '16-1' - assert cm.tuple2string(cm.g2x(74736)) == '174' - assert cm.tuple2string(cm.g2x(74737)) == '*1' - assert cm.tuple2string(cm.g2x(103408)) == '*32-1' - assert cm.tuple2string(cm.g2x(103410)) == '*33' - # Fix for r536: disable the -u and +d convention. - #assert cm.tuple2string(cm.g2x(149216)) == '*1146+d1' - assert cm.tuple2string(cm.g2x(149216)) == '*1147' - - def test_g2x_reverse(self): - """ - Do some g. to c. conversion checking for the gene on the reverse - strand. 
- """ - rna = [2000, 2797, 6405, 6528, 31678, 31750, 47687, 47806, 76448, - 76535, 92453, 92554, 123276, 123470, 146090, 146213] - cds = [76479, 123290] - cm = Crossmap(rna, cds, -1) - # Fix for r536: disable the -u and +d convention. - #assert cm.tuple2string(cm.g2x(146214)) == '-304-u1' - assert cm.tuple2string(cm.g2x(146214)) == '-305' - assert cm.tuple2string(cm.g2x(146091)) == '-182' - assert cm.tuple2string(cm.g2x(146089)) == '-181+1' - assert cm.tuple2string(cm.g2x(123291)) == '-1' - assert cm.tuple2string(cm.g2x(123290)) == '1' - assert cm.tuple2string(cm.g2x(92555)) == '16-1' - assert cm.tuple2string(cm.g2x(76479)) == '174' - assert cm.tuple2string(cm.g2x(76478)) == '*1' - assert cm.tuple2string(cm.g2x(47807)) == '*32-1' - assert cm.tuple2string(cm.g2x(47805)) == '*33' - # Fix for r536: disable the -u and +d convention. - #assert cm.tuple2string(cm.g2x(1999)) == '*1146+d1' - assert cm.tuple2string(cm.g2x(1999)) == '*1147' - - def test_x2g_more(self): - """ - Do some c. to g. conversion checking for the gene on the forward - strand. - """ - rna = [5002, 5125, 27745, 27939, 58661, 58762, 74680, 74767, 103409, - 103528, 119465, 119537, 144687, 144810, 148418, 149215] - cds = [27925, 74736] - cm = Crossmap(rna, cds, 1) - assert cm.x2g(-304, -1) == 5001 - assert cm.x2g(-182, 0) == 5124 - assert cm.x2g(-181, 1) == 5126 - assert cm.x2g(-1, 0) == 27924 - assert cm.x2g(1, 0) == 27925 - assert cm.x2g(16, -1) == 58660 - assert cm.x2g(174, 0) == 74736 - assert cm.x2g(cm.main2int('*1'), 0) == 74737 - assert cm.x2g(cm.main2int('*32'), -1) == 103408 - assert cm.x2g(cm.main2int('*33'), 0) == 103410 - assert cm.x2g(cm.main2int('*1146'), 1) == 149216 - - def test_x2g_more_reverse(self): - """ - Do some c. to g. conversion checking for the gene on the reverse - strand. 
- """ - rna = [2000, 2797, 6405, 6528, 31678, 31750, 47687, 47806, 76448, - 76535, 92453, 92554, 123276, 123470, 146090, 146213] - cds = [76479, 123290] - cm = Crossmap(rna, cds, -1) - assert cm.x2g(-304, -1) == 146214 - assert cm.x2g(-182, 0) == 146091 - assert cm.x2g(-181, 1) == 146089 - assert cm.x2g(-1, 0) == 123291 - assert cm.x2g(1, 0) == 123290 - assert cm.x2g(16, -1) == 92555 - assert cm.x2g(174, 0) == 76479 - assert cm.x2g(cm.main2int('*1'), 0) == 76478 - assert cm.x2g(cm.main2int('*32'), -1) == 47807 - assert cm.x2g(cm.main2int('*33'), 0) == 47805 - assert cm.x2g(cm.main2int('*1146'), 1) == 1999 - - def test_g2x_missing_exons(self): - """ - Hypothetical gene, missing the first exon and the last two exons - should have the same crossmapping on the shared part. - """ - rna1 = [5002, 5125, 27745, 27939, 58661, 58762, 74680, 74767, 103409, - 103528, 119465, 119537, 144687, 144810, 148418, 149215] - rna2 = [27745, 27939, 58661, 58762, 74680, 74767, 103409, 103528, - 119465, 119537] - cds = [27925, 74736] - cm1 = Crossmap(rna1, cds, 1) - cm2 = Crossmap(rna2, cds, 1) - assert cm1.g2x(27925) == cm2.g2x(27925) - - def test_g2x_missing_exons_reverse(self): - """ - Hypothetical gene on the reverse strand, missing the first exon and - the last two exons should have the same crossmapping on the shared - part. - """ - rna1 = [2000, 2797, 6405, 6528, 31678, 31750, 47687, 47806, 76448, - 76535, 92453, 92554, 123276, 123470, 146090, 146213] - rna2 = [31678, 31750, 47687, 47806, 76448, 76535, 92453, 92554, - 123276, 123470] - cds = [76479, 123290] - cm1 = Crossmap(rna1, cds, -1) - cm2 = Crossmap(rna2, cds, -1) - assert cm1.g2x(123290) == cm2.g2x(123290) - - def test_splice_sites_noncoding(self): - """ - Check whether the gene on the forward strand has the right splice - sites in n. notation. 
- """ - rna = [5002, 5125, 27745, 27939, 58661, 58762, 74680, 74767, 103409, - 103528, 119465, 119537, 144687, 144810, 148418, 149215] - cm = Crossmap(rna, [], 1) - assert (cm._Crossmap__crossmapping == - [1, 124, 125, 319, 320, 421, 422, 509, 510, 629, 630, 702, - 703, 826, 827, 1624]) - - def test_splice_sites_noncoding_reverse(self): - """ - Check whether the gene on the reverse strand has the right splice - sites in n. notation. - """ - rna = [2000, 2797, 6405, 6528, 31678, 31750, 47687, 47806, 76448, - 76535, 92453, 92554, 123276, 123470, 146090, 146213] - cm = Crossmap(rna, [], -1) - assert (cm._Crossmap__crossmapping == - [1624, 827, 826, 703, 702, 630, 629, 510, 509, 422, 421, 320, - 319, 125, 124, 1]) - - def test_g2x_noncoding(self): - """ - Do some g. to n. conversion checking for the gene on the forward - strand. - """ - rna = [5002, 5125, 27745, 27939, 58661, 58762, 74680, 74767, 103409, - 103528, 119465, 119537, 144687, 144810, 148418, 149215] - cm = Crossmap(rna, [], 1) - # Fix for r536: disable the -u and +d convention. - #assert cm.tuple2string(cm.g2x(5001)) == '1-u1' - assert cm.tuple2string(cm.g2x(5001)) == '-1' - assert cm.tuple2string(cm.g2x(5002)) == '1' - assert cm.tuple2string(cm.g2x(5126)) == '124+1' - # Fix for r536: disable the -u and +d convention. - #assert cm.tuple2string(cm.g2x(149216)) == '1624+d1' - assert cm.tuple2string(cm.g2x(149216)) == '*1' - - def test_g2x_noncoding_reverse(self): - """ - Do some g. to n. conversion checking for the gene on the reverse - strand. - """ - rna = [2000, 2797, 6405, 6528, 31678, 31750, 47687, 47806, 76448, - 76535, 92453, 92554, 123276, 123470, 146090, 146213] - cm = Crossmap(rna, [], -1) - # Fix for r536: disable the -u and +d convention. - #assert cm.tuple2string(cm.g2x(146214)) == '1-u1' - assert cm.tuple2string(cm.g2x(146214)) == '-1' - assert cm.tuple2string(cm.g2x(146213)) == '1' - assert cm.tuple2string(cm.g2x(146089)) == '124+1' - # Fix for r536: disable the -u and +d convention. 
- #assert cm.tuple2string(cm.g2x(1999)) == '1624+d1' - assert cm.tuple2string(cm.g2x(1999)) == '*1' - - def test_x2g_noncoding(self): - """ - Do some n. to g. conversion checking for the gene on the forward - strand. - """ - rna = [5002, 5125, 27745, 27939, 58661, 58762, 74680, 74767, 103409, - 103528, 119465, 119537, 144687, 144810, 148418, 149215] - cm = Crossmap(rna, [], 1) - assert cm.x2g(1, -1) == 5001 - assert cm.x2g(1, 0) == 5002 - assert cm.x2g(124, 1) == 5126 - assert cm.x2g(1624, 1) == 149216 - - def test_x2g_noncoding_reverse(self): - """ - Do some n. to g. conversion checking for the gene on the reverse - strand. - """ - rna = [2000, 2797, 6405, 6528, 31678, 31750, 47687, 47806, 76448, - 76535, 92453, 92554, 123276, 123470, 146090, 146213] - cm = Crossmap(rna, [], -1) - assert cm.x2g(1, -1) == 146214 - assert cm.x2g(1, 0) == 146213 - assert cm.x2g(124, 1) == 146089 - assert cm.x2g(1624, 1) == 1999 - - def test_cds_one_exon(self): - """ - Test a gene that has a CDS that lies entirely in one exon. - """ - rna = [1, 80, 81, 3719] - cds = [162, 2123] - cm = Crossmap(rna, cds, 1) - assert cm._Crossmap__crossmapping == [-161, -82, -81, 3558] - assert cm.x2g(1, 0) == 162 - assert cm.tuple2string(cm.g2x(2123)) == '1962' - assert cm.tuple2string(cm.g2x(2124)) == '*1' - - def test_cds_start_on_splice_site(self): - """ - Test a gene that has a CDS that starts on an exon splice site. - """ - rna = [23755059, 23755214, 23777833, 23778028, 23808749, 23808851, - 23824768, 23824856, 23853497, 23853617, 23869553, 23869626, - 23894775, 23894899, 23898506, 23899304] - cds = [23777833, 23898680] - cm = Crossmap(rna, cds, 1) - assert (cm._Crossmap__crossmapping == - [-156, -1, 1, 196, 197, 299, 300, 388, 389, 509, 510, 583, - 584, 708, 709, 1507]) - assert cm.x2g(1, 0) == 23777833 - # Fix for r536: disable the -u and +d convention. 
- #assert cm.tuple2string(cm.g2x(2123)) == '-156-u23752936' - #assert cm.tuple2string(cm.g2x(2124)) == '-156-u23752935' - assert cm.tuple2string(cm.g2x(2123)) == '-23753092' - assert cm.tuple2string(cm.g2x(2124)) == '-23753091' - - def test_cds_start_on_splice_site_reverse(self): - """ - Test a gene on the reverse strand that has a CDS that starts on an - exon splice site. - """ - rna = [23777833, 23778028, 23808749, 23808851, 23824768, 23824856, - 23853497, 23853617, 23869553, 23869626, 23894775, 23894899, - 23898506, 23899304] - cds = [23755214, 23778028] - cm = Crossmap(rna, cds, -1) - assert (cm._Crossmap__crossmapping == - [196, 1, -1, -103, -104, -192, -193, -313, -314, -387, -388, - -512, -513, -1311]) - - def test_cds_start_on_splice_site_other(self): - """ - Test a gene that has a CDS that starts on an other exon splice site. - """ - rna = [23755059, 23755214, 23777833, 23778028, 23808749, 23808851, - 23824768, 23824856, 23853497, 23853617, 23869553, 23869626, - 23894775, 23894899, 23898506, 23899304] - cds = [23755214, 23898680] - cm = Crossmap(rna, cds, 1) - assert (cm._Crossmap__crossmapping == - [-155, 1, 2, 197, 198, 300, 301, 389, 390, 510, 511, 584, 585, - 709, 710, 1508]) - - def test_cds_start_on_splice_site_other_reverse(self): - """ - Test a gene on the reverse strand that has a CDS that starts on an - other exon splice site. - """ - rna = [23777833, 23778028, 23808749, 23808851, 23824768, 23824856, - 23853497, 23853617, 23869553, 23869626, 23894775, 23894899, - 23898506, 23899304] - cds = [23755214, 23808749] - cm = Crossmap(rna, cds, -1) - assert (cm._Crossmap__crossmapping == - [197, 2, 1, -102, -103, -191, -192, -312, -313, -386, -387, - -511, -512, -1310]) - - def test_cds_start_on_transcript_start(self): - """ - Upstream correction (forward) for CDS start at the start of - transcript. 
- """ - rna = [23777833, 23778028, 23808749, 23808851, 23824768, 23824856, - 23853497, 23853617, 23869553, 23869626, 23894775, 23894899, - 23898506, 23899304] - cds = [23777833, 23899304] - cm = Crossmap(rna, cds, 1) - assert cm.x2g(-1, 0) == cm.x2g(1, -1) - - def test_cds_start_on_transcript_start_reverse(self): - """ - Upstream correction (reverse) for CDS start at the start of - transcript. - """ - rna = [23777833, 23778028, 23808749, 23808851, 23824768, 23824856, - 23853497, 23853617, 23869553, 23869626, 23894775, 23894899, - 23898506, 23899304] - cds = [23777833, 23899304] - cm = Crossmap(rna, cds, -1) - assert cm.x2g(-1, 0) == cm.x2g(1, -1) - - def test_cds_is_exon(self): - """ - Gene where CDS is exactly one exon. - """ - rna = [27745, 27939, 58661, 58762, 74680, 74767] - cds = [58661, 58762] - cm = Crossmap(rna, cds, 1) - assert cm._Crossmap__crossmapping == [-195, -1, 1, 102, 103, 190] - - def test_cds_is_exon_reverse(self): - """ - Gene on the reverse strand where CDS is exactly one exon. - """ - rna = [27745, 27939, 58661, 58762, 74680, 74767] - cds = [58661, 58762] - cm = Crossmap(rna, cds, -1) - assert cm._Crossmap__crossmapping == [297, 103, 102, 1, -1, -88] + +def test_splice_sites(): + """ + Check whether the gene on the forward strand has the right splice + sites in c. notation. + """ + rna = [5002, 5125, 27745, 27939, 58661, 58762, 74680, 74767, 103409, + 103528, 119465, 119537, 144687, 144810, 148418, 149215] + cds = [27925, 74736] + cm = Crossmap(rna, cds, 1) + assert (cm._Crossmap__crossmapping == + [-304, -181, -180, 15, 16, 117, 118, 205, 206, 325, 326, 398, + 399, 522, 523, 1320]) + + +def test_splice_sites_reverse(): + """ + Check whether the gene on the reverse strand has the right splice + sites in c. notation. 
+ """ + rna = [2000, 2797, 6405, 6528, 31678, 31750, 47687, 47806, 76448, + 76535, 92453, 92554, 123276, 123470, 146090, 146213] + cds = [76479, 123290] + cm = Crossmap(rna, cds, -1) + assert (cm._Crossmap__crossmapping == + [1320, 523, 522, 399, 398, 326, 325, 206, 205, 118, 117, 16, + 15, -180, -181, -304]) + + +def test_g2x(): + """ + Do some g. to c. conversion checking for the gene on the forward + strand. + """ + rna = [5002, 5125, 27745, 27939, 58661, 58762, 74680, 74767, 103409, + 103528, 119465, 119537, 144687, 144810, 148418, 149215] + cds = [27925, 74736] + cm = Crossmap(rna, cds, 1) + # Fix for r536: disable the -u and +d convention. + # assert cm.tuple2string(cm.g2x(5001)) == '-304-u1' + assert cm.tuple2string(cm.g2x(5001)) == '-305' + assert cm.tuple2string(cm.g2x(5124)) == '-182' + assert cm.tuple2string(cm.g2x(5126)) == '-181+1' + assert cm.tuple2string(cm.g2x(27924)) == '-1' + assert cm.tuple2string(cm.g2x(27925)) == '1' + assert cm.tuple2string(cm.g2x(58660)) == '16-1' + assert cm.tuple2string(cm.g2x(74736)) == '174' + assert cm.tuple2string(cm.g2x(74737)) == '*1' + assert cm.tuple2string(cm.g2x(103408)) == '*32-1' + assert cm.tuple2string(cm.g2x(103410)) == '*33' + # Fix for r536: disable the -u and +d convention. + # assert cm.tuple2string(cm.g2x(149216)) == '*1146+d1' + assert cm.tuple2string(cm.g2x(149216)) == '*1147' + + +def test_g2x_reverse(): + """ + Do some g. to c. conversion checking for the gene on the reverse + strand. + """ + rna = [2000, 2797, 6405, 6528, 31678, 31750, 47687, 47806, 76448, + 76535, 92453, 92554, 123276, 123470, 146090, 146213] + cds = [76479, 123290] + cm = Crossmap(rna, cds, -1) + # Fix for r536: disable the -u and +d convention. 
+ # assert cm.tuple2string(cm.g2x(146214)) == '-304-u1' + assert cm.tuple2string(cm.g2x(146214)) == '-305' + assert cm.tuple2string(cm.g2x(146091)) == '-182' + assert cm.tuple2string(cm.g2x(146089)) == '-181+1' + assert cm.tuple2string(cm.g2x(123291)) == '-1' + assert cm.tuple2string(cm.g2x(123290)) == '1' + assert cm.tuple2string(cm.g2x(92555)) == '16-1' + assert cm.tuple2string(cm.g2x(76479)) == '174' + assert cm.tuple2string(cm.g2x(76478)) == '*1' + assert cm.tuple2string(cm.g2x(47807)) == '*32-1' + assert cm.tuple2string(cm.g2x(47805)) == '*33' + # Fix for r536: disable the -u and +d convention. + # assert cm.tuple2string(cm.g2x(1999)) == '*1146+d1' + assert cm.tuple2string(cm.g2x(1999)) == '*1147' + + +def test_x2g_more(): + """ + Do some c. to g. conversion checking for the gene on the forward + strand. + """ + rna = [5002, 5125, 27745, 27939, 58661, 58762, 74680, 74767, 103409, + 103528, 119465, 119537, 144687, 144810, 148418, 149215] + cds = [27925, 74736] + cm = Crossmap(rna, cds, 1) + assert cm.x2g(-304, -1) == 5001 + assert cm.x2g(-182, 0) == 5124 + assert cm.x2g(-181, 1) == 5126 + assert cm.x2g(-1, 0) == 27924 + assert cm.x2g(1, 0) == 27925 + assert cm.x2g(16, -1) == 58660 + assert cm.x2g(174, 0) == 74736 + assert cm.x2g(cm.main2int('*1'), 0) == 74737 + assert cm.x2g(cm.main2int('*32'), -1) == 103408 + assert cm.x2g(cm.main2int('*33'), 0) == 103410 + assert cm.x2g(cm.main2int('*1146'), 1) == 149216 + + +def test_x2g_more_reverse(): + """ + Do some c. to g. conversion checking for the gene on the reverse + strand. 
+ """ + rna = [2000, 2797, 6405, 6528, 31678, 31750, 47687, 47806, 76448, + 76535, 92453, 92554, 123276, 123470, 146090, 146213] + cds = [76479, 123290] + cm = Crossmap(rna, cds, -1) + assert cm.x2g(-304, -1) == 146214 + assert cm.x2g(-182, 0) == 146091 + assert cm.x2g(-181, 1) == 146089 + assert cm.x2g(-1, 0) == 123291 + assert cm.x2g(1, 0) == 123290 + assert cm.x2g(16, -1) == 92555 + assert cm.x2g(174, 0) == 76479 + assert cm.x2g(cm.main2int('*1'), 0) == 76478 + assert cm.x2g(cm.main2int('*32'), -1) == 47807 + assert cm.x2g(cm.main2int('*33'), 0) == 47805 + assert cm.x2g(cm.main2int('*1146'), 1) == 1999 + + +def test_g2x_missing_exons(): + """ + Hypothetical gene, missing the first exon and the last two exons + should have the same crossmapping on the shared part. + """ + rna1 = [5002, 5125, 27745, 27939, 58661, 58762, 74680, 74767, 103409, + 103528, 119465, 119537, 144687, 144810, 148418, 149215] + rna2 = [27745, 27939, 58661, 58762, 74680, 74767, 103409, 103528, + 119465, 119537] + cds = [27925, 74736] + cm1 = Crossmap(rna1, cds, 1) + cm2 = Crossmap(rna2, cds, 1) + assert cm1.g2x(27925) == cm2.g2x(27925) + + +def test_g2x_missing_exons_reverse(): + """ + Hypothetical gene on the reverse strand, missing the first exon and + the last two exons should have the same crossmapping on the shared + part. + """ + rna1 = [2000, 2797, 6405, 6528, 31678, 31750, 47687, 47806, 76448, + 76535, 92453, 92554, 123276, 123470, 146090, 146213] + rna2 = [31678, 31750, 47687, 47806, 76448, 76535, 92453, 92554, + 123276, 123470] + cds = [76479, 123290] + cm1 = Crossmap(rna1, cds, -1) + cm2 = Crossmap(rna2, cds, -1) + assert cm1.g2x(123290) == cm2.g2x(123290) + + +def test_splice_sites_noncoding(): + """ + Check whether the gene on the forward strand has the right splice + sites in n. notation. 
+ """ + rna = [5002, 5125, 27745, 27939, 58661, 58762, 74680, 74767, 103409, + 103528, 119465, 119537, 144687, 144810, 148418, 149215] + cm = Crossmap(rna, [], 1) + assert (cm._Crossmap__crossmapping == + [1, 124, 125, 319, 320, 421, 422, 509, 510, 629, 630, 702, + 703, 826, 827, 1624]) + + +def test_splice_sites_noncoding_reverse(): + """ + Check whether the gene on the reverse strand has the right splice + sites in n. notation. + """ + rna = [2000, 2797, 6405, 6528, 31678, 31750, 47687, 47806, 76448, + 76535, 92453, 92554, 123276, 123470, 146090, 146213] + cm = Crossmap(rna, [], -1) + assert (cm._Crossmap__crossmapping == + [1624, 827, 826, 703, 702, 630, 629, 510, 509, 422, 421, 320, + 319, 125, 124, 1]) + + +def test_g2x_noncoding(): + """ + Do some g. to n. conversion checking for the gene on the forward + strand. + """ + rna = [5002, 5125, 27745, 27939, 58661, 58762, 74680, 74767, 103409, + 103528, 119465, 119537, 144687, 144810, 148418, 149215] + cm = Crossmap(rna, [], 1) + # Fix for r536: disable the -u and +d convention. + # assert cm.tuple2string(cm.g2x(5001)) == '1-u1' + assert cm.tuple2string(cm.g2x(5001)) == '-1' + assert cm.tuple2string(cm.g2x(5002)) == '1' + assert cm.tuple2string(cm.g2x(5126)) == '124+1' + # Fix for r536: disable the -u and +d convention. + # assert cm.tuple2string(cm.g2x(149216)) == '1624+d1' + assert cm.tuple2string(cm.g2x(149216)) == '*1' + + +def test_g2x_noncoding_reverse(): + """ + Do some g. to n. conversion checking for the gene on the reverse + strand. + """ + rna = [2000, 2797, 6405, 6528, 31678, 31750, 47687, 47806, 76448, + 76535, 92453, 92554, 123276, 123470, 146090, 146213] + cm = Crossmap(rna, [], -1) + # Fix for r536: disable the -u and +d convention. + # assert cm.tuple2string(cm.g2x(146214)) == '1-u1' + assert cm.tuple2string(cm.g2x(146214)) == '-1' + assert cm.tuple2string(cm.g2x(146213)) == '1' + assert cm.tuple2string(cm.g2x(146089)) == '124+1' + # Fix for r536: disable the -u and +d convention. 
+ # assert cm.tuple2string(cm.g2x(1999)) == '1624+d1' + assert cm.tuple2string(cm.g2x(1999)) == '*1' + + +def test_x2g_noncoding(): + """ + Do some n. to g. conversion checking for the gene on the forward + strand. + """ + rna = [5002, 5125, 27745, 27939, 58661, 58762, 74680, 74767, 103409, + 103528, 119465, 119537, 144687, 144810, 148418, 149215] + cm = Crossmap(rna, [], 1) + assert cm.x2g(1, -1) == 5001 + assert cm.x2g(1, 0) == 5002 + assert cm.x2g(124, 1) == 5126 + assert cm.x2g(1624, 1) == 149216 + + +def test_x2g_noncoding_reverse(): + """ + Do some n. to g. conversion checking for the gene on the reverse + strand. + """ + rna = [2000, 2797, 6405, 6528, 31678, 31750, 47687, 47806, 76448, + 76535, 92453, 92554, 123276, 123470, 146090, 146213] + cm = Crossmap(rna, [], -1) + assert cm.x2g(1, -1) == 146214 + assert cm.x2g(1, 0) == 146213 + assert cm.x2g(124, 1) == 146089 + assert cm.x2g(1624, 1) == 1999 + + +def test_cds_one_exon(): + """ + Test a gene that has a CDS that lies entirely in one exon. + """ + rna = [1, 80, 81, 3719] + cds = [162, 2123] + cm = Crossmap(rna, cds, 1) + assert cm._Crossmap__crossmapping == [-161, -82, -81, 3558] + assert cm.x2g(1, 0) == 162 + assert cm.tuple2string(cm.g2x(2123)) == '1962' + assert cm.tuple2string(cm.g2x(2124)) == '*1' + + +def test_cds_start_on_splice_site(): + """ + Test a gene that has a CDS that starts on an exon splice site. + """ + rna = [23755059, 23755214, 23777833, 23778028, 23808749, 23808851, + 23824768, 23824856, 23853497, 23853617, 23869553, 23869626, + 23894775, 23894899, 23898506, 23899304] + cds = [23777833, 23898680] + cm = Crossmap(rna, cds, 1) + assert (cm._Crossmap__crossmapping == + [-156, -1, 1, 196, 197, 299, 300, 388, 389, 509, 510, 583, + 584, 708, 709, 1507]) + assert cm.x2g(1, 0) == 23777833 + # Fix for r536: disable the -u and +d convention. 
+ # assert cm.tuple2string(cm.g2x(2123)) == '-156-u23752936' + # assert cm.tuple2string(cm.g2x(2124)) == '-156-u23752935' + assert cm.tuple2string(cm.g2x(2123)) == '-23753092' + assert cm.tuple2string(cm.g2x(2124)) == '-23753091' + + +def test_cds_start_on_splice_site_reverse(): + """ + Test a gene on the reverse strand that has a CDS that starts on an + exon splice site. + """ + rna = [23777833, 23778028, 23808749, 23808851, 23824768, 23824856, + 23853497, 23853617, 23869553, 23869626, 23894775, 23894899, + 23898506, 23899304] + cds = [23755214, 23778028] + cm = Crossmap(rna, cds, -1) + assert (cm._Crossmap__crossmapping == + [196, 1, -1, -103, -104, -192, -193, -313, -314, -387, -388, + -512, -513, -1311]) + + +def test_cds_start_on_splice_site_other(): + """ + Test a gene that has a CDS that starts on another exon splice site. + """ + rna = [23755059, 23755214, 23777833, 23778028, 23808749, 23808851, + 23824768, 23824856, 23853497, 23853617, 23869553, 23869626, + 23894775, 23894899, 23898506, 23899304] + cds = [23755214, 23898680] + cm = Crossmap(rna, cds, 1) + assert (cm._Crossmap__crossmapping == + [-155, 1, 2, 197, 198, 300, 301, 389, 390, 510, 511, 584, 585, + 709, 710, 1508]) + + +def test_cds_start_on_splice_site_other_reverse(): + """ + Test a gene on the reverse strand that has a CDS that starts on + another exon splice site. + """ + rna = [23777833, 23778028, 23808749, 23808851, 23824768, 23824856, + 23853497, 23853617, 23869553, 23869626, 23894775, 23894899, + 23898506, 23899304] + cds = [23755214, 23808749] + cm = Crossmap(rna, cds, -1) + assert (cm._Crossmap__crossmapping == + [197, 2, 1, -102, -103, -191, -192, -312, -313, -386, -387, + -511, -512, -1310]) + + +def test_cds_start_on_transcript_start(): + """ + Upstream correction (forward) for CDS start at the start of + transcript. 
+ """ + rna = [23777833, 23778028, 23808749, 23808851, 23824768, 23824856, + 23853497, 23853617, 23869553, 23869626, 23894775, 23894899, + 23898506, 23899304] + cds = [23777833, 23899304] + cm = Crossmap(rna, cds, 1) + assert cm.x2g(-1, 0) == cm.x2g(1, -1) + + +def test_cds_start_on_transcript_start_reverse(): + """ + Upstream correction (reverse) for CDS start at the start of + transcript. + """ + rna = [23777833, 23778028, 23808749, 23808851, 23824768, 23824856, + 23853497, 23853617, 23869553, 23869626, 23894775, 23894899, + 23898506, 23899304] + cds = [23777833, 23899304] + cm = Crossmap(rna, cds, -1) + assert cm.x2g(-1, 0) == cm.x2g(1, -1) + + +def test_cds_is_exon(): + """ + Gene where CDS is exactly one exon. + """ + rna = [27745, 27939, 58661, 58762, 74680, 74767] + cds = [58661, 58762] + cm = Crossmap(rna, cds, 1) + assert cm._Crossmap__crossmapping == [-195, -1, 1, 102, 103, 190] + + +def test_cds_is_exon_reverse(): + """ + Gene on the reverse strand where CDS is exactly one exon. + """ + rna = [27745, 27939, 58661, 58762, 74680, 74767] + cds = [58661, 58762] + cm = Crossmap(rna, cds, -1) + assert cm._Crossmap__crossmapping == [297, 103, 102, 1, -1, -88] diff --git a/tests/test_db_queries.py b/tests/test_db_queries.py index 2f4b2d5e..6eeb4492 100644 --- a/tests/test_db_queries.py +++ b/tests/test_db_queries.py @@ -5,63 +5,55 @@ Tests for the mutalyzer.db.queries module. from __future__ import unicode_literals -#import logging; logging.basicConfig() +import pytest from mutalyzer.db import queries -from fixtures import database, cache -from utils import MutalyzerTest -from utils import fix +pytestmark = [ + pytest.mark.usefixtures('references'), + pytest.mark.parametrize('references', [['MARK1']], indirect=True) +] -class TestMutator(MutalyzerTest): + +def test_get_transcript_protein_link(): + """ + Query a transcript-protein link by transcript. 
+ """ + link = queries.get_transcript_protein_link('NM_018650') + assert link.transcript_accession == 'NM_018650' + assert link.protein_accession == 'NP_061120' + + +def test_get_transcript_protein_link_negative(): + """ + Query a negative transcript-protein link by transcript. + """ + link = queries.get_transcript_protein_link('XM_005273133') + assert link.transcript_accession == 'XM_005273133' + assert link.protein_accession is None + + +def test_get_transcript_protein_link_missing(): + """ + Query a missing transcript-protein link by transcript. + """ + link = queries.get_transcript_protein_link('NM_123456') + assert link is None + + +def test_get_transcript_protein_link_reverse(): + """ + Query a transcript-protein link by protein. + """ + link = queries.get_transcript_protein_link('NP_061120', reverse=True) + assert link.transcript_accession == 'NM_018650' + assert link.protein_accession == 'NP_061120' + + +def test_get_transcript_protein_link_reverse_missing(): """ - Test the queries module. + Query a missing transcript-protein link by protein. """ - fixtures = (database, ) - - def setup(self): - super(TestMutator, self).setup() - - @fix(cache('MARK1')) - def test_get_transcript_protein_link(self): - """ - Query a transcript-protein link by transcript. - """ - link = queries.get_transcript_protein_link('NM_018650') - assert link.transcript_accession == 'NM_018650' - assert link.protein_accession == 'NP_061120' - - @fix(cache('MARK1')) - def test_get_transcript_protein_link_negative(self): - """ - Query a negative transcript-protein link by transcript. - """ - link = queries.get_transcript_protein_link('XM_005273133') - assert link.transcript_accession == 'XM_005273133' - assert link.protein_accession is None - - @fix(cache('MARK1')) - def test_get_transcript_protein_link_missing(self): - """ - Query a missing transcript-protein link by transcript. 
- """ - link = queries.get_transcript_protein_link('NM_123456') - assert link is None - - @fix(cache('MARK1')) - def test_get_transcript_protein_link_reverse(self): - """ - Query a transcript-protein link by protein. - """ - link = queries.get_transcript_protein_link('NP_061120', reverse=True) - assert link.transcript_accession == 'NM_018650' - assert link.protein_accession == 'NP_061120' - - @fix(cache('MARK1')) - def test_get_transcript_protein_link_reverse_missing(self): - """ - Query a missing transcript-protein link by protein. - """ - link = queries.get_transcript_protein_link('NP_123456') - assert link is None + link = queries.get_transcript_protein_link('NP_123456') + assert link is None diff --git a/tests/test_grammar.py b/tests/test_grammar.py index dad9a9c6..06058c9a 100644 --- a/tests/test_grammar.py +++ b/tests/test_grammar.py @@ -5,148 +5,160 @@ Tests for the mutalyzer.grammar module. from __future__ import unicode_literals -#import logging; logging.basicConfig() -import os +import pytest -import mutalyzer from mutalyzer.grammar import Grammar -from mutalyzer.output import Output -from utils import MutalyzerTest +@pytest.fixture +def grammar(output): + return Grammar(output) -class TestGrammar(MutalyzerTest): + +@pytest.fixture +def parser(output, grammar): + def parse(description): + __tracebackhide__ = True + grammar.parse(description) + errors = output.getMessagesWithErrorCode('EPARSE') + if len(errors) > 0: + pytest.fail('failed to parse `%s`: %s' % ( + description, errors[0].description)) + return parse + + +@pytest.mark.parametrize('description', [ + 'NM_002001.2:c.[12del]', + 'NM_002001.2:c.[(12del)]', + 'NM_002001.2:c.[(12del)?]', + 'NM_002001.2:c.[(12del);(12del)]', + 'NM_002001.2:c.[(12del;12del)]', + 'NM_002001.2:c.[((12del)?;12del)?]' +]) +def test_parse_variants(parser, description): + """ + Parse some example variants. 
+ """ + parser(description) + + +@pytest.mark.parametrize('description', [ + 'NM_002001.2:c.15_16insA', + 'NM_002001.2:c.15_16insATC', + 'NM_002001.2:c.15_16ins[A]', + 'NM_002001.2:c.15_16ins[ATC]', + 'NM_002001.2:c.15_16ins28_39', + 'NM_002001.2:c.15_16ins[28_39]', + 'NM_002001.2:c.15_16ins[28_39;A]', + 'NM_002001.2:c.15_16ins[28_39;ATC]', + 'NM_002001.2:c.15_16ins[28_39;A;ATC]', + 'NM_002001.2:c.15_16ins28_39inv', + 'NM_002001.2:c.15_16ins[28_39inv]', + 'NM_002001.2:c.15_16ins[28_39inv;A]', + 'NM_002001.2:c.15_16ins[28_39inv;ATC]', + 'NM_002001.2:c.15_16ins[28_39inv;A;ATC]' +]) +def test_parse_compound_insertion(parser, description): + """ + Parse compound insertions. + """ + parser(description) + + +@pytest.mark.parametrize('description', [ + 'NM_002001.2:c.12_17delinsA', + 'NM_002001.2:c.12_17delinsATC', + 'NM_002001.2:c.12_17delins[A]', + 'NM_002001.2:c.12_17delins[ATC]', + 'NM_002001.2:c.12_17delins28_39', + 'NM_002001.2:c.12_17delins[28_39]', + 'NM_002001.2:c.12_17delins[28_39;A]', + 'NM_002001.2:c.12_17delins[28_39;ATC]', + 'NM_002001.2:c.12_17delins[28_39;A;ATC]', + 'NM_002001.2:c.12_17delins28_39inv', + 'NM_002001.2:c.12_17delins[28_39inv]', + 'NM_002001.2:c.12_17delins[28_39inv;A]', + 'NM_002001.2:c.12_17delins[28_39inv;ATC]', + 'NM_002001.2:c.12_17delins[28_39inv;A;ATC]' +]) +def test_parse_compound_delins(parser, description): + """ + Parse compound deletion-insertions. 
+ """ + parser(description) + + +@pytest.mark.parametrize('description', [ + 'NG_009105.1(OPN1LW):p.=', + 'NG_009105.1(OPN1LW):p.?', + 'NM_000076.2(CDKN1C):p.0', + 'NM_000076.2(CDKN1C):p.0?', + 'NG_009105.1(OPN1LW):p.(=)', + 'NM_000076.2(CDKN1C):p.(Ala123del)', + 'NM_000076.2(CDKN1C):p.(Ala123_Leu126del)', + 'NM_000076.2(CDKN1C):p.(Ala123_Leu126delinsVal)', + 'NM_000076.2(CDKN1C):p.Ala123del', + 'NM_000076.2(CDKN1C):p.Ala123_Leu126del', + 'NM_000076.2(CDKN1C):p.Ala123_Leu126delinsVal', + 'NM_000076.2(CDKN1C):p.Ala123_*317delinsVal', + 'NM_000076.2(CDKN1C):p.Ala123_X317delinsVal', + 'NM_000076.2(CDKN1C):p.Ala123delinsVal', + 'NM_000076.2(CDKN1C):p.Ala123delinsValPro', + 'NM_000076.2(CDKN1C):p.Ala123delinsVP', + 'NM_000076.2(CDKN1C):p.Ala123fs', + 'NM_000076.2(CDKN1C_i001):p.(Glu124Serfs*148)', + 'NM_000076.2(CDKN1C_i001):p.(Glu124SerfsX148)', + 'NM_000076.2(CDKN1C_i001):p.(E124Sfs*148)', + 'NM_000076.2(CDKN1C_i001):p.(E124SfsX148)', + 'NG_009105.1(OPN1LW):p.Met1Leu', + 'NP_064445.1(OPN1LW):p.Met1?', + 'NP_064445.1(OPN1LW):p.M1?', + 'NP_064445.1:p.Gln16del', + 'NP_064445.1:p.Gln16dup', + 'NP_064445.1:p.Gln3del', + 'NP_064445.1:p.Q16del', + 'NP_064445.1:p.Q16dup', + 'NP_064445.1:p.Q16*', + 'NP_064445.1:p.Q16X', + 'NG_009105.1:p.Gln3Leu', + 'NG_009105.1(OPN1LW):p.Gln3Leu', + 'NG_009105.1(OPN1LW_i1):p.Gln3Leu', + 'NG_009105.1(OPN1LW_v1):p.Gln3Leu', + 'NG_009105.1(OPN1LW):p.Gln3_Gln4insLeu', + 'NG_009105.1(OPN1LW):p.Gln3_Gln4insGln', + 'NG_009105.1(OPN1LW):p.Gln3_Gln4dup', + 'NG_009105.1(OPN1LW):p.Q3_Q4insQ', + 'NG_009105.1(OPN1LW):p.Q3_Q4insQQ', + 'NG_009105.1(OPN1LW):p.Q3_Q4dup', + 'NG_009105.1(OPN1LW):p.Gln3_Leu7del', + 'NG_009105.1(OPN1LW):p.Gln3_Leu7delinsValLeu', + 'NG_009105.1(OPN1LW):p.Gln3_Leu7delinsValPro', + 'NG_009105.1(OPN1LW):p.Gln3_Leu7delinsGlnGlnTrpSerLeu', + 'NG_009105.1(OPN1LW):p.Q3_L7delinsGlnGlnTrpSerLeu', + 'NG_009105.1(OPN1LW):p.Gln3_Leu7delinsQQWSL', + # 'NG_009105.1(OPN1LW):p.Met1AlaextMet-1', + # 'NG_009105.1(OPN1LW):p.M1AextM-1', + # 
'NG_009105.1(OPN1LW):p.Gln3_Leu7[3]', + 'NG_009105.1(OPN1LW):p.Gln3_Leu7(1_6)', + 'NG_009105.1(OPN1LW):p.Gln3Leu', + 'NG_009105.1(OPN1LW):p.Gln3Leu', + # 'NM_000076.2(CDKN1C_i001):p.(*317Trpext*3)', + 'NM_000076.2(CDKN1C_i001):p.(*317TrpextX3)', + # 'NM_000076.2(CDKN1C_i001):p.(*317Cysext*1)', + 'NM_000076.2(CDKN1C_i001):p.(*317CysextX1)', + # 'NM_000076.2(CDKN1C_i001):p.(*317Cext*1)', + 'NM_000076.2(CDKN1C_i001):p.(*317CextX1)', + # 't(X;17)(DMD:p.Met1_Val1506; SGCA:p.Val250_*387)' +]) +def test_parse_protein_variants(parser, description): + """ + Parse protein variants. + """ + parser(description) + + +def test_parse_minus_in_gene_symbol(parser): """ - Test the mytalyzer.grammar module. + Gene symbol is allowed to contain a minus character. """ - def setup(self): - super(TestGrammar, self).setup() - self.output = Output(__file__) - self.grammar = Grammar(self.output) - - def _parse(self, description): - """ - Parse a variant description. - """ - self.grammar.parse(description) - assert self.output.getOutput('parseError') == [] - - def test_some_variants(self): - """ - Some example variants. - """ - self._parse('NM_002001.2:c.[12del]') - self._parse('NM_002001.2:c.[(12del)]') - self._parse('NM_002001.2:c.[(12del)?]') - self._parse('NM_002001.2:c.[(12del);(12del)]') - self._parse('NM_002001.2:c.[(12del;12del)]') - self._parse('NM_002001.2:c.[((12del)?;12del)?]') - - def test_compound_insertion(self): - """ - Some some compound insertions. 
- """ - self._parse('NM_002001.2:c.15_16insA') - self._parse('NM_002001.2:c.15_16insATC') - self._parse('NM_002001.2:c.15_16ins[A]') - self._parse('NM_002001.2:c.15_16ins[ATC]') - self._parse('NM_002001.2:c.15_16ins28_39') - self._parse('NM_002001.2:c.15_16ins[28_39]') - self._parse('NM_002001.2:c.15_16ins[28_39;A]') - self._parse('NM_002001.2:c.15_16ins[28_39;ATC]') - self._parse('NM_002001.2:c.15_16ins[28_39;A;ATC]') - self._parse('NM_002001.2:c.15_16ins28_39inv') - self._parse('NM_002001.2:c.15_16ins[28_39inv]') - self._parse('NM_002001.2:c.15_16ins[28_39inv;A]') - self._parse('NM_002001.2:c.15_16ins[28_39inv;ATC]') - self._parse('NM_002001.2:c.15_16ins[28_39inv;A;ATC]') - - def test_compound_delins(self): - """ - Some some compound deletion-insertions. - """ - self._parse('NM_002001.2:c.12_17delinsA') - self._parse('NM_002001.2:c.12_17delinsATC') - self._parse('NM_002001.2:c.12_17delins[A]') - self._parse('NM_002001.2:c.12_17delins[ATC]') - self._parse('NM_002001.2:c.12_17delins28_39') - self._parse('NM_002001.2:c.12_17delins[28_39]') - self._parse('NM_002001.2:c.12_17delins[28_39;A]') - self._parse('NM_002001.2:c.12_17delins[28_39;ATC]') - self._parse('NM_002001.2:c.12_17delins[28_39;A;ATC]') - self._parse('NM_002001.2:c.12_17delins28_39inv') - self._parse('NM_002001.2:c.12_17delins[28_39inv]') - self._parse('NM_002001.2:c.12_17delins[28_39inv;A]') - self._parse('NM_002001.2:c.12_17delins[28_39inv;ATC]') - self._parse('NM_002001.2:c.12_17delins[28_39inv;A;ATC]') - - def test_protein_variants(self): - """ - Some protein variants. 
- """ - self._parse('NG_009105.1(OPN1LW):p.=') - self._parse('NG_009105.1(OPN1LW):p.?') - self._parse('NM_000076.2(CDKN1C):p.0') - self._parse('NM_000076.2(CDKN1C):p.0?') - self._parse('NG_009105.1(OPN1LW):p.(=)') - self._parse('NM_000076.2(CDKN1C):p.(Ala123del)') - self._parse('NM_000076.2(CDKN1C):p.(Ala123_Leu126del)') - self._parse('NM_000076.2(CDKN1C):p.(Ala123_Leu126delinsVal)') - self._parse('NM_000076.2(CDKN1C):p.Ala123del') - self._parse('NM_000076.2(CDKN1C):p.Ala123_Leu126del') - self._parse('NM_000076.2(CDKN1C):p.Ala123_Leu126delinsVal') - self._parse('NM_000076.2(CDKN1C):p.Ala123_*317delinsVal') - self._parse('NM_000076.2(CDKN1C):p.Ala123_X317delinsVal') - self._parse('NM_000076.2(CDKN1C):p.Ala123delinsVal') - self._parse('NM_000076.2(CDKN1C):p.Ala123delinsValPro') - self._parse('NM_000076.2(CDKN1C):p.Ala123delinsVP') - self._parse('NM_000076.2(CDKN1C):p.Ala123fs') - self._parse('NM_000076.2(CDKN1C_i001):p.(Glu124Serfs*148)') - self._parse('NM_000076.2(CDKN1C_i001):p.(Glu124SerfsX148)') - self._parse('NM_000076.2(CDKN1C_i001):p.(E124Sfs*148)') - self._parse('NM_000076.2(CDKN1C_i001):p.(E124SfsX148)') - self._parse('NG_009105.1(OPN1LW):p.Met1Leu') - self._parse('NP_064445.1(OPN1LW):p.Met1?') - self._parse('NP_064445.1(OPN1LW):p.M1?') - self._parse('NP_064445.1:p.Gln16del') - self._parse('NP_064445.1:p.Gln16dup') - self._parse('NP_064445.1:p.Gln3del') - self._parse('NP_064445.1:p.Q16del') - self._parse('NP_064445.1:p.Q16dup') - self._parse('NP_064445.1:p.Q16*') - self._parse('NP_064445.1:p.Q16X') - self._parse('NG_009105.1:p.Gln3Leu') - self._parse('NG_009105.1(OPN1LW):p.Gln3Leu') - self._parse('NG_009105.1(OPN1LW_i1):p.Gln3Leu') - self._parse('NG_009105.1(OPN1LW_v1):p.Gln3Leu') - self._parse('NG_009105.1(OPN1LW):p.Gln3_Gln4insLeu') - self._parse('NG_009105.1(OPN1LW):p.Gln3_Gln4insGln') - self._parse('NG_009105.1(OPN1LW):p.Gln3_Gln4dup') - self._parse('NG_009105.1(OPN1LW):p.Q3_Q4insQ') - self._parse('NG_009105.1(OPN1LW):p.Q3_Q4insQQ') - 
self._parse('NG_009105.1(OPN1LW):p.Q3_Q4dup') - self._parse('NG_009105.1(OPN1LW):p.Gln3_Leu7del') - self._parse('NG_009105.1(OPN1LW):p.Gln3_Leu7delinsValLeu') - self._parse('NG_009105.1(OPN1LW):p.Gln3_Leu7delinsValPro') - self._parse('NG_009105.1(OPN1LW):p.Gln3_Leu7delinsGlnGlnTrpSerLeu') - self._parse('NG_009105.1(OPN1LW):p.Q3_L7delinsGlnGlnTrpSerLeu') - self._parse('NG_009105.1(OPN1LW):p.Gln3_Leu7delinsQQWSL') - #self._parse('NG_009105.1(OPN1LW):p.Met1AlaextMet-1') - #self._parse('NG_009105.1(OPN1LW):p.M1AextM-1') - #self._parse('NG_009105.1(OPN1LW):p.Gln3_Leu7[3]') - self._parse('NG_009105.1(OPN1LW):p.Gln3_Leu7(1_6)') - self._parse('NG_009105.1(OPN1LW):p.Gln3Leu') - self._parse('NG_009105.1(OPN1LW):p.Gln3Leu') - #self._parse('NM_000076.2(CDKN1C_i001):p.(*317Trpext*3)') - self._parse('NM_000076.2(CDKN1C_i001):p.(*317TrpextX3)') - #self._parse('NM_000076.2(CDKN1C_i001):p.(*317Cysext*1)') - self._parse('NM_000076.2(CDKN1C_i001):p.(*317CysextX1)') - #self._parse('NM_000076.2(CDKN1C_i001):p.(*317Cext*1)') - self._parse('NM_000076.2(CDKN1C_i001):p.(*317CextX1)') - #self._parse('t(X;17)(DMD:p.Met1_Val1506; SGCA:p.Val250_*387)') - - def test_minus_in_gene_symbol(self): - """ - Gene symbol is allowed to contain a minus character. - """ - self._parse('UD_132464528477(KRTAP2-4_v001):c.100del') + parser('UD_132464528477(KRTAP2-4_v001):c.100del') diff --git a/tests/test_mapping.py b/tests/test_mapping.py index 620f9d75..80816864 100644 --- a/tests/test_mapping.py +++ b/tests/test_mapping.py @@ -1,338 +1,324 @@ """ -Tests for the mapping module. +Tests for the mutalyzer.mapping module. 
""" from __future__ import unicode_literals -#import logging; logging.basicConfig() -from sqlalchemy import or_ +import pytest -from mutalyzer.db.models import Assembly -from mutalyzer.output import Output from mutalyzer.mapping import Converter -from fixtures import database, hg19, hg19_transcript_mappings -from utils import MutalyzerTest - - -class TestConverter(MutalyzerTest): - """ - Test the Converter class. - """ - fixtures = (database, hg19, hg19_transcript_mappings) - - def setup(self): - super(TestConverter, self).setup() - self.output = Output(__file__) - - def _converter(self, assembly_name_or_alias): - """ - Create a Converter instance for a given genome assembly. - """ - assembly = Assembly.query \ - .filter(or_(Assembly.name == assembly_name_or_alias, - Assembly.alias == assembly_name_or_alias)) \ - .one() - return Converter(assembly, self.output) - - def test_converter(self): - """ - Simple test. - """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_003002.2:c.274G>T') - assert genomic == 'NC_000011.9:g.111959695G>T' - coding = converter.chrom2c(genomic, 'list') - assert 'NM_003002.2:c.274G>T' in coding - # Fix for r536: disable the -u and +d convention. - #assert 'NR_028383.1:c.1-u2173C>A' in coding - assert 'NR_028383.1:n.-2173C>A' in coding - - def test_converter_non_coding(self): - """ - Test with variant on non-coding transcript. - """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NR_028383.1:n.-2173C>A') - assert genomic == 'NC_000011.9:g.111959695G>T' - coding = converter.chrom2c(genomic, 'list') - assert 'NM_003002.2:c.274G>T' in coding - # Fix for r536: disable the -u and +d convention. - #assert 'NR_028383.1:c.1-u2173C>A' in coding - assert 'NR_028383.1:n.-2173C>A' in coding - - def test_converter_compound(self): - """ - Test with compound variant. 
- """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_003002.2:c.[274G>T;278A>G]') - assert genomic == 'NC_000011.9:g.[111959695G>T;111959699A>G]' - coding = converter.chrom2c(genomic, 'list') - assert 'NM_003002.2:c.[274G>T;278A>G]' in coding - assert 'NR_028383.1:n.[-2173C>A;-2177T>C]' in coding - - def test_hla_cluster(self): - """ - Convert to primary assembly. - - Transcript NM_000500.5 is mapped to different chromosome locations, - but we like to just see the primary assembly mapping to chromosome 6. - - See also bug #58. - """ - # Todo: This test is bogus now that we use a fixture that has just the - # mapping to chromosome 6. However, I think we only get this mapping - # from our current source (NCBI seq_gene.md) anyway, so I'm not sure - # where we got the other mappings from in the past (but haven't - # investigated really). - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_000500.5:c.92C>T') - assert genomic == 'NC_000006.11:g.32006291C>T' - coding = converter.chrom2c(genomic, 'list') - assert 'NM_000500.5:c.92C>T' in coding - - def test_converter_del_length_reverse(self): - """ - Position converter on deletion (denoted by length) on transcripts - located on the reverse strand. - """ - converter = self._converter('hg19') - coding = converter.chrom2c('NC_000022.10:g.51016285_51017117del123456789', 'list') - # Fix for r536: disable the -u and +d convention. - #assert 'NM_001145134.1:c.-138-u21_60del123456789' in coding - #assert 'NR_021492.1:c.1-u5170_1-u4338del123456789' in coding - assert 'NM_001145134.1:c.-159_60del123456789' in coding - assert 'NR_021492.1:n.-5170_-4338del123456789' in coding - - def test_S_Venkata_Suresh_Kumar(self): - """ - Test for correct mapping information on genes where CDS start or stop - is exactly on the border of an exon. - - Bug reported February 24, 2012 by S Venkata Suresh Kumar. 
- """ - converter = self._converter('hg19') - coding = converter.chrom2c('NC_000001.10:g.115259837_115259837delT', 'list') - assert 'NM_001007553.1:c.3863delA' not in coding - assert 'NM_001007553.1:c.*953delA' in coding - assert 'NM_001130523.1:c.*953delA' in coding - - def test_S_Venkata_Suresh_Kumar_more(self): - """ - Another test for correct mapping information on genes where CDS start - or stop is exactly on the border of an exon. - - Bug reported March 21, 2012 by S Venkata Suresh Kumar. - """ - converter = self._converter('hg19') - coding = converter.chrom2c('NC_000001.10:g.160012314_160012329del16', 'list') - assert 'NM_002241.4:c.-27250-7_-27242del16' not in coding - assert 'NM_002241.4:c.1-7_9del16' in coding - - def test_range_order_forward_correct(self): - """ - Just a normal position converter call, both directions. See Trac #95. - """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_003002.2:c.-1_274del') - assert genomic == 'NC_000011.9:g.111957631_111959695del' - coding = converter.chrom2c(genomic, 'list') - assert 'NM_003002.2:c.-1_274del' in coding - - def test_range_order_forward_incorrect_c2chrom(self): - """ - Incorrect order of a range on the forward strand. See Trac #95. - """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_003002.2:c.274_-1del') - assert genomic == None - erange = self.output.getMessagesWithErrorCode('ERANGE') - assert len(erange) == 1 - - def test_range_order_reverse_correct(self): - """ - Just a normal position converter call on the reverse strand, both - directions. See Trac #95. - """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_001162505.1:c.-1_40del') - assert genomic == 'NC_000020.10:g.48770135_48770175del' - coding = converter.chrom2c(genomic, 'list') - assert 'NM_001162505.1:c.-1_40del' in coding - - def test_range_order_reverse_incorrect_c2chrom(self): - """ - Incorrect order of a range on the reverse strand. See Trac #95. 
- """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_001162505.1:c.40_-1del') - assert genomic == None - erange = self.output.getMessagesWithErrorCode('ERANGE') - assert len(erange) == 1 - - def test_range_order_incorrect_chrom2c(self): - """ - Incorrect order of a chromosomal range. See Trac #95. - """ - converter = self._converter('hg19') - coding = converter.chrom2c('NC_000011.9:g.111959695_111957631del', 'list') - assert coding == None - erange = self.output.getMessagesWithErrorCode('ERANGE') - assert len(erange) == 1 - - def test_delins_large_ins_c2chrom(self): - """ - Delins with multi-base insertion c. to chrom. - """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_003002.2:c.274delinsTAAA') - assert genomic == 'NC_000011.9:g.111959695delinsTAAA' - coding = converter.chrom2c(genomic, 'list') - assert 'NM_003002.2:c.274delinsTAAA' in coding - - def test_delins_large_ins_explicit_c2chrom(self): - """ - Delins with multi-base insertion and explicit deleted sequence c. to chrom. - """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_003002.2:c.274delGinsTAAA') - assert genomic == 'NC_000011.9:g.111959695delinsTAAA' - coding = converter.chrom2c(genomic, 'list') - assert 'NM_003002.2:c.274delinsTAAA' in coding - - def test_delins_large_ins_chrom2c(self): - """ - Delins with multi-base insertion chrom to c. - """ - converter = self._converter('hg19') - coding = converter.chrom2c('NC_000011.9:g.111959695delinsTAAA', 'list') - assert 'NM_003002.2:c.274delinsTAAA' in coding - - def test_delins_large_ins_explicit_chrom2c(self): - """ - Delins with multi-base insertion and explicit deleted sequence chrom to c. - """ - converter = self._converter('hg19') - coding = converter.chrom2c('NC_000011.9:g.111959695delGinsTAAA', 'list') - assert 'NM_003002.2:c.274delinsTAAA' in coding - - def test_chrm_chrom2c(self): - """ - Mitochondrial m. to c. 
- """ - converter = self._converter('hg19') - coding = converter.chrom2c('NC_012920.1:m.12030del', 'list') - assert 'NC_012920.1(ND4_v001):c.1271del' in coding - - def test_chrm_name_chrom2c(self): - """ - Mitochondrial m. (by chromosome name) to c. - """ - converter = self._converter('hg19') - variant = converter.correctChrVariant('chrM:m.12030del') - coding = converter.chrom2c(variant, 'list') - assert 'NC_012920.1(ND4_v001):c.1271del' in coding - - def test_chrm_c2chrom(self): - """ - Mitochondrial c. to m. - """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NC_012920.1(ND4_v001):c.1271del') - assert genomic == 'NC_012920.1:m.12030del' - - def test_nm_without_selector_chrom2c(self): - """ - NM reference without transcript selection c. to g. - """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_017780.2:c.109A>T') - assert genomic == 'NC_000008.10:g.61654100A>T' - - def test_nm_with_selector_chrom2c(self): - """ - NM reference with transcript selection c. to g. - """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_017780.2(CHD7_v001):c.109A>T') - assert genomic == 'NC_000008.10:g.61654100A>T' - - def test_nm_c2chrom_no_selector(self): - """ - To NM reference should never result in transcript selection. - """ - converter = self._converter('hg19') - variant = converter.correctChrVariant('NC_000008.10:g.61654100A>T') - coding = converter.chrom2c(variant, 'list') - assert 'NM_017780.2:c.109A>T' in coding - - def test_incorrect_selector_c2chrom(self): - """ - Incorrect selector. - """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_017780.2(CHD8):c.109A>T') - erange = self.output.getMessagesWithErrorCode('EACCNOTINDB') - assert len(erange) == 1 - - def test_incorrect_selector_version_c2chrom(self): - """ - Incorrect selector version. 
- """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_017780.2(CHD7_v002):c.109A>T') - erange = self.output.getMessagesWithErrorCode('EACCNOTINDB') - assert len(erange) == 1 - - def test_no_selector_version_c2chrom(self): - """ - Selector but no selector version. - """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_017780.2(CHD7):c.109A>T') - assert genomic == 'NC_000008.10:g.61654100A>T' - - def test_incorrect_selector_no_selector_version_c2chrom(self): - """ - Incorrect selector, no selector version. - """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_017780.2(CHD8):c.109A>T') - erange = self.output.getMessagesWithErrorCode('EACCNOTINDB') - assert len(erange) == 1 - - def test_ins_seq_chrom2c(self): - """ - Insertion of a sequence (chrom2c). - """ - converter = self._converter('hg19') - coding = converter.chrom2c('NC_000011.9:g.111957482_111957483insGAT', 'list') - assert 'NM_003002.2:c.-150_-149insGAT' in coding - assert 'NM_012459.2:c.10_11insATC' in coding - - def test_ins_seq_seq(self): - """ - Insertion of two sequences (chrom2c). - """ - converter = self._converter('hg19') - coding = converter.chrom2c('NC_000011.9:g.111957482_111957483ins[GAT;AAA]', 'list') - assert 'NM_003002.2:c.-150_-149ins[GAT;AAA]' in coding - assert 'NM_012459.2:c.10_11ins[TTT;ATC]' in coding - - def test_ins_seq_c2chrom_reverse(self): - """ - Insertion of a sequence on reverse strand (c2chrom). - """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_012459.2:c.10_11insATC') - assert genomic == 'NC_000011.9:g.111957482_111957483insGAT' - - def test_ins_seq_seq_c2chrom_reverse(self): - """ - Insertion of two sequences on reverse strand (c2chrom). 
- """ - converter = self._converter('hg19') - genomic = converter.c2chrom('NM_012459.2:c.10_11ins[TTT;ATC]') - assert genomic == 'NC_000011.9:g.111957482_111957483ins[GAT;AAA]' + +pytestmark = pytest.mark.usefixtures('hg19_transcript_mappings') + + +@pytest.fixture +def converter(output, hg19): + return Converter(hg19, output) + + +def test_converter(converter): + """ + Simple test. + """ + genomic = converter.c2chrom('NM_003002.2:c.274G>T') + assert genomic == 'NC_000011.9:g.111959695G>T' + coding = converter.chrom2c(genomic, 'list') + assert 'NM_003002.2:c.274G>T' in coding + # Fix for r536: disable the -u and +d convention. + # assert 'NR_028383.1:c.1-u2173C>A' in coding + assert 'NR_028383.1:n.-2173C>A' in coding + + +def test_converter_non_coding(converter): + """ + Test with variant on non-coding transcript. + """ + genomic = converter.c2chrom('NR_028383.1:n.-2173C>A') + assert genomic == 'NC_000011.9:g.111959695G>T' + coding = converter.chrom2c(genomic, 'list') + assert 'NM_003002.2:c.274G>T' in coding + # Fix for r536: disable the -u and +d convention. + # assert 'NR_028383.1:c.1-u2173C>A' in coding + assert 'NR_028383.1:n.-2173C>A' in coding + + +def test_converter_compound(converter): + """ + Test with compound variant. + """ + genomic = converter.c2chrom('NM_003002.2:c.[274G>T;278A>G]') + assert genomic == 'NC_000011.9:g.[111959695G>T;111959699A>G]' + coding = converter.chrom2c(genomic, 'list') + assert 'NM_003002.2:c.[274G>T;278A>G]' in coding + assert 'NR_028383.1:n.[-2173C>A;-2177T>C]' in coding + + +def test_hla_cluster(converter): + """ + Convert to primary assembly. + + Transcript NM_000500.5 is mapped to different chromosome locations, + but we like to just see the primary assembly mapping to chromosome 6. + + See also bug #58. + """ + # Todo: This test is bogus now that we use a fixture that has just the + # mapping to chromosome 6. 
However, I think we only get this mapping + # from our current source (NCBI seq_gene.md) anyway, so I'm not sure + # where we got the other mappings from in the past (but haven't + # investigated really). + genomic = converter.c2chrom('NM_000500.5:c.92C>T') + assert genomic == 'NC_000006.11:g.32006291C>T' + coding = converter.chrom2c(genomic, 'list') + assert 'NM_000500.5:c.92C>T' in coding + + +def test_converter_del_length_reverse(converter): + """ + Position converter on deletion (denoted by length) on transcripts + located on the reverse strand. + """ + coding = converter.chrom2c( + 'NC_000022.10:g.51016285_51017117del123456789', 'list') + # Fix for r536: disable the -u and +d convention. + # assert 'NM_001145134.1:c.-138-u21_60del123456789' in coding + # assert 'NR_021492.1:c.1-u5170_1-u4338del123456789' in coding + assert 'NM_001145134.1:c.-159_60del123456789' in coding + assert 'NR_021492.1:n.-5170_-4338del123456789' in coding + + +def test_S_Venkata_Suresh_Kumar(converter): + """ + Test for correct mapping information on genes where CDS start or stop + is exactly on the border of an exon. + + Bug reported February 24, 2012 by S Venkata Suresh Kumar. + """ + coding = converter.chrom2c( + 'NC_000001.10:g.115259837_115259837delT', 'list') + assert 'NM_001007553.1:c.3863delA' not in coding + assert 'NM_001007553.1:c.*953delA' in coding + assert 'NM_001130523.1:c.*953delA' in coding + + +def test_S_Venkata_Suresh_Kumar_more(converter): + """ + Another test for correct mapping information on genes where CDS start + or stop is exactly on the border of an exon. + + Bug reported March 21, 2012 by S Venkata Suresh Kumar. + """ + coding = converter.chrom2c( + 'NC_000001.10:g.160012314_160012329del16', 'list') + assert 'NM_002241.4:c.-27250-7_-27242del16' not in coding + assert 'NM_002241.4:c.1-7_9del16' in coding + + +def test_range_order_forward_correct(converter): + """ + Just a normal position converter call, both directions. See Trac #95. 
+ """ + genomic = converter.c2chrom('NM_003002.2:c.-1_274del') + assert genomic == 'NC_000011.9:g.111957631_111959695del' + coding = converter.chrom2c(genomic, 'list') + assert 'NM_003002.2:c.-1_274del' in coding + + +def test_range_order_forward_incorrect_c2chrom(output, converter): + """ + Incorrect order of a range on the forward strand. See Trac #95. + """ + genomic = converter.c2chrom('NM_003002.2:c.274_-1del') + assert genomic is None + erange = output.getMessagesWithErrorCode('ERANGE') + assert len(erange) == 1 + + +def test_range_order_reverse_correct(converter): + """ + Just a normal position converter call on the reverse strand, both + directions. See Trac #95. + """ + genomic = converter.c2chrom('NM_001162505.1:c.-1_40del') + assert genomic == 'NC_000020.10:g.48770135_48770175del' + coding = converter.chrom2c(genomic, 'list') + assert 'NM_001162505.1:c.-1_40del' in coding + + +def test_range_order_reverse_incorrect_c2chrom(output, converter): + """ + Incorrect order of a range on the reverse strand. See Trac #95. + """ + genomic = converter.c2chrom('NM_001162505.1:c.40_-1del') + assert genomic is None + erange = output.getMessagesWithErrorCode('ERANGE') + assert len(erange) == 1 + + +def test_range_order_incorrect_chrom2c(output, converter): + """ + Incorrect order of a chromosomal range. See Trac #95. + """ + coding = converter.chrom2c('NC_000011.9:g.111959695_111957631del', 'list') + assert coding is None + erange = output.getMessagesWithErrorCode('ERANGE') + assert len(erange) == 1 + + +def test_delins_large_ins_c2chrom(converter): + """ + Delins with multi-base insertion c. to chrom. + """ + genomic = converter.c2chrom('NM_003002.2:c.274delinsTAAA') + assert genomic == 'NC_000011.9:g.111959695delinsTAAA' + coding = converter.chrom2c(genomic, 'list') + assert 'NM_003002.2:c.274delinsTAAA' in coding + + +def test_delins_large_ins_explicit_c2chrom(converter): + """ + Delins with multi-base insertion and explicit deleted sequence c. to chrom. 
+ """ + genomic = converter.c2chrom('NM_003002.2:c.274delGinsTAAA') + assert genomic == 'NC_000011.9:g.111959695delinsTAAA' + coding = converter.chrom2c(genomic, 'list') + assert 'NM_003002.2:c.274delinsTAAA' in coding + + +def test_delins_large_ins_chrom2c(converter): + """ + Delins with multi-base insertion chrom to c. + """ + coding = converter.chrom2c('NC_000011.9:g.111959695delinsTAAA', 'list') + assert 'NM_003002.2:c.274delinsTAAA' in coding + + +def test_delins_large_ins_explicit_chrom2c(converter): + """ + Delins with multi-base insertion and explicit deleted sequence chrom to c. + """ + coding = converter.chrom2c('NC_000011.9:g.111959695delGinsTAAA', 'list') + assert 'NM_003002.2:c.274delinsTAAA' in coding + + +def test_chrm_chrom2c(converter): + """ + Mitochondrial m. to c. + """ + coding = converter.chrom2c('NC_012920.1:m.12030del', 'list') + assert 'NC_012920.1(ND4_v001):c.1271del' in coding + + +def test_chrm_name_chrom2c(converter): + """ + Mitochondrial m. (by chromosome name) to c. + """ + variant = converter.correctChrVariant('chrM:m.12030del') + coding = converter.chrom2c(variant, 'list') + assert 'NC_012920.1(ND4_v001):c.1271del' in coding + + +def test_chrm_c2chrom(converter): + """ + Mitochondrial c. to m. + """ + genomic = converter.c2chrom('NC_012920.1(ND4_v001):c.1271del') + assert genomic == 'NC_012920.1:m.12030del' + + +def test_nm_without_selector_chrom2c(converter): + """ + NM reference without transcript selection c. to g. + """ + genomic = converter.c2chrom('NM_017780.2:c.109A>T') + assert genomic == 'NC_000008.10:g.61654100A>T' + + +def test_nm_with_selector_chrom2c(converter): + """ + NM reference with transcript selection c. to g. + """ + genomic = converter.c2chrom('NM_017780.2(CHD7_v001):c.109A>T') + assert genomic == 'NC_000008.10:g.61654100A>T' + + +def test_nm_c2chrom_no_selector(converter): + """ + To NM reference should never result in transcript selection. 
+ """ + variant = converter.correctChrVariant('NC_000008.10:g.61654100A>T') + coding = converter.chrom2c(variant, 'list') + assert 'NM_017780.2:c.109A>T' in coding + + +def test_incorrect_selector_c2chrom(output, converter): + """ + Incorrect selector. + """ + converter.c2chrom('NM_017780.2(CHD8):c.109A>T') + erange = output.getMessagesWithErrorCode('EACCNOTINDB') + assert len(erange) == 1 + + +def test_incorrect_selector_version_c2chrom(output, converter): + """ + Incorrect selector version. + """ + converter.c2chrom('NM_017780.2(CHD7_v002):c.109A>T') + erange = output.getMessagesWithErrorCode('EACCNOTINDB') + assert len(erange) == 1 + + +def test_no_selector_version_c2chrom(converter): + """ + Selector but no selector version. + """ + genomic = converter.c2chrom('NM_017780.2(CHD7):c.109A>T') + assert genomic == 'NC_000008.10:g.61654100A>T' + + +def test_incorrect_selector_no_selector_version_c2chrom(output, converter): + """ + Incorrect selector, no selector version. + """ + converter.c2chrom('NM_017780.2(CHD8):c.109A>T') + erange = output.getMessagesWithErrorCode('EACCNOTINDB') + assert len(erange) == 1 + + +def test_ins_seq_chrom2c(converter): + """ + Insertion of a sequence (chrom2c). + """ + coding = converter.chrom2c( + 'NC_000011.9:g.111957482_111957483insGAT', 'list') + assert 'NM_003002.2:c.-150_-149insGAT' in coding + assert 'NM_012459.2:c.10_11insATC' in coding + + +def test_ins_seq_seq(converter): + """ + Insertion of two sequences (chrom2c). + """ + coding = converter.chrom2c( + 'NC_000011.9:g.111957482_111957483ins[GAT;AAA]', 'list') + assert 'NM_003002.2:c.-150_-149ins[GAT;AAA]' in coding + assert 'NM_012459.2:c.10_11ins[TTT;ATC]' in coding + + +def test_ins_seq_c2chrom_reverse(converter): + """ + Insertion of a sequence on reverse strand (c2chrom). 
+ """ + genomic = converter.c2chrom('NM_012459.2:c.10_11insATC') + assert genomic == 'NC_000011.9:g.111957482_111957483insGAT' + + +def test_ins_seq_seq_c2chrom_reverse(converter): + """ + Insertion of two sequences on reverse strand (c2chrom). + """ + genomic = converter.c2chrom('NM_012459.2:c.10_11ins[TTT;ATC]') + assert genomic == 'NC_000011.9:g.111957482_111957483ins[GAT;AAA]' diff --git a/tests/test_migrations.py b/tests/test_migrations.py index c367e43c..c2167ce2 100644 --- a/tests/test_migrations.py +++ b/tests/test_migrations.py @@ -5,8 +5,6 @@ Test database migrations. from __future__ import unicode_literals -import os - import alembic.autogenerate import alembic.command import alembic.config @@ -17,16 +15,11 @@ from sqlalchemy import create_engine, sql from mutalyzer import db -def test_migrations(): +def test_migrations(database_uri): """ Run all migrations and assert the result is up to date with the model definitions. - - We don't use `utils.MutalyzerTest` here, or `mutalyzer.db.session` in any - way for that matter, since it will bootstrap the database schema. """ - database_uri = os.getenv('MUTALYZER_TEST_DATABASE_URI', 'sqlite://') - alembic_config = alembic.config.Config('migrations/alembic.ini') engine = create_engine(database_uri) diff --git a/tests/test_mutator.py b/tests/test_mutator.py index 05e2c685..5c570419 100644 --- a/tests/test_mutator.py +++ b/tests/test_mutator.py @@ -5,1288 +5,1292 @@ Tests for the mutalyzer.mutator module. from __future__ import unicode_literals -#import logging; logging.basicConfig() -import re -import os +import pytest import random from Bio.Seq import Seq -import mutalyzer -from mutalyzer.output import Output -from mutalyzer import mutator - -from utils import MutalyzerTest - - -def _seq(length): - """ - Return random DNA sequence of given length. 
- """ - sequence = '' - for i in range(length): - sequence += random.choice('ACGT') - return Seq(sequence) - - -class TestMutator(MutalyzerTest): - """ - Test the mutator module. - """ - def setup(self): - super(TestMutator, self).setup() - self.output = Output(__file__) - - def _mutator(self, sequence): - """ - Create a Mutator instance for a given sequence. - """ - return mutator.Mutator(sequence, self.output) - - def test_shift_no_change(self): - """ - No change, no shifts. - """ - l = 10 - m = self._mutator(_seq(l)) - # Numbering is 1-based - for i in range(1, l + 1): - assert m.shift(i) == i - - def test_shift_del_example(self): - """ - Example of g.2del. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(2, 2) - assert m.shift(1) == 1 - assert m.shift(2) == 2 - assert m.shift(3) == 2 - - def test_shift_del(self): - """ - Starting from the deleted position (not included), shift -1. - """ - l = 10 - for d in range(1, l + 1): - m = self._mutator(_seq(l)) - m.deletion(d, d) - for p in range(1, d + 1): - assert m.shift(p) == p - for p in range(d + 1, l + 1): - assert m.shift(p) == p - 1 - - def test_shift_del2(self): - """ - Starting from the deleted positions (not included), shift -2. - """ - l = 10 - for d in range(1, l): - m = self._mutator(_seq(l)) - m.deletion(d, d + 1) - for p in range(1, d + 2): - assert m.shift(p) == p - for p in range(d + 2, l + 1): - assert m.shift(p) == p - 2 - - def test_shift_ins_example(self): - """ - Example of g.2_3insA. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'A') - assert m.shift(1) == 1 - assert m.shift(2) == 2 - assert m.shift(3) == 4 - - def test_shift_ins(self): - """ - Starting from the interbase insertion position, shift +1. 
- """ - l = 10 - for i in range(0, l + 1): - m = self._mutator(_seq(l)) - m.insertion(i, 'T') - for p in range(1, i + 1): - assert m.shift(p) == p - for p in range(i + 1, l + 1): - assert m.shift(p) == p + 1 - - def test_shift_ins2(self): - """ - Starting from the interbase insertion position, shift +2. - """ - l = 10 - for i in range(0, l + 1): - m = self._mutator(_seq(l)) - m.insertion(i, 'TT') - for p in range(1, i + 1): - assert m.shift(p) == p - for p in range(i + 1, l + 1): - assert m.shift(p) == p + 2 - - def test_shift_sites_no_change(self): - """ - No change, no shifts. - - @note: Splice sites come in pairs (acceptor and donor site) and the - numbers are the first, respectively last, position in the exon. - - So in this example we have: ---======----======-----===--- - | | | | | | - 4 9 14 19 25 27 - """ - l = 30 - sites = [4, 9, 14, 19, 25, 27] - m = self._mutator(_seq(l)) - assert m.shift_sites(sites) == sites - - def test_shift_sites_acc_del_before(self): - """ - Deletion in intron directly before exon. - - @note: This hits a splice site, so we don't really support it. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.deletion(13, 13) # g.13del - assert m.shift_sites(sites) == [4, 9, 13, 16, 24, 26] - - def test_shift_sites_acc_del_after(self): - """ - Deletion at first exon position. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.deletion(14, 14) # g.14del - assert m.shift_sites(sites) == [4, 9, 14, 16, 24, 26] - - def test_shift_sites_don_del_before(self): - """ - Deletion at last exon position. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.deletion(17, 17) # g.17del - assert m.shift_sites(sites) == [4, 9, 14, 16, 24, 26] - - def test_shift_sites_don_del_after(self): - """ - Deletion in intron directly after exon. - - @note: This hits a splice site, so we don't really support it. 
- """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.deletion(18, 18) # g.18del - assert m.shift_sites(sites) == [4, 9, 14, 17, 24, 26] - - def test_shift_sites_acc_del2_before(self): - """ - Deletion of 2 in intron directly before exon. - - @note: This hits a splice site, so we don't really support it. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.deletion(12, 13) # g.12_13del - assert m.shift_sites(sites) == [4, 9, 12, 15, 23, 25] - - def test_shift_sites_acc_del2_on(self): - """ - Deletion of 2 in intron/exon. - - @note: This hits a splice site, so we don't really support it. - """ - return - - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.deletion(13, 14) # g.13_14del - assert m.shift_sites(sites) == [4, 9, 13, 15, 23, 25] - - def test_shift_sites_acc_del2_after(self): - """ - Deletion of 2 at first exon position. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.deletion(14, 15) # g.14_15del - assert m.shift_sites(sites) == [4, 9, 14, 15, 23, 25] - - def test_shift_sites_don_del2_before(self): - """ - Deletion of 2 at last exon positions. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.deletion(16, 17) # g.16_17del - assert m.shift_sites(sites) == [4, 9, 14, 15, 23, 25] - - def test_shift_sites_don_del2_on(self): - """ - Deletion of 2 in exon/intron. - - @note: This hits a splice site, so we don't really support it. - """ - return - - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.deletion(17, 18) # g.17_18del - assert m.shift_sites(sites) == [4, 9, 14, 16, 23, 25] - - def test_shift_sites_don_del2_after(self): - """ - Deletion of 2 in intron directly after exon. - - @note: This hits a splice site, so we don't really support it. 
- """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.deletion(18, 19) # g.18_19del - assert m.shift_sites(sites) == [4, 9, 14, 17, 23, 25] - - def test_shift_sites_acc_ins_before(self): - """ - Insertion 1 position before intron/exon boundary. - - @note: This hits a splice site, so we don't really support it. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(12, 'A') # g.12_13insA - assert m.shift_sites(sites) == [4, 9, 15, 18, 26, 28] - - def test_shift_sites_acc_ins_on(self): - """ - Insertion in intron/exon boundary. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(13, 'A') # g.13_14insA - assert m.shift_sites(sites) == [4, 9, 14, 18, 26, 28] - - def test_shift_sites_first_acc_ins_on(self): - """ - Insertion in first intron/exon boundary not be included. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(3, 'A') # g.3_4insA - assert m.shift_sites(sites) == [5, 10, 15, 18, 26, 28] - - def test_shift_sites_acc_ins_after(self): - """ - Insertion 1 position after intron/exon boundary. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(14, 'A') # g.14_15insA - assert m.shift_sites(sites) == [4, 9, 14, 18, 26, 28] - - def test_shift_sites_don_ins_before(self): - """ - Insertion 1 position before exon/intron boundary. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(16, 'A') # g.16_17insA - assert m.shift_sites(sites) == [4, 9, 14, 18, 26, 28] - - def test_shift_sites_don_ins_on(self): - """ - Insertion in exon/intron boundary. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(17, 'A') # g.17_18insA - assert m.shift_sites(sites) == [4, 9, 14, 18, 26, 28] - - def test_shift_sites_last_don_ins_on(self): - """ - Insertion in last exon/intron boundary should not be included. 
- """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(27, 'A') # g.27_28insA - assert m.shift_sites(sites) == [4, 9, 14, 17, 25, 27] - - def test_shift_sites_don_ins_after(self): - """ - Insertion 1 position after exon/intron boundary. - - @note: This hits a splice site, so we don't really support it. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(18, 'A') # g.18_19insA - assert m.shift_sites(sites) == [4, 9, 14, 17, 26, 28] - - def test_shift_sites_acc_ins2_before(self): - """ - Insertion of 2 1 position before intron/exon boundary. - - @note: This hits a splice site, so we don't really support it. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(12, 'AT') # g.12_13insAT - assert m.shift_sites(sites) == [4, 9, 16, 19, 27, 29] - - def test_shift_sites_first_acc_ins2_on(self): - """ - Insertion of 2 in last exon/intron boundary should not be included. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(3, 'AT') # g.3_4insAT - assert m.shift_sites(sites) == [6, 11, 16, 19, 27, 29] - - def test_shift_sites_acc_ins2_after(self): - """ - Insertion of 2 1 position after intron/exon boundary. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(14, 'AT') # g.14_15insAT - assert m.shift_sites(sites) == [4, 9, 14, 19, 27, 29] - - def test_shift_sites_don_ins2_before(self): - """ - Insertion of 2 1 position before exon/intron boundary. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(16, 'AT') # g.16_17insAT - assert m.shift_sites(sites) == [4, 9, 14, 19, 27, 29] - - def test_shift_sites_last_don_ins2_on(self): - """ - Insertion of 2 in last exon/intron boundary should not be included. 
- """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(27, 'AT') # g.27_28insAT - assert m.shift_sites(sites) == [4, 9, 14, 17, 25, 27] - - def test_shift_sites_don_ins2_after(self): - """ - Insertion of 2 1 position after exon/intron boundary. - - @note: This hits a splice site, so we don't really support it. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(18, 'AT') # g.18_19insAT - assert m.shift_sites(sites) == [4, 9, 14, 17, 27, 29] - - def test_shift_sites_acc_ins3_before(self): - """ - Insertion of 3 1 position before intron/exon boundary. - - @note: This hits a splice site, so we don't really support it. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(12, 'ATT') # g.12_13insATT - assert m.shift_sites(sites) == [4, 9, 17, 20, 28, 30] - - def test_shift_sites_acc_ins3_on(self): - """ - Insertion of 3 in intron/exon boundary. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(13, 'ATT') # g.13_14insATT - assert m.shift_sites(sites) == [4, 9, 14, 20, 28, 30] - - def test_shift_sites_first_acc_ins3_on(self): - """ - Insertion of 3 in first intron/exon boundary should not be included. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(3, 'ATT') # g.3_4insATT - assert m.shift_sites(sites) == [7, 12, 17, 20, 28, 30] - - def test_shift_sites_acc_ins3_after(self): - """ - Insertion of 3 1 position after intron/exon boundary. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(14, 'ATT') # g.14_15insATT - assert m.shift_sites(sites) == [4, 9, 14, 20, 28, 30] - - def test_shift_sites_don_ins3_before(self): - """ - Insertion of 3 1 position before exon/intron boundary. 
- """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(16, 'ATT') # g.16_17insATT - assert m.shift_sites(sites) == [4, 9, 14, 20, 28, 30] - - def test_shift_sites_don_ins3_on(self): - """ - Insertion of 3 in exon/intron boundary. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(17, 'ATT') # g.17_18insATT - assert m.shift_sites(sites) == [4, 9, 14, 20, 28, 30] - - def test_shift_sites_last_don_ins3_on(self): - """ - Insertion of 3 in last exon/intron boundary should not be included. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(27, 'ATT') # g.27_28insATT - assert m.shift_sites(sites) == [4, 9, 14, 17, 25, 27] - - def test_shift_sites_don_ins3_after(self): - """ - Insertion of 3 1 position after exon/intron boundary. - - @note: This hits a splice site, so we don't really support it. - """ - l = 30 - sites = [4, 9, 14, 17, 25, 27] - m = self._mutator(_seq(l)) - m.insertion(18, 'ATT') # g.18_19insATT - assert m.shift_sites(sites) == [4, 9, 14, 17, 28, 30] - - def test_shift_sites_adj_del_before1(self): - """ - Adjacent exons: deletion at second-last position of first exon. - - @note: In this example we have adjacent exons (like e.g. in RNA), - which looks like this (the square brackets [ and ] are part of the - exons): - ---[====][======][========]--- - | / \ / \ | - 4 9 10 17 18 27 - """ - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.deletion(16, 16) # g.16del - assert m.shift_sites(sites) == [4, 9, 10, 16, 17, 26] - - def test_shift_sites_adj_del_before(self): - """ - Adjacent exons: deletion at last position of first exon. - """ - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.deletion(17, 17) # g.17del - assert m.shift_sites(sites) == [4, 9, 10, 16, 17, 26] - - def test_shift_sites_adj_del_after(self): - """ - Adjacent exons: deletion at first position of second exon. 
- """ - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.deletion(18, 18) # g.18del - assert m.shift_sites(sites) == [4, 9, 10, 17, 18, 26] - - def test_shift_sites_adj_del_after1(self): - """ - Adjacent exons: deletion at second position of second exon. - """ - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.deletion(19, 19) # g.19del - assert m.shift_sites(sites) == [4, 9, 10, 17, 18, 26] - - def test_shift_sites_adj_ins_before(self): - """ - Adjacent exons: insertion 1 position before exon/exon boundary. - """ - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.insertion(16, 'A') # g.16_17insA - assert m.shift_sites(sites) == [4, 9, 10, 18, 19, 28] - - def test_shift_sites_adj_ins_on(self): - """ - Adjacent exons: insertion at exon/exon boundary. - - @note: This insertion could be seen as being - 1) at the end of the first exon, or - 2) at the start of the second exon. - Both would probably be 'correct', but we would like consistent - results. Therefore, we stick to the first option. - """ - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.insertion(17, 'A') # g.17_18insA - assert m.shift_sites(sites) == [4, 9, 10, 18, 19, 28] - - def test_shift_sites_adj_ins_after(self): - """ - Adjacent exons: insertion 1 position after exon/exon boundary. - """ - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.insertion(18, 'A') # g.18_19insA - assert m.shift_sites(sites) == [4, 9, 10, 17, 18, 28] - - def test_shift_sites_adj_del2_before1(self): - """ - Adjacent exons: deletion of 2 at second-last position of first exon. - """ - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.deletion(15, 16) # g.15_16del - assert m.shift_sites(sites) == [4, 9, 10, 15, 16, 25] - - def test_shift_sites_adj_del2_before(self): - """ - Adjacent exons: deletion of 2 at last position of first exon. 
- """ - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.deletion(16, 17) # g.16_17del - assert m.shift_sites(sites) == [4, 9, 10, 15, 16, 25] - - def test_shift_sites_adj_del2_on(self): - """ - Adjacent exons: deletion of 2 at exon/exon boundary. - - @todo: This is a special case of bug #????. Once fixed, the two - exons will be joined to one new exon. - """ - return - - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.deletion(17, 18) # g.17_18del - assert m.shift_sites(sites) == [4, 9, 10, 16, 17, 25] - - def test_shift_sites_adj_del2_after(self): - """ - Adjacent exons: deletion of 2 at first position of second exon. - """ - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.deletion(18, 19) # g.18_19del - assert m.shift_sites(sites) == [4, 9, 10, 17, 18, 25] - - def test_shift_sites_adj_del2_after1(self): - """ - Adjacent exons: deletion of 2 at second position of second exon. - """ - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.deletion(19, 20) # g.19_20del - assert m.shift_sites(sites) == [4, 9, 10, 17, 18, 25] - - def test_shift_sites_adj_ins2_before(self): - """ - Adjacent exons: insertion of 2 1 position before exon/exon boundary. - """ - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.insertion(16, 'AT') # g.16_17insAT - assert m.shift_sites(sites) == [4, 9, 10, 19, 20, 29] - - def test_shift_sites_adj_ins2_on(self): - """ - Adjacent exons: insertion of 2 at exon/exon boundary. - - @note: This insertion could be seen as being - 1) at the end of the first exon, or - 2) at the start of the second exon. - Both would probably be 'correct', but we would like consistent - results. Therefore, we stick to the first option. 
- """ - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.insertion(17, 'AT') # g.17_18insAT - assert m.shift_sites(sites) == [4, 9, 10, 19, 20, 29] - - def test_shift_sites_adj_ins2_after(self): - """ - Adjacent exons: insertion of 2 1 position after exon/exon boundary. - """ - l = 30 - sites = [4, 9, 10, 17, 18, 27] - m = self._mutator(_seq(l)) - m.insertion(18, 'AT') # g.18_19insAT - assert m.shift_sites(sites) == [4, 9, 10, 17, 18, 29] - - def test_del(self): - """ - Simple deletion 2del. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(2, 2) - assert unicode(m.mutated) == unicode(Seq('ACGATCG')) - - def test_largedel(self): - """ - Simple large deletion 2_7del. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(2, 7) - assert unicode(m.mutated) == unicode(Seq('AG')) - - def test_ins(self): - """ - Simple insertion 2_3insA. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'A') - assert unicode(m.mutated) == unicode(Seq('ATACGATCG')) - - def test_largeins(self): - """ - Simple large insertion 2_3insATCG. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'ATCG') - assert unicode(m.mutated) == unicode(Seq('ATATCGCGATCG')) - - def test_sub(self): - """ - Simple substitution 3C>G. - """ - m = self._mutator(Seq('ATCGATCG')) - m.substitution(3, 'G') - assert unicode(m.mutated) == unicode(Seq('ATGGATCG')) - - def test_adjecent_del_sub_1(self): - """ - Deletion and substitution directly adjecent to each other [2del;3C>G]. - - See Trac #83. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(2, 2) - m.substitution(3, 'G') - assert unicode(m.mutated) == unicode(Seq('AGGATCG')) - - def test_adjecent_del_sub_2(self): - """ - Deletion and substitution directly adjecent to each other [3del;2T>G]. 
- """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(3, 3) - m.substitution(2, 'G') - assert unicode(m.mutated) == unicode(Seq('AGGATCG')) - - def test_near_adjecent_del_sub_1(self): - """ - Deletion and substitution almost adjecent to each other [2del;4G>T]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(2, 2) - m.substitution(4, 'T') - assert unicode(m.mutated) == unicode(Seq('ACTATCG')) - - def test_near_adjecent_del_sub_2(self): - """ - Deletion and substitution almost adjecent to each other [4del;2T>G]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(4, 4) - m.substitution(2, 'G') - assert unicode(m.mutated) == unicode(Seq('AGCATCG')) - - def test_adjecent_largedel_sub_1(self): - """ - Large deletion and substitution directly adjecent to each other - [2_6del;7C>T]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(2, 6) - m.substitution(7, 'T') - assert unicode(m.mutated) == unicode(Seq('ATG')) - - def test_adjecent_largedel_sub_2(self): - """ - Large deletion and substitution directly adjecent to each other - [3_7del;2T>C]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(3, 7) - m.substitution(2, 'C') - assert unicode(m.mutated) == unicode(Seq('ACG')) - - def test_near_adjecent_largedel_sub_1(self): - """ - Large deletion and substitution almost adjecent to each other [2_5del;7C>T]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(2, 5) - m.substitution(7, 'T') - assert unicode(m.mutated) == unicode(Seq('ATTG')) - - def test_near_adjecent_largedel_sub_2(self): - """ - Large deletion and substitution almost adjecent to each other [4_7del;2T>C]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(4, 7) - m.substitution(2, 'C') - assert unicode(m.mutated) == unicode(Seq('ACCG')) - - def test_adjectent_del_ins_1(self): - """ - Deletion and insertion adjecent to each other [2del;2_3insG]. 
- """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(2, 2) - m.insertion(2, 'G') - assert unicode(m.mutated) == unicode(Seq('AGCGATCG')) - - def test_adjectent_del_ins_2(self): - """ - Deletion and insertion adjecent to each other [3del;2_3insA]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(3, 3) - m.insertion(2, 'A') - assert unicode(m.mutated) == unicode(Seq('ATAGATCG')) - - def test_near_adjectent_del_ins(self): - """ - Deletion and insertion almost adjecent to each other [2del;3_4insG]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(2, 2) - m.insertion(3, 'T') - assert unicode(m.mutated) == unicode(Seq('ACTGATCG')) - - def test_adjecent_ins_sub_1(self): - """ - Insertion and substitution directly adjecent to each other - [2_3insA;3C>G]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'A') - m.substitution(3, 'G') - assert unicode(m.mutated) == unicode(Seq('ATAGGATCG')) - - def test_adjecent_ins_sub_2(self): - """ - Insertion and substitution directly adjecent to each other - [2_3insA;2T>G]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'A') - m.substitution(2, 'G') - assert unicode(m.mutated) == unicode(Seq('AGACGATCG')) - - def test_near_adjecent_ins_sub(self): - """ - Insertion and substitution almost adjecent to each other - [2_3insA;4C>T]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'A') - m.substitution(4, 'T') - assert unicode(m.mutated) == unicode(Seq('ATACTATCG')) - - def test_adjecent_largeins_sub_1(self): - """ - Large insertion and substitution directly adjecent to each other - [2_3insATCG;3C>G]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'ATCG') - m.substitution(3, 'G') - assert unicode(m.mutated) == unicode(Seq('ATATCGGGATCG')) - - def test_adjecent_largeins_sub_2(self): - """ - Large insertion and substitution directly adjecent to each other - [2_3insATCG;2T>G]. 
- """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'ATCG') - m.substitution(2, 'G') - assert unicode(m.mutated) == unicode(Seq('AGATCGCGATCG')) - - def test_near_adjecent_largeins_sub(self): - """ - Large insertion and substitution almost adjecent to each other - [2_3insATCG;4C>T]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'ATCG') - m.substitution(4, 'T') - assert unicode(m.mutated) == unicode(Seq('ATATCGCTATCG')) - - def test_adjecent_del_del_1(self): - """ - Deletion and deletion directly adjecent to each other [2del;3del]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(2, 2) - m.deletion(3, 3) - assert unicode(m.mutated) == unicode(Seq('AGATCG')) - - def test_adjecent_del_del_2(self): - """ - Deletion and deletion directly adjecent to each other [3del;2del]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(3, 3) - m.deletion(2, 2) - assert unicode(m.mutated) == unicode(Seq('AGATCG')) - - def test_adjecent_delins_snp_1(self): - """ - Delins and deletion directly adjecent to each other [2delinsA;3C>G]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 2, 'A') - m.substitution(3, 'G') - assert unicode(m.mutated) == unicode(Seq('AAGGATCG')) - - def test_adjecent_delins_snp_2(self): - """ - Delins and deletion directly adjecent to each other [3delinsA;2T>G]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(3, 3, 'A') - m.substitution(2, 'G') - assert unicode(m.mutated) == unicode(Seq('AGAGATCG')) - - def test_adjecent_largedelins_eq_snp_1(self): - """ - Large delins and deletion directly adjecent to each other - [2_6delinsAAAAA;7C>G]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 6, 'AAAAA') - m.substitution(7, 'G') - assert unicode(m.mutated) == unicode(Seq('AAAAAAGG')) - - def test_adjecent_largedelins_min_snp_1(self): - """ - Large delins (min) and deletion directly adjecent to each other - [2_6delinsAAA;7C>G]. 
- """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 6, 'AAA') - m.substitution(7, 'G') - assert unicode(m.mutated) == unicode(Seq('AAAAGG')) - - def test_adjecent_largedelins_plus_snp_1(self): - """ - Large delins (plus) and deletion directly adjecent to each other - [2_6delinsAAAAAAA;7C>G]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 6, 'AAAAAAA') - m.substitution(7, 'G') - assert unicode(m.mutated) == unicode(Seq('AAAAAAAAGG')) - - def test_adjecent_largedelins_eq_snp_2(self): - """ - Large delins and deletion directly adjecent to each other - [3_7delinsAAAAA;2T>G]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(3, 7, 'AAAAA') - m.substitution(2, 'G') - assert unicode(m.mutated) == unicode(Seq('AGAAAAAG')) - - def test_adjecent_largedelins_min_snp_2(self): - """ - Large delins (min) and deletion directly adjecent to each other - [3_7delinsAAA;2T>G]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(3, 7, 'AAA') - m.substitution(2, 'G') - assert unicode(m.mutated) == unicode(Seq('AGAAAG')) - - def test_adjecent_largedelins_plus_snp_2(self): - """ - Large delins (plus) and deletion directly adjecent to each other - [3_7delinsAAAAAAA;2T>G]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(3, 7, 'AAAAAAA') - m.substitution(2, 'G') - assert unicode(m.mutated) == unicode(Seq('AGAAAAAAAG')) - - def test_adjecent_delins_del_1(self): - """ - Delins and deletion directly adjecent to each other [2delinsA;3del]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 2, 'A') - m.deletion(3, 3) - assert unicode(m.mutated) == unicode(Seq('AAGATCG')) - - def test_adjecent_delins_del_2(self): - """ - Delins and deletion directly adjecent to each other [3delinsA;2del]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(3, 3, 'A') - m.deletion(2, 2) - assert unicode(m.mutated) == unicode(Seq('AAGATCG')) - - def test_adjecent_largedelins_eq_del_1(self): - """ - Large delins and deletion directly adjecent to each other - [2_6delinsAAAAA;7del]. 
- """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 6, 'AAAAA') - m.deletion(7, 7) - assert unicode(m.mutated) == unicode(Seq('AAAAAAG')) - - def test_adjecent_largedelins_min_del_1(self): - """ - Large delins (min) and deletion directly adjecent to each other - [2_6delinsAAA;7del]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 6, 'AAA') - m.deletion(7, 7) - assert unicode(m.mutated) == unicode(Seq('AAAAG')) - - def test_adjecent_largedelins_plus_del_1(self): - """ - Large delins (plus) and deletion directly adjecent to each other - [2_6delinsAAAAAAA;7del]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 6, 'AAAAAAA') - m.deletion(7, 7) - assert unicode(m.mutated) == unicode(Seq('AAAAAAAAG')) - - def test_adjecent_largedelins_eq_del_2(self): - """ - Large delins and deletion directly adjecent to each other - [3_7delinsAAAAA;2del]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(3, 7, 'AAAAA') - m.deletion(2, 2) - assert unicode(m.mutated) == unicode(Seq('AAAAAAG')) - - def test_adjecent_largedelins_min_del_2(self): - """ - Large delins (min) and deletion directly adjecent to each other - [3_7delinsAAA;2del]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(3, 7, 'AAA') - m.deletion(2, 2) - assert unicode(m.mutated) == unicode(Seq('AAAAG')) - - def test_adjecent_largedelins_plus_del_2(self): - """ - Large delins (plus) and deletion directly adjecent to each other - [3_7delinsAAAAAAA;2del]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(3, 7, 'AAAAAAA') - m.deletion(2, 2) - assert unicode(m.mutated) == unicode(Seq('AAAAAAAAG')) - - def test_adjectent_delins_ins_1(self): - """ - Delins and insertion adjecent to each other [2delinsA;2_3insG]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 2, 'A') - m.insertion(2, 'G') - assert unicode(m.mutated) == unicode(Seq('AAGCGATCG')) - - def test_adjectent_delins_ins_2(self): - """ - Delins and insertion adjecent to each other [3delinsA;2_3insG]. 
- """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(3, 3, 'A') - m.insertion(2, 'G') - assert unicode(m.mutated) == unicode(Seq('ATGAGATCG')) - - def test_adjectent_largedelins_eq_ins_1(self): - """ - Large delins and insertion adjecent to each other [2_6delinsAAAAA;6_7insG]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 6, 'AAAAA') - m.insertion(6, 'G') - assert unicode(m.mutated) == unicode(Seq('AAAAAAGCG')) - - def test_adjectent_largedelins_min_ins_1(self): - """ - Large delins (min) and insertion adjecent to each other [2_6delinsAAA;6_7insG]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 6, 'AAA') - m.insertion(6, 'G') - assert unicode(m.mutated) == unicode(Seq('AAAAGCG')) - - def test_adjectent_largedelins_plus_ins_1(self): - """ - Large delins (plus) and insertion adjecent to each other [2_6delinsAAAAAAA;6_7insG]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 6, 'AAAAAAA') - m.insertion(6, 'G') - assert unicode(m.mutated) == unicode(Seq('AAAAAAAAGCG')) - - def test_adjectent_largedelins_eq_ins_2(self): - """ - Large delins and insertion adjecent to each other [3_7delinsAAAAA;2_3insG]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(3, 7, 'AAAAA') - m.insertion(2, 'G') - assert unicode(m.mutated) == unicode(Seq('ATGAAAAAG')) - - def test_adjectent_largedelins_min_ins_2(self): - """ - Large delins (min) and insertion adjecent to each other [3_7delinsAAA;2_3insG]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(3, 7, 'AAA') - m.insertion(2, 'G') - assert unicode(m.mutated) == unicode(Seq('ATGAAAG')) - - def test_adjectent_largedelins_plus_ins_2(self): - """ - Large delins (plus) and insertion adjecent to each other [3_7delinsAAAAAAA;2_3insG]. 
- """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(3, 7, 'AAAAAAA') - m.insertion(2, 'G') - assert unicode(m.mutated) == unicode(Seq('ATGAAAAAAAG')) - - def test_adjectent_delins_del_delins(self): - """ - Delins (deletion) and delins (SNP) adjecent to each other [2_3delinsA;4delinsT]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 3, 'A') - m.delins(4, 4, 'T') - assert unicode(m.mutated) == unicode(Seq('AATATCG')) - - def test_adjectent_largedelins_plus_delins_1(self): - """ - Large delins (plus) and delins adjecent to each other [2_6delinsAAAAAAA;7delinsT]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 6, 'AAAAAAA') - m.delins(7, 7, 'T') - assert unicode(m.mutated) == unicode(Seq('AAAAAAAATG')) - - def test_adjectent_largedelins_plus_delins_2(self): - """ - Large delins (plus) and delins adjecent to each other [3_7delinsAAAAAAA;2delinsC]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(3, 7, 'AAAAAAA') - m.delins(2, 2, 'C') - assert unicode(m.mutated) == unicode(Seq('ACAAAAAAAG')) - - def test_adjectent_largedelins_min_delins_1(self): - """ - Large delins (min) and delins adjecent to each other [2_6delinsAAA;7delinsT]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(2, 6, 'AAA') - m.delins(7, 7, 'T') - assert unicode(m.mutated) == unicode(Seq('AAAATG')) - - def test_adjectent_largedelins_min_delins_2(self): - """ - Large delins (min) and delins adjecent to each other [3_7delinsAAA;2delinsC]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.delins(3, 7, 'AAA') - m.delins(2, 2, 'C') - assert unicode(m.mutated) == unicode(Seq('ACAAAG')) - - def test_adjectent_del_dup_1(self): - """ - Deletion and duplication adjecent to each other [2del;3dup]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(2, 2) - m.duplication(3, 3) - assert unicode(m.mutated) == unicode(Seq('ACCGATCG')) - - def test_adjectent_del_dup_2(self): - """ - Deletion and duplication adjecent to each other [3del;2dup]. 
- """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(3, 3) - m.duplication(2, 2) - assert unicode(m.mutated) == unicode(Seq('ATTGATCG')) - - def test_adjectent_ins_dup_1(self): - """ - Insertion and duplication adjecent to each other [2_3insG;3dup]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'G') - m.duplication(3, 3) - assert unicode(m.mutated) == unicode(Seq('ATGCCGATCG')) - - def test_adjectent_ins_dup_2(self): - """ - Insertion and duplication adjecent to each other [2_3insG;2dup]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'G') - m.duplication(2, 2) - assert unicode(m.mutated) == unicode(Seq('ATTGCGATCG')) - - def test_adjectent_ins_ins_1(self): - """ - Insertion and insertion adjecent to each other [2_3insG;3_4insA]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'G') - m.insertion(3, 'A') - assert unicode(m.mutated) == unicode(Seq('ATGCAGATCG')) - - def test_adjectent_ins_ins_2(self): - """ - Insertion and insertion adjecent to each other [3_4insA;2_3insG]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(3, 'A') - m.insertion(2, 'G') - assert unicode(m.mutated) == unicode(Seq('ATGCAGATCG')) - - def test_ins_ins(self): - """ - Insertion and insertion at same position [2_3insG;2_3insA]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'G') - m.insertion(2, 'A') - assert unicode(m.mutated) in (unicode(Seq('ATGACGATCG')), unicode(Seq('ATAGCGATCG'))) - - def test_adjecent_inv_inv_1(self): - """ - Inversion and inversion directly adjecent to each other [2inv;3inv]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.inversion(2, 2) - m.inversion(3, 3) - assert unicode(m.mutated) == unicode(Seq('AAGGATCG')) - - def test_adjecent_inv_inv_2(self): - """ - Inversion and inversion directly adjecent to each other [3inv;2inv]. 
- """ - m = self._mutator(Seq('ATCGATCG')) - m.inversion(3, 3) - m.inversion(2, 2) - assert unicode(m.mutated) == unicode(Seq('AAGGATCG')) - - def test_adjecent_dup_dup_1(self): - """ - Duplication and duplication directly adjecent to each other [2dup;3dup]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.duplication(2, 2) - m.duplication(3, 3) - assert unicode(m.mutated) == unicode(Seq('ATTCCGATCG')) - - def test_adjecent_dup_dup_2(self): - """ - Duplication and duplication directly adjecent to each other [3dup;2dup]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.duplication(3, 3) - m.duplication(2, 2) - assert unicode(m.mutated) == unicode(Seq('ATTCCGATCG')) - - def test_adjecent_del_inv_1(self): - """ - Deletion and inversion directly adjecent to each other [2del;3inv]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(2, 2) - m.inversion(3, 3) - assert unicode(m.mutated) == unicode(Seq('AGGATCG')) - - def test_adjecent_del_inv_2(self): - """ - Deletion and inversion directly adjecent to each other [3del;2inv]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.deletion(3, 3) - m.inversion(2, 2) - assert unicode(m.mutated) == unicode(Seq('AAGATCG')) - - def test_adjecent_ins_inv_1(self): - """ - Insertion and inversion directly adjecent to each other [2_3insG;3inv]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'G') - m.inversion(3, 3) - assert unicode(m.mutated) == unicode(Seq('ATGGGATCG')) - - def test_adjecent_ins_inv_2(self): - """ - Insertion and inversion directly adjecent to each other [2_3insG;2inv]. - """ - m = self._mutator(Seq('ATCGATCG')) - m.insertion(2, 'G') - m.inversion(2, 2) - assert unicode(m.mutated) == unicode(Seq('AAGCGATCG')) +from mutalyzer.mutator import Mutator + + +@pytest.fixture +def length(): + # Many tests depend on this being at least 30. 
+ return 30 + + +@pytest.fixture +def sequence(length): + return Seq(''.join(random.choice('ACGT') for _ in range(length))) + + +@pytest.fixture +def mutator(output, sequence): + return Mutator(sequence, output) + + +def test_shift_no_change(length, mutator): + """ + No change, no shifts. + """ + # Numbering is 1-based + for i in range(1, length + 1): + assert mutator.shift(i) == i + + +def test_shift_del_example(mutator): + """ + Example of g.2del. + """ + mutator.deletion(2, 2) + assert mutator.shift(1) == 1 + assert mutator.shift(2) == 2 + assert mutator.shift(3) == 2 + + +@pytest.mark.parametrize('length', [10]) +@pytest.mark.parametrize('d', range(1, 11)) +def test_shift_del(length, mutator, d): + """ + Starting from the deleted position (not included), shift -1. + """ + mutator.deletion(d, d) + for p in range(1, d + 1): + assert mutator.shift(p) == p + for p in range(d + 1, length + 1): + assert mutator.shift(p) == p - 1 + + +@pytest.mark.parametrize('length', [10]) +@pytest.mark.parametrize('d', range(1, 10)) +def test_shift_del2(length, mutator, d): + """ + Starting from the deleted positions (not included), shift -2. + """ + mutator.deletion(d, d + 1) + for p in range(1, d + 2): + assert mutator.shift(p) == p + for p in range(d + 2, length + 1): + assert mutator.shift(p) == p - 2 + + +def test_shift_ins_example(mutator): + """ + Example of g.2_3insA. + """ + mutator.insertion(2, 'A') + assert mutator.shift(1) == 1 + assert mutator.shift(2) == 2 + assert mutator.shift(3) == 4 + + +@pytest.mark.parametrize('length', [10]) +@pytest.mark.parametrize('i', range(11)) +def test_shift_ins(length, mutator, i): + """ + Starting from the interbase insertion position, shift +1. 
+ """ + mutator.insertion(i, 'T') + for p in range(1, i + 1): + assert mutator.shift(p) == p + for p in range(i + 1, length + 1): + assert mutator.shift(p) == p + 1 + + +@pytest.mark.parametrize('length', [10]) +@pytest.mark.parametrize('i', range(11)) +def test_shift_ins2(length, mutator, i): + """ + Starting from the interbase insertion position, shift +2. + """ + mutator.insertion(i, 'TT') + for p in range(1, i + 1): + assert mutator.shift(p) == p + for p in range(i + 1, length + 1): + assert mutator.shift(p) == p + 2 + + +def test_shift_sites_no_change(mutator): + """ + No change, no shifts. + + Note: Splice sites come in pairs (acceptor and donor site) and the + numbers are the first, respectively last, position in the exon. + + So in this example we have: ---======----======-----===--- + | | | | | | + 4 9 14 19 25 27 + """ + sites = [4, 9, 14, 19, 25, 27] + assert mutator.shift_sites(sites) == sites + + +def test_shift_sites_acc_del_before(mutator): + """ + Deletion in intron directly before exon. + + @note: This hits a splice site, so we don't really support it. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.deletion(13, 13) # g.13del + assert mutator.shift_sites(sites) == [4, 9, 13, 16, 24, 26] + + +def test_shift_sites_acc_del_after(mutator): + """ + Deletion at first exon position. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.deletion(14, 14) # g.14del + assert mutator.shift_sites(sites) == [4, 9, 14, 16, 24, 26] + + +def test_shift_sites_don_del_before(mutator): + """ + Deletion at last exon position. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.deletion(17, 17) # g.17del + assert mutator.shift_sites(sites) == [4, 9, 14, 16, 24, 26] + + +def test_shift_sites_don_del_after(mutator): + """ + Deletion in intron directly after exon. + + @note: This hits a splice site, so we don't really support it. 
+ """ + sites = [4, 9, 14, 17, 25, 27] + mutator.deletion(18, 18) # g.18del + assert mutator.shift_sites(sites) == [4, 9, 14, 17, 24, 26] + + +def test_shift_sites_acc_del2_before(mutator): + """ + Deletion of 2 in intron directly before exon. + + @note: This hits a splice site, so we don't really support it. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.deletion(12, 13) # g.12_13del + assert mutator.shift_sites(sites) == [4, 9, 12, 15, 23, 25] + + +def test_shift_sites_acc_del2_on(mutator): + """ + Deletion of 2 in intron/exon. + + @note: This hits a splice site, so we don't really support it. + """ + return + + sites = [4, 9, 14, 17, 25, 27] + mutator.deletion(13, 14) # g.13_14del + assert mutator.shift_sites(sites) == [4, 9, 13, 15, 23, 25] + + +def test_shift_sites_acc_del2_after(mutator): + """ + Deletion of 2 at first exon position. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.deletion(14, 15) # g.14_15del + assert mutator.shift_sites(sites) == [4, 9, 14, 15, 23, 25] + + +def test_shift_sites_don_del2_before(mutator): + """ + Deletion of 2 at last exon positions. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.deletion(16, 17) # g.16_17del + assert mutator.shift_sites(sites) == [4, 9, 14, 15, 23, 25] + + +def test_shift_sites_don_del2_on(mutator): + """ + Deletion of 2 in exon/intron. + + @note: This hits a splice site, so we don't really support it. + """ + return + + sites = [4, 9, 14, 17, 25, 27] + mutator.deletion(17, 18) # g.17_18del + assert mutator.shift_sites(sites) == [4, 9, 14, 16, 23, 25] + + +def test_shift_sites_don_del2_after(mutator): + """ + Deletion of 2 in intron directly after exon. + + @note: This hits a splice site, so we don't really support it. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.deletion(18, 19) # g.18_19del + assert mutator.shift_sites(sites) == [4, 9, 14, 17, 23, 25] + + +def test_shift_sites_acc_ins_before(mutator): + """ + Insertion 1 position before intron/exon boundary. 
+ + @note: This hits a splice site, so we don't really support it. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(12, 'A') # g.12_13insA + assert mutator.shift_sites(sites) == [4, 9, 15, 18, 26, 28] + + +def test_shift_sites_acc_ins_on(mutator): + """ + Insertion in intron/exon boundary. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(13, 'A') # g.13_14insA + assert mutator.shift_sites(sites) == [4, 9, 14, 18, 26, 28] + + +def test_shift_sites_first_acc_ins_on(mutator): + """ + Insertion in first intron/exon boundary not be included. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(3, 'A') # g.3_4insA + assert mutator.shift_sites(sites) == [5, 10, 15, 18, 26, 28] + + +def test_shift_sites_acc_ins_after(mutator): + """ + Insertion 1 position after intron/exon boundary. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(14, 'A') # g.14_15insA + assert mutator.shift_sites(sites) == [4, 9, 14, 18, 26, 28] + + +def test_shift_sites_don_ins_before(mutator): + """ + Insertion 1 position before exon/intron boundary. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(16, 'A') # g.16_17insA + assert mutator.shift_sites(sites) == [4, 9, 14, 18, 26, 28] + + +def test_shift_sites_don_ins_on(mutator): + """ + Insertion in exon/intron boundary. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(17, 'A') # g.17_18insA + assert mutator.shift_sites(sites) == [4, 9, 14, 18, 26, 28] + + +def test_shift_sites_last_don_ins_on(mutator): + """ + Insertion in last exon/intron boundary should not be included. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(27, 'A') # g.27_28insA + assert mutator.shift_sites(sites) == [4, 9, 14, 17, 25, 27] + + +def test_shift_sites_don_ins_after(mutator): + """ + Insertion 1 position after exon/intron boundary. + + @note: This hits a splice site, so we don't really support it. 
+ """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(18, 'A') # g.18_19insA + assert mutator.shift_sites(sites) == [4, 9, 14, 17, 26, 28] + + +def test_shift_sites_acc_ins2_before(mutator): + """ + Insertion of 2 1 position before intron/exon boundary. + + @note: This hits a splice site, so we don't really support it. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(12, 'AT') # g.12_13insAT + assert mutator.shift_sites(sites) == [4, 9, 16, 19, 27, 29] + + +def test_shift_sites_first_acc_ins2_on(mutator): + """ + Insertion of 2 in last exon/intron boundary should not be included. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(3, 'AT') # g.3_4insAT + assert mutator.shift_sites(sites) == [6, 11, 16, 19, 27, 29] + + +def test_shift_sites_acc_ins2_after(mutator): + """ + Insertion of 2 1 position after intron/exon boundary. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(14, 'AT') # g.14_15insAT + assert mutator.shift_sites(sites) == [4, 9, 14, 19, 27, 29] + + +def test_shift_sites_don_ins2_before(mutator): + """ + Insertion of 2 1 position before exon/intron boundary. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(16, 'AT') # g.16_17insAT + assert mutator.shift_sites(sites) == [4, 9, 14, 19, 27, 29] + + +def test_shift_sites_last_don_ins2_on(mutator): + """ + Insertion of 2 in last exon/intron boundary should not be included. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(27, 'AT') # g.27_28insAT + assert mutator.shift_sites(sites) == [4, 9, 14, 17, 25, 27] + + +def test_shift_sites_don_ins2_after(mutator): + """ + Insertion of 2 1 position after exon/intron boundary. + + @note: This hits a splice site, so we don't really support it. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(18, 'AT') # g.18_19insAT + assert mutator.shift_sites(sites) == [4, 9, 14, 17, 27, 29] + + +def test_shift_sites_acc_ins3_before(mutator): + """ + Insertion of 3 1 position before intron/exon boundary. 
+ + @note: This hits a splice site, so we don't really support it. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(12, 'ATT') # g.12_13insATT + assert mutator.shift_sites(sites) == [4, 9, 17, 20, 28, 30] + + +def test_shift_sites_acc_ins3_on(mutator): + """ + Insertion of 3 in intron/exon boundary. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(13, 'ATT') # g.13_14insATT + assert mutator.shift_sites(sites) == [4, 9, 14, 20, 28, 30] + + +def test_shift_sites_first_acc_ins3_on(mutator): + """ + Insertion of 3 in first intron/exon boundary should not be included. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(3, 'ATT') # g.3_4insATT + assert mutator.shift_sites(sites) == [7, 12, 17, 20, 28, 30] + + +def test_shift_sites_acc_ins3_after(mutator): + """ + Insertion of 3 1 position after intron/exon boundary. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(14, 'ATT') # g.14_15insATT + assert mutator.shift_sites(sites) == [4, 9, 14, 20, 28, 30] + + +def test_shift_sites_don_ins3_before(mutator): + """ + Insertion of 3 1 position before exon/intron boundary. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(16, 'ATT') # g.16_17insATT + assert mutator.shift_sites(sites) == [4, 9, 14, 20, 28, 30] + + +def test_shift_sites_don_ins3_on(mutator): + """ + Insertion of 3 in exon/intron boundary. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(17, 'ATT') # g.17_18insATT + assert mutator.shift_sites(sites) == [4, 9, 14, 20, 28, 30] + + +def test_shift_sites_last_don_ins3_on(mutator): + """ + Insertion of 3 in last exon/intron boundary should not be included. + """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(27, 'ATT') # g.27_28insATT + assert mutator.shift_sites(sites) == [4, 9, 14, 17, 25, 27] + + +def test_shift_sites_don_ins3_after(mutator): + """ + Insertion of 3 1 position after exon/intron boundary. + + @note: This hits a splice site, so we don't really support it. 
+ """ + sites = [4, 9, 14, 17, 25, 27] + mutator.insertion(18, 'ATT') # g.18_19insATT + assert mutator.shift_sites(sites) == [4, 9, 14, 17, 28, 30] + + +def test_shift_sites_adj_del_before1(mutator): + """ + Adjacent exons: deletion at second-last position of first exon. + + @note: In this example we have adjacent exons (like e.g. in RNA), + which looks like this (the square brackets [ and ] are part of the + exons): + ---[====][======][========]--- + | / \ / \ | + 4 9 10 17 18 27 + """ + sites = [4, 9, 10, 17, 18, 27] + mutator.deletion(16, 16) # g.16del + assert mutator.shift_sites(sites) == [4, 9, 10, 16, 17, 26] + + +def test_shift_sites_adj_del_before(mutator): + """ + Adjacent exons: deletion at last position of first exon. + """ + sites = [4, 9, 10, 17, 18, 27] + mutator.deletion(17, 17) # g.17del + assert mutator.shift_sites(sites) == [4, 9, 10, 16, 17, 26] + + +def test_shift_sites_adj_del_after(mutator): + """ + Adjacent exons: deletion at first position of second exon. + """ + sites = [4, 9, 10, 17, 18, 27] + mutator.deletion(18, 18) # g.18del + assert mutator.shift_sites(sites) == [4, 9, 10, 17, 18, 26] + + +def test_shift_sites_adj_del_after1(mutator): + """ + Adjacent exons: deletion at second position of second exon. + """ + sites = [4, 9, 10, 17, 18, 27] + mutator.deletion(19, 19) # g.19del + assert mutator.shift_sites(sites) == [4, 9, 10, 17, 18, 26] + + +def test_shift_sites_adj_ins_before(mutator): + """ + Adjacent exons: insertion 1 position before exon/exon boundary. + """ + sites = [4, 9, 10, 17, 18, 27] + mutator.insertion(16, 'A') # g.16_17insA + assert mutator.shift_sites(sites) == [4, 9, 10, 18, 19, 28] + + +def test_shift_sites_adj_ins_on(mutator): + """ + Adjacent exons: insertion at exon/exon boundary. + + @note: This insertion could be seen as being + 1) at the end of the first exon, or + 2) at the start of the second exon. + Both would probably be 'correct', but we would like consistent + results. 
Therefore, we stick to the first option. + """ + sites = [4, 9, 10, 17, 18, 27] + mutator.insertion(17, 'A') # g.17_18insA + assert mutator.shift_sites(sites) == [4, 9, 10, 18, 19, 28] + + +def test_shift_sites_adj_ins_after(mutator): + """ + Adjacent exons: insertion 1 position after exon/exon boundary. + """ + sites = [4, 9, 10, 17, 18, 27] + mutator.insertion(18, 'A') # g.18_19insA + assert mutator.shift_sites(sites) == [4, 9, 10, 17, 18, 28] + + +def test_shift_sites_adj_del2_before1(mutator): + """ + Adjacent exons: deletion of 2 at second-last position of first exon. + """ + sites = [4, 9, 10, 17, 18, 27] + mutator.deletion(15, 16) # g.15_16del + assert mutator.shift_sites(sites) == [4, 9, 10, 15, 16, 25] + + +def test_shift_sites_adj_del2_before(mutator): + """ + Adjacent exons: deletion of 2 at last position of first exon. + """ + sites = [4, 9, 10, 17, 18, 27] + mutator.deletion(16, 17) # g.16_17del + assert mutator.shift_sites(sites) == [4, 9, 10, 15, 16, 25] + + +def test_shift_sites_adj_del2_on(mutator): + """ + Adjacent exons: deletion of 2 at exon/exon boundary. + + @todo: This is a special case of bug #????. Once fixed, the two + exons will be joined to one new exon. + """ + return + + sites = [4, 9, 10, 17, 18, 27] + mutator.deletion(17, 18) # g.17_18del + assert mutator.shift_sites(sites) == [4, 9, 10, 16, 17, 25] + + +def test_shift_sites_adj_del2_after(mutator): + """ + Adjacent exons: deletion of 2 at first position of second exon. + """ + sites = [4, 9, 10, 17, 18, 27] + mutator.deletion(18, 19) # g.18_19del + assert mutator.shift_sites(sites) == [4, 9, 10, 17, 18, 25] + + +def test_shift_sites_adj_del2_after1(mutator): + """ + Adjacent exons: deletion of 2 at second position of second exon. 
+ """ + sites = [4, 9, 10, 17, 18, 27] + mutator.deletion(19, 20) # g.19_20del + assert mutator.shift_sites(sites) == [4, 9, 10, 17, 18, 25] + + +def test_shift_sites_adj_ins2_before(mutator): + """ + Adjacent exons: insertion of 2 1 position before exon/exon boundary. + """ + sites = [4, 9, 10, 17, 18, 27] + mutator.insertion(16, 'AT') # g.16_17insAT + assert mutator.shift_sites(sites) == [4, 9, 10, 19, 20, 29] + + +def test_shift_sites_adj_ins2_on(mutator): + """ + Adjacent exons: insertion of 2 at exon/exon boundary. + + @note: This insertion could be seen as being + 1) at the end of the first exon, or + 2) at the start of the second exon. + Both would probably be 'correct', but we would like consistent + results. Therefore, we stick to the first option. + """ + sites = [4, 9, 10, 17, 18, 27] + mutator.insertion(17, 'AT') # g.17_18insAT + assert mutator.shift_sites(sites) == [4, 9, 10, 19, 20, 29] + + +def test_shift_sites_adj_ins2_after(mutator): + """ + Adjacent exons: insertion of 2 1 position after exon/exon boundary. + """ + sites = [4, 9, 10, 17, 18, 27] + mutator.insertion(18, 'AT') # g.18_19insAT + assert mutator.shift_sites(sites) == [4, 9, 10, 17, 18, 29] + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_del(mutator): + """ + Simple deletion 2del. + """ + mutator.deletion(2, 2) + assert unicode(mutator.mutated) == unicode(Seq('ACGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_largedel(mutator): + """ + Simple large deletion 2_7del. + """ + mutator.deletion(2, 7) + assert unicode(mutator.mutated) == unicode(Seq('AG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_ins(mutator): + """ + Simple insertion 2_3insA. + """ + mutator.insertion(2, 'A') + assert unicode(mutator.mutated) == unicode(Seq('ATACGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_largeins(mutator): + """ + Simple large insertion 2_3insATCG. 
+ """ + mutator.insertion(2, 'ATCG') + assert unicode(mutator.mutated) == unicode(Seq('ATATCGCGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_sub(mutator): + """ + Simple substitution 3C>G. + """ + mutator.substitution(3, 'G') + assert unicode(mutator.mutated) == unicode(Seq('ATGGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_del_sub_1(mutator): + """ + Deletion and substitution directly adjecent to each other [2del;3C>G]. + + See Trac #83. + """ + mutator.deletion(2, 2) + mutator.substitution(3, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AGGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_del_sub_2(mutator): + """ + Deletion and substitution directly adjecent to each other [3del;2T>G]. + """ + mutator.deletion(3, 3) + mutator.substitution(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AGGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_near_adjecent_del_sub_1(mutator): + """ + Deletion and substitution almost adjecent to each other [2del;4G>T]. + """ + mutator.deletion(2, 2) + mutator.substitution(4, 'T') + assert unicode(mutator.mutated) == unicode(Seq('ACTATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_near_adjecent_del_sub_2(mutator): + """ + Deletion and substitution almost adjecent to each other [4del;2T>G]. + """ + mutator.deletion(4, 4) + mutator.substitution(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AGCATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largedel_sub_1(mutator): + """ + Large deletion and substitution directly adjecent to each other + [2_6del;7C>T]. 
+ """ + mutator.deletion(2, 6) + mutator.substitution(7, 'T') + assert unicode(mutator.mutated) == unicode(Seq('ATG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largedel_sub_2(mutator): + """ + Large deletion and substitution directly adjecent to each other + [3_7del;2T>C]. + """ + mutator.deletion(3, 7) + mutator.substitution(2, 'C') + assert unicode(mutator.mutated) == unicode(Seq('ACG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_near_adjecent_largedel_sub_1(mutator): + """ + Large deletion and substitution almost adjecent to each other [2_5del;7C>T]. + """ + mutator.deletion(2, 5) + mutator.substitution(7, 'T') + assert unicode(mutator.mutated) == unicode(Seq('ATTG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_near_adjecent_largedel_sub_2(mutator): + """ + Large deletion and substitution almost adjecent to each other [4_7del;2T>C]. + """ + mutator.deletion(4, 7) + mutator.substitution(2, 'C') + assert unicode(mutator.mutated) == unicode(Seq('ACCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_del_ins_1(mutator): + """ + Deletion and insertion adjecent to each other [2del;2_3insG]. + """ + mutator.deletion(2, 2) + mutator.insertion(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AGCGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_del_ins_2(mutator): + """ + Deletion and insertion adjecent to each other [3del;2_3insA]. + """ + mutator.deletion(3, 3) + mutator.insertion(2, 'A') + assert unicode(mutator.mutated) == unicode(Seq('ATAGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_near_adjectent_del_ins(mutator): + """ + Deletion and insertion almost adjecent to each other [2del;3_4insG]. 
+ """ + mutator.deletion(2, 2) + mutator.insertion(3, 'T') + assert unicode(mutator.mutated) == unicode(Seq('ACTGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_ins_sub_1(mutator): + """ + Insertion and substitution directly adjecent to each other + [2_3insA;3C>G]. + """ + mutator.insertion(2, 'A') + mutator.substitution(3, 'G') + assert unicode(mutator.mutated) == unicode(Seq('ATAGGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_ins_sub_2(mutator): + """ + Insertion and substitution directly adjecent to each other + [2_3insA;2T>G]. + """ + mutator.insertion(2, 'A') + mutator.substitution(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AGACGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_near_adjecent_ins_sub(mutator): + """ + Insertion and substitution almost adjecent to each other + [2_3insA;4C>T]. + """ + mutator.insertion(2, 'A') + mutator.substitution(4, 'T') + assert unicode(mutator.mutated) == unicode(Seq('ATACTATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largeins_sub_1(mutator): + """ + Large insertion and substitution directly adjecent to each other + [2_3insATCG;3C>G]. + """ + mutator.insertion(2, 'ATCG') + mutator.substitution(3, 'G') + assert unicode(mutator.mutated) == unicode(Seq('ATATCGGGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largeins_sub_2(mutator): + """ + Large insertion and substitution directly adjecent to each other + [2_3insATCG;2T>G]. + """ + mutator.insertion(2, 'ATCG') + mutator.substitution(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AGATCGCGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_near_adjecent_largeins_sub(mutator): + """ + Large insertion and substitution almost adjecent to each other + [2_3insATCG;4C>T]. 
+ """ + mutator.insertion(2, 'ATCG') + mutator.substitution(4, 'T') + assert unicode(mutator.mutated) == unicode(Seq('ATATCGCTATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_del_del_1(mutator): + """ + Deletion and deletion directly adjecent to each other [2del;3del]. + """ + mutator.deletion(2, 2) + mutator.deletion(3, 3) + assert unicode(mutator.mutated) == unicode(Seq('AGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_del_del_2(mutator): + """ + Deletion and deletion directly adjecent to each other [3del;2del]. + """ + mutator.deletion(3, 3) + mutator.deletion(2, 2) + assert unicode(mutator.mutated) == unicode(Seq('AGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_delins_snp_1(mutator): + """ + Delins and deletion directly adjecent to each other [2delinsA;3C>G]. + """ + mutator.delins(2, 2, 'A') + mutator.substitution(3, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AAGGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_delins_snp_2(mutator): + """ + Delins and deletion directly adjecent to each other [3delinsA;2T>G]. + """ + mutator.delins(3, 3, 'A') + mutator.substitution(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AGAGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largedelins_eq_snp_1(mutator): + """ + Large delins and deletion directly adjecent to each other + [2_6delinsAAAAA;7C>G]. + """ + mutator.delins(2, 6, 'AAAAA') + mutator.substitution(7, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AAAAAAGG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largedelins_min_snp_1(mutator): + """ + Large delins (min) and deletion directly adjecent to each other + [2_6delinsAAA;7C>G]. 
+ """ + mutator.delins(2, 6, 'AAA') + mutator.substitution(7, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AAAAGG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largedelins_plus_snp_1(mutator): + """ + Large delins (plus) and deletion directly adjecent to each other + [2_6delinsAAAAAAA;7C>G]. + """ + mutator.delins(2, 6, 'AAAAAAA') + mutator.substitution(7, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AAAAAAAAGG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largedelins_eq_snp_2(mutator): + """ + Large delins and deletion directly adjecent to each other + [3_7delinsAAAAA;2T>G]. + """ + mutator.delins(3, 7, 'AAAAA') + mutator.substitution(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AGAAAAAG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largedelins_min_snp_2(mutator): + """ + Large delins (min) and deletion directly adjecent to each other + [3_7delinsAAA;2T>G]. + """ + mutator.delins(3, 7, 'AAA') + mutator.substitution(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AGAAAG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largedelins_plus_snp_2(mutator): + """ + Large delins (plus) and deletion directly adjecent to each other + [3_7delinsAAAAAAA;2T>G]. + """ + mutator.delins(3, 7, 'AAAAAAA') + mutator.substitution(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AGAAAAAAAG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_delins_del_1(mutator): + """ + Delins and deletion directly adjecent to each other [2delinsA;3del]. + """ + mutator.delins(2, 2, 'A') + mutator.deletion(3, 3) + assert unicode(mutator.mutated) == unicode(Seq('AAGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_delins_del_2(mutator): + """ + Delins and deletion directly adjecent to each other [3delinsA;2del]. 
+ """ + mutator.delins(3, 3, 'A') + mutator.deletion(2, 2) + assert unicode(mutator.mutated) == unicode(Seq('AAGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largedelins_eq_del_1(mutator): + """ + Large delins and deletion directly adjecent to each other + [2_6delinsAAAAA;7del]. + """ + mutator.delins(2, 6, 'AAAAA') + mutator.deletion(7, 7) + assert unicode(mutator.mutated) == unicode(Seq('AAAAAAG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largedelins_min_del_1(mutator): + """ + Large delins (min) and deletion directly adjecent to each other + [2_6delinsAAA;7del]. + """ + mutator.delins(2, 6, 'AAA') + mutator.deletion(7, 7) + assert unicode(mutator.mutated) == unicode(Seq('AAAAG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largedelins_plus_del_1(mutator): + """ + Large delins (plus) and deletion directly adjecent to each other + [2_6delinsAAAAAAA;7del]. + """ + mutator.delins(2, 6, 'AAAAAAA') + mutator.deletion(7, 7) + assert unicode(mutator.mutated) == unicode(Seq('AAAAAAAAG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largedelins_eq_del_2(mutator): + """ + Large delins and deletion directly adjecent to each other + [3_7delinsAAAAA;2del]. + """ + mutator.delins(3, 7, 'AAAAA') + mutator.deletion(2, 2) + assert unicode(mutator.mutated) == unicode(Seq('AAAAAAG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largedelins_min_del_2(mutator): + """ + Large delins (min) and deletion directly adjecent to each other + [3_7delinsAAA;2del]. + """ + mutator.delins(3, 7, 'AAA') + mutator.deletion(2, 2) + assert unicode(mutator.mutated) == unicode(Seq('AAAAG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_largedelins_plus_del_2(mutator): + """ + Large delins (plus) and deletion directly adjecent to each other + [3_7delinsAAAAAAA;2del]. 
+ """ + mutator.delins(3, 7, 'AAAAAAA') + mutator.deletion(2, 2) + assert unicode(mutator.mutated) == unicode(Seq('AAAAAAAAG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_delins_ins_1(mutator): + """ + Delins and insertion adjecent to each other [2delinsA;2_3insG]. + """ + mutator.delins(2, 2, 'A') + mutator.insertion(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AAGCGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_delins_ins_2(mutator): + """ + Delins and insertion adjecent to each other [3delinsA;2_3insG]. + """ + mutator.delins(3, 3, 'A') + mutator.insertion(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('ATGAGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_largedelins_eq_ins_1(mutator): + """ + Large delins and insertion adjecent to each other [2_6delinsAAAAA;6_7insG]. + """ + mutator.delins(2, 6, 'AAAAA') + mutator.insertion(6, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AAAAAAGCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_largedelins_min_ins_1(mutator): + """ + Large delins (min) and insertion adjecent to each other [2_6delinsAAA;6_7insG]. + """ + mutator.delins(2, 6, 'AAA') + mutator.insertion(6, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AAAAGCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_largedelins_plus_ins_1(mutator): + """ + Large delins (plus) and insertion adjecent to each other [2_6delinsAAAAAAA;6_7insG]. + """ + mutator.delins(2, 6, 'AAAAAAA') + mutator.insertion(6, 'G') + assert unicode(mutator.mutated) == unicode(Seq('AAAAAAAAGCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_largedelins_eq_ins_2(mutator): + """ + Large delins and insertion adjecent to each other [3_7delinsAAAAA;2_3insG]. 
+ """ + mutator.delins(3, 7, 'AAAAA') + mutator.insertion(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('ATGAAAAAG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_largedelins_min_ins_2(mutator): + """ + Large delins (min) and insertion adjecent to each other [3_7delinsAAA;2_3insG]. + """ + mutator.delins(3, 7, 'AAA') + mutator.insertion(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('ATGAAAG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_largedelins_plus_ins_2(mutator): + """ + Large delins (plus) and insertion adjecent to each other [3_7delinsAAAAAAA;2_3insG]. + """ + mutator.delins(3, 7, 'AAAAAAA') + mutator.insertion(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('ATGAAAAAAAG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_delins_del_delins(mutator): + """ + Delins (deletion) and delins (SNP) adjecent to each other [2_3delinsA;4delinsT]. + """ + mutator.delins(2, 3, 'A') + mutator.delins(4, 4, 'T') + assert unicode(mutator.mutated) == unicode(Seq('AATATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_largedelins_plus_delins_1(mutator): + """ + Large delins (plus) and delins adjecent to each other [2_6delinsAAAAAAA;7delinsT]. + """ + mutator.delins(2, 6, 'AAAAAAA') + mutator.delins(7, 7, 'T') + assert unicode(mutator.mutated) == unicode(Seq('AAAAAAAATG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_largedelins_plus_delins_2(mutator): + """ + Large delins (plus) and delins adjecent to each other [3_7delinsAAAAAAA;2delinsC]. + """ + mutator.delins(3, 7, 'AAAAAAA') + mutator.delins(2, 2, 'C') + assert unicode(mutator.mutated) == unicode(Seq('ACAAAAAAAG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_largedelins_min_delins_1(mutator): + """ + Large delins (min) and delins adjecent to each other [2_6delinsAAA;7delinsT]. 
+ """ + mutator.delins(2, 6, 'AAA') + mutator.delins(7, 7, 'T') + assert unicode(mutator.mutated) == unicode(Seq('AAAATG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_largedelins_min_delins_2(mutator): + """ + Large delins (min) and delins adjecent to each other [3_7delinsAAA;2delinsC]. + """ + mutator.delins(3, 7, 'AAA') + mutator.delins(2, 2, 'C') + assert unicode(mutator.mutated) == unicode(Seq('ACAAAG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_del_dup_1(mutator): + """ + Deletion and duplication adjecent to each other [2del;3dup]. + """ + mutator.deletion(2, 2) + mutator.duplication(3, 3) + assert unicode(mutator.mutated) == unicode(Seq('ACCGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_del_dup_2(mutator): + """ + Deletion and duplication adjecent to each other [3del;2dup]. + """ + mutator.deletion(3, 3) + mutator.duplication(2, 2) + assert unicode(mutator.mutated) == unicode(Seq('ATTGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_ins_dup_1(mutator): + """ + Insertion and duplication adjecent to each other [2_3insG;3dup]. + """ + mutator.insertion(2, 'G') + mutator.duplication(3, 3) + assert unicode(mutator.mutated) == unicode(Seq('ATGCCGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_ins_dup_2(mutator): + """ + Insertion and duplication adjecent to each other [2_3insG;2dup]. + """ + mutator.insertion(2, 'G') + mutator.duplication(2, 2) + assert unicode(mutator.mutated) == unicode(Seq('ATTGCGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_ins_ins_1(mutator): + """ + Insertion and insertion adjecent to each other [2_3insG;3_4insA]. 
+ """ + mutator.insertion(2, 'G') + mutator.insertion(3, 'A') + assert unicode(mutator.mutated) == unicode(Seq('ATGCAGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjectent_ins_ins_2(mutator): + """ + Insertion and insertion adjecent to each other [3_4insA;2_3insG]. + """ + mutator.insertion(3, 'A') + mutator.insertion(2, 'G') + assert unicode(mutator.mutated) == unicode(Seq('ATGCAGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_ins_ins(mutator): + """ + Insertion and insertion at same position [2_3insG;2_3insA]. + """ + mutator.insertion(2, 'G') + mutator.insertion(2, 'A') + assert unicode(mutator.mutated) in (unicode(Seq('ATGACGATCG')), unicode(Seq('ATAGCGATCG'))) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_inv_inv_1(mutator): + """ + Inversion and inversion directly adjecent to each other [2inv;3inv]. + """ + mutator.inversion(2, 2) + mutator.inversion(3, 3) + assert unicode(mutator.mutated) == unicode(Seq('AAGGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_inv_inv_2(mutator): + """ + Inversion and inversion directly adjecent to each other [3inv;2inv]. + """ + mutator.inversion(3, 3) + mutator.inversion(2, 2) + assert unicode(mutator.mutated) == unicode(Seq('AAGGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_dup_dup_1(mutator): + """ + Duplication and duplication directly adjecent to each other [2dup;3dup]. + """ + mutator.duplication(2, 2) + mutator.duplication(3, 3) + assert unicode(mutator.mutated) == unicode(Seq('ATTCCGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_dup_dup_2(mutator): + """ + Duplication and duplication directly adjecent to each other [3dup;2dup]. 
+ """ + mutator.duplication(3, 3) + mutator.duplication(2, 2) + assert unicode(mutator.mutated) == unicode(Seq('ATTCCGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_del_inv_1(mutator): + """ + Deletion and inversion directly adjecent to each other [2del;3inv]. + """ + mutator.deletion(2, 2) + mutator.inversion(3, 3) + assert unicode(mutator.mutated) == unicode(Seq('AGGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_del_inv_2(mutator): + """ + Deletion and inversion directly adjecent to each other [3del;2inv]. + """ + mutator.deletion(3, 3) + mutator.inversion(2, 2) + assert unicode(mutator.mutated) == unicode(Seq('AAGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_ins_inv_1(mutator): + """ + Insertion and inversion directly adjecent to each other [2_3insG;3inv]. + """ + mutator.insertion(2, 'G') + mutator.inversion(3, 3) + assert unicode(mutator.mutated) == unicode(Seq('ATGGGATCG')) + + +@pytest.mark.parametrize('sequence', [Seq('ATCGATCG')]) +def test_adjecent_ins_inv_2(mutator): + """ + Insertion and inversion directly adjecent to each other [2_3insG;2inv]. + """ + mutator.insertion(2, 'G') + mutator.inversion(2, 2) + assert unicode(mutator.mutated) == unicode(Seq('AAGCGATCG')) diff --git a/tests/test_parsers_genbank.py b/tests/test_parsers_genbank.py index 65d10655..e491b767 100644 --- a/tests/test_parsers_genbank.py +++ b/tests/test_parsers_genbank.py @@ -5,52 +5,44 @@ Tests for the mutalyzer.parsers.genbank module. 
from __future__ import unicode_literals -#import logging; logging.basicConfig() import os -from mutalyzer.parsers import genbank -from mutalyzer.config import settings +import pytest -from fixtures import REFERENCES -from fixtures import database, cache -from utils import MutalyzerTest -from utils import fix +from mutalyzer.parsers.genbank import GBparser -class TestMutator(MutalyzerTest): +@pytest.fixture +def parser(): + return GBparser() + + +@pytest.mark.parametrize('products,expected', [ + (['a b c d e', 'a b C D e', 'a b c d e'], (2, 1)), + (['a b c d e', 'a b C d e', 'a B c d e'], (1, 2)), + (['a c d a', 'a b a', 'a a', 'a'], (1, 1)), + ([''], (-1, -1)), + (['', ''], (-1, -1)), + (['a', 'a'], (-1, -1)), + (['a', 'b'], (0, 0)), + (['a b c', 'a b c'], (-1, -1)), + (['a b c d a b', 'a b'], (2, 2)) +]) +def test_product_lists_mismatch(parser, products, expected): + """ + Test finding mismatches in some product lists. + """ + assert parser._find_mismatch(products) == expected + + +@pytest.mark.parametrize('references', [['A1BG']], indirect=True) +def test_only_complete_genes_included(settings, references, parser): """ - Test the mutator module. + Incomplete genes from the reference file should be ignored. """ - fixtures = (database, ) - - def setup(self): - super(TestMutator, self).setup() - self.gb_parser = genbank.GBparser() - - def test_product_lists_mismatch(self): - """ - Test finding mismatches in some product lists. - """ - tests = [(['a b c d e', 'a b C D e', 'a b c d e'], (2, 1)), - (['a b c d e', 'a b C d e', 'a B c d e'], (1, 2)), - (['a c d a', 'a b a', 'a a', 'a'], (1, 1)), - ([''], (-1, -1)), - (['', ''], (-1, -1)), - (['a', 'a'], (-1, -1)), - (['a', 'b'], (0, 0)), - (['a b c', 'a b c'], (-1, -1)), - (['a b c d a b', 'a b'], (2, 2))] - for test in tests: - assert self.gb_parser._find_mismatch(test[0]) == test[1] - - @fix(cache('A1BG')) - def test_only_complete_genes_included(self): - """ - Incomplete genes from the reference file should be ignored. 
- """ - # contains A1BG (complete) and A1BG-AS1, ZNF497, LOC100419840 - # (incomplete). - genbank_filename = os.path.join(settings.CACHE_DIR, - REFERENCES['A1BG']['filename']) - record = self.gb_parser.create_record(genbank_filename) - assert [g.name for g in record.geneList] == ['A1BG'] + # contains A1BG (complete) and A1BG-AS1, ZNF497, LOC100419840 + # (incomplete). + accession = references[0].accession + filename = os.path.join(settings.CACHE_DIR, '%s.gb.bz2' % accession) + record = parser.create_record(filename) + assert [g.name for g in record.geneList] == ['A1BG'] diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 7dd3a8c2..22c20b3c 100644 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -1,5 +1,5 @@ """ -Tests for the Scheduler module. +Tests for the mutalyzer.Scheduler module. """ @@ -9,7 +9,7 @@ import bz2 import os import io -#import logging; logging.basicConfig() +import pytest from Bio import Entrez from mock import patch @@ -19,352 +19,362 @@ from mutalyzer import File from mutalyzer import output from mutalyzer import Scheduler -from fixtures import database, cache, hg19, hg19_transcript_mappings -from utils import MutalyzerTest -from utils import fix +pytestmark = pytest.mark.usefixtures('db') -class TestScheduler(MutalyzerTest): + +def _batch_job(batch_file, expected, job_type, argument=None): + file_instance = File.File(output.Output('test')) + scheduler = Scheduler.Scheduler() + + job, columns = file_instance.parseBatchFile(batch_file) + result_id = scheduler.addJob('test@test.test', job, columns, + job_type, argument=argument) + + batch_job = BatchJob.query.filter_by(result_id=result_id).one() + + left = batch_job.batch_queue_items.count() + assert left == len(expected) + + scheduler.process() + + left = batch_job.batch_queue_items.count() + assert left == 0 + + filename = 'batch-job-%s.txt' % result_id + result = io.open(os.path.join(settings.CACHE_DIR, filename), + encoding='utf-8') + + next(result) # Header. 
+ assert expected == [line.strip().split('\t') for line in result] + + +def _batch_job_plain_text(variants, expected, job_type, argument=None): + batch_file = io.BytesIO(('\n'.join(variants) + '\n').encode('utf-8')) + _batch_job(batch_file, expected, job_type, argument=argument) + + +def test_syntax_checker(): """ - Test the Scheduler class. + Simple syntax checker batch job. """ - fixtures = (database, ) - - def _batch_job(self, batch_file, expected, job_type, argument=None): - file_instance = File.File(output.Output('test')) - scheduler = Scheduler.Scheduler() - - job, columns = file_instance.parseBatchFile(batch_file) - result_id = scheduler.addJob('test@test.test', job, columns, - job_type, argument=argument) - - batch_job = BatchJob.query.filter_by(result_id=result_id).one() - - left = batch_job.batch_queue_items.count() - assert left == len(expected) - - scheduler.process() - - left = batch_job.batch_queue_items.count() - assert left == 0 - - filename = 'batch-job-%s.txt' % result_id - result = io.open(os.path.join(settings.CACHE_DIR, filename), - encoding='utf-8') - - next(result) # Header. - assert expected == [line.strip().split('\t') for line in result] - - def _batch_job_plain_text(self, variants, expected, job_type, argument=None): - batch_file = io.BytesIO(('\n'.join(variants) + '\n').encode('utf-8')) - self._batch_job(batch_file, expected, job_type, argument=argument) - - def test_syntax_checker(self): - """ - Simple syntax checker batch job. - """ - variants = ['AB026906.1:c.274G>T', - 'AL449423.14(CDKN2A_v002):c.5_400del'] - expected = [['AB026906.1:c.274G>T', - 'OK'], - ['AL449423.14(CDKN2A_v002):c.5_400del', - 'OK']] - self._batch_job_plain_text(variants, expected, 'syntax-checker') - - def test_large_input(self): - """ - Simple batch job with large input. 
- """ - variants = ['chr13:g.114503915delCACCTGCGGGAGGTGAGGGGCGCTGGGGACCCCCG' - 'TATCTACACCTGCGGGAGGTGAGGGGCGCTGGGGACCCCTATATCTACACCTGAG' - 'GGAGGTGinsTGCCTGCGGGAGGTGAGGGGCGCTGGGGACCCCCGTATCTACACC' - 'TGCGGGAGGTGAGGGGCGCTGGGGACCCCTATATCTACACCTGAGGGAGGTG'] - - expected = [['InputFields: chr13:g.114503915delCACCTGCGGGAGGTGAGGGGC' - 'GCTGGGGACCCCCGTATCTACACCTGCGGGAGGTGAGGGGCGCTGGGGACCCCT' - 'ATATCTACACCTGAGGGAGGTGinsTGCCTGCGGGAGGTGAGGGGCGCTGGGGA' - 'CCCCCGTATCTACACCTGCGGGAGGTGAGGG...', - '(Scheduler): Entry could not be formatted correctly, ' - 'check batch input file help for details']] - self._batch_job_plain_text(variants, expected, 'syntax-checker') - - @fix(cache('AB026906.1', 'NM_000059.3')) - def test_name_checker(self): - """ - Simple name checker batch job. - """ - variants = ['AB026906.1:c.274G>T', - 'NM_000059.3:c.670G>T'] - expected = [['AB026906.1:c.274G>T', - '(GenRecord): No mRNA field found for gene SDHD, ' - 'transcript variant 001 in record, constructing it from ' - 'CDS. Please note that descriptions exceeding CDS ' - 'boundaries are invalid.', - 'AB026906.1', - 'SDHD_v001', - 'c.274G>T', - 'g.7872G>T', - 'c.274G>T', - 'p.(Asp92Tyr)', - 'SDHD_v001:c.274G>T', - 'SDHD_v001:p.(Asp92Tyr)', - '', - '', - 'BAA81889.1', - 'AB026906.1(SDHD_v001):c.274G>T', - 'AB026906.1(SDHD_i001):p.(Asp92Tyr)', - 'CviQI,RsaI', - 'BccI'], - ['NM_000059.3:c.670G>T', - '', - 'NM_000059.3', - 'BRCA2_v001', - 'c.670G>T', - 'n.897G>T', - 'c.670G>T', - 'p.(Asp224Tyr)', - 'BRCA2_v001:c.670G>T', - 'BRCA2_v001:p.(Asp224Tyr)', - '', - 'NM_000059.3', - 'NP_000050.2', - 'NM_000059.3(BRCA2_v001):c.670G>T', - 'NM_000059.3(BRCA2_i001):p.(Asp224Tyr)', - '', - 'BspHI,CviAII,FatI,Hpy188III,NlaIII']] - self._batch_job_plain_text(variants, expected, 'name-checker') - - def test_name_checker_altered(self): - """ - Name checker job with altered entries. 
- """ - variants = ['NM_000059:c.670dup', - 'NM_000059:c.670G>T', - 'NM_000059.3:c.670G>T'] - expected = [['NM_000059:c.670dup', - '|'.join(['(Retriever): No version number is given, ' - 'using NM_000059.3. Please use this number to ' - 'reduce downloading overhead.', - '(Scheduler): All further occurrences of ' - 'NM_000059 will be substituted by ' - 'NM_000059.3']), - 'NM_000059', - 'BRCA2_v001', - 'c.670dup', - 'n.897dup', - 'c.670dup', - 'p.(Asp224Glyfs*5)', - 'BRCA2_v001:c.670dup', - 'BRCA2_v001:p.(Asp224Glyfs*5)', - '', - 'NM_000059.3', - 'NP_000050.2', - 'NM_000059(BRCA2_v001):c.670dup', - 'NM_000059(BRCA2_i001):p.(Asp224Glyfs*5)', - 'BciVI', - 'BspHI,Hpy188III'], - ['NM_000059.3:c.670G>T', - '(Scheduler): Entry altered before execution', - 'NM_000059.3', - 'BRCA2_v001', - 'c.670G>T', - 'n.897G>T', - 'c.670G>T', - 'p.(Asp224Tyr)', - 'BRCA2_v001:c.670G>T', - 'BRCA2_v001:p.(Asp224Tyr)', - '', - 'NM_000059.3', - 'NP_000050.2', - 'NM_000059.3(BRCA2_v001):c.670G>T', - 'NM_000059.3(BRCA2_i001):p.(Asp224Tyr)', - '', - 'BspHI,CviAII,FatI,Hpy188III,NlaIII'], - ['NM_000059.3:c.670G>T', - '', - 'NM_000059.3', - 'BRCA2_v001', - 'c.670G>T', - 'n.897G>T', - 'c.670G>T', - 'p.(Asp224Tyr)', - 'BRCA2_v001:c.670G>T', - 'BRCA2_v001:p.(Asp224Tyr)', - '', - 'NM_000059.3', - 'NP_000050.2', - 'NM_000059.3(BRCA2_v001):c.670G>T', - 'NM_000059.3(BRCA2_i001):p.(Asp224Tyr)', - '', - 'BspHI,CviAII,FatI,Hpy188III,NlaIII']] - - # Patch GenBankRetriever.fetch to return the contents of NM_000059.3 - # for NM_000059. - def mock_efetch(*args, **kwargs): - if kwargs.get('id') != 'NM_000059': - return Entrez.efetch(*args, **kwargs) - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', - 'NM_000059.3.gb.bz2') - return bz2.BZ2File(path) - - with patch.object(Entrez, 'efetch', mock_efetch): - self._batch_job_plain_text(variants, expected, 'name-checker') - - @fix(cache('NM_000059.3')) - def test_name_checker_skipped(self): - """ - Name checker job with skipped entries. 
- """ - variants = ['NM_1234567890.3:c.670G>T', - 'NM_1234567890.3:c.570G>T', - 'NM_000059.3:c.670G>T'] - expected = [['NM_1234567890.3:c.670G>T', - '(Retriever): Could not retrieve NM_1234567890.3.|' - '(Scheduler): All further occurrences with ' - '\'NM_1234567890.3\' will be skipped'], - ['NM_1234567890.3:c.570G>T', - '(Scheduler): Skipping entry'], - ['NM_000059.3:c.670G>T', - '', - 'NM_000059.3', - 'BRCA2_v001', - 'c.670G>T', - 'n.897G>T', - 'c.670G>T', - 'p.(Asp224Tyr)', - 'BRCA2_v001:c.670G>T', - 'BRCA2_v001:p.(Asp224Tyr)', - '', - 'NM_000059.3', - 'NP_000050.2', - 'NM_000059.3(BRCA2_v001):c.670G>T', - 'NM_000059.3(BRCA2_i001):p.(Asp224Tyr)', - '', - 'BspHI,CviAII,FatI,Hpy188III,NlaIII']] - - # Patch GenBankRetriever.fetch to fail on NM_1234567890.3. - def mock_efetch(*args, **kwargs): - if kwargs.get('id') != 'NM_1234567890.3': - return Entrez.efetch(*args, **kwargs) - raise IOError() - - with patch.object(Entrez, 'efetch', mock_efetch): - self._batch_job_plain_text(variants, expected, 'name-checker') - - @fix(hg19, hg19_transcript_mappings) - def test_position_converter(self): - """ - Simple position converter batch job. - """ - variants = ['chr11:g.111959695G>T'] - expected = [['chr11:g.111959695G>T', - '', - 'NC_000011.9:g.111959695G>T', - 'NM_003002.2:c.274G>T', - 'NM_012459.2:c.-2203C>A', - 'NR_028383.1:n.-2173C>A']] - self._batch_job_plain_text(variants, expected, 'position-converter', 'hg19') - - def test_ods_file(self): - """ - OpenDocument Spreadsheet input for batch job. - """ - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', - 'batch_input.ods') - batch_file = open(path, 'rb') - expected = [['AB026906.1:c.274G>T', - 'OK'], - ['AL449423.14(CDKN2A_v002):c.5_400del', - 'OK']] - - self._batch_job(batch_file, expected, 'syntax-checker') - - def test_sxc_file(self): - """ - OpenOffice.org 1.x Calc spreadsheet input for batch job. 
- """ - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', - 'batch_input.sxc') - batch_file = open(path, 'rb') - expected = [['AB026906.1:c.274G>T', - 'OK'], - ['AL449423.14(CDKN2A_v002):c.5_400del', - 'OK']] - - self._batch_job(batch_file, expected, 'syntax-checker') - - def test_xls_file(self): - """ - Microsoft Excel 97/2000/XP/2003 input for batch job. - """ - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', - 'batch_input.xls') - batch_file = open(path, 'rb') - expected = [['AB026906.1:c.274G>T', - 'OK'], - ['AL449423.14(CDKN2A_v002):c.5_400del', - 'OK']] - - self._batch_job(batch_file, expected, 'syntax-checker') - - def test_xlsx_file(self): - """ - Office Open XML Spreadsheet input for batch job. - """ - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', - 'batch_input.xlsx') - batch_file = open(path, 'rb') - expected = [['AB026906.1:c.274G>T', - 'OK'], - ['AL449423.14(CDKN2A_v002):c.5_400del', - 'OK']] - - self._batch_job(batch_file, expected, 'syntax-checker') - - def test_invalid_zip_file(self): - """ - Random zip file input for batch job (invalid). - """ + variants = ['AB026906.1:c.274G>T', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + _batch_job_plain_text(variants, expected, 'syntax-checker') + + +def test_large_input(): + """ + Simple batch job with large input. 
+ """ + variants = ['chr13:g.114503915delCACCTGCGGGAGGTGAGGGGCGCTGGGGACCCCCG' + 'TATCTACACCTGCGGGAGGTGAGGGGCGCTGGGGACCCCTATATCTACACCTGAG' + 'GGAGGTGinsTGCCTGCGGGAGGTGAGGGGCGCTGGGGACCCCCGTATCTACACC' + 'TGCGGGAGGTGAGGGGCGCTGGGGACCCCTATATCTACACCTGAGGGAGGTG'] + + expected = [['InputFields: chr13:g.114503915delCACCTGCGGGAGGTGAGGGGC' + 'GCTGGGGACCCCCGTATCTACACCTGCGGGAGGTGAGGGGCGCTGGGGACCCCT' + 'ATATCTACACCTGAGGGAGGTGinsTGCCTGCGGGAGGTGAGGGGCGCTGGGGA' + 'CCCCCGTATCTACACCTGCGGGAGGTGAGGG...', + '(Scheduler): Entry could not be formatted correctly, ' + 'check batch input file help for details']] + _batch_job_plain_text(variants, expected, 'syntax-checker') + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['AB026906.1', 'NM_000059.3']], + indirect=True) +def test_name_checker(): + """ + Simple name checker batch job. + """ + variants = ['AB026906.1:c.274G>T', + 'NM_000059.3:c.670G>T'] + expected = [['AB026906.1:c.274G>T', + '(GenRecord): No mRNA field found for gene SDHD, ' + 'transcript variant 001 in record, constructing it from ' + 'CDS. Please note that descriptions exceeding CDS ' + 'boundaries are invalid.', + 'AB026906.1', + 'SDHD_v001', + 'c.274G>T', + 'g.7872G>T', + 'c.274G>T', + 'p.(Asp92Tyr)', + 'SDHD_v001:c.274G>T', + 'SDHD_v001:p.(Asp92Tyr)', + '', + '', + 'BAA81889.1', + 'AB026906.1(SDHD_v001):c.274G>T', + 'AB026906.1(SDHD_i001):p.(Asp92Tyr)', + 'CviQI,RsaI', + 'BccI'], + ['NM_000059.3:c.670G>T', + '', + 'NM_000059.3', + 'BRCA2_v001', + 'c.670G>T', + 'n.897G>T', + 'c.670G>T', + 'p.(Asp224Tyr)', + 'BRCA2_v001:c.670G>T', + 'BRCA2_v001:p.(Asp224Tyr)', + '', + 'NM_000059.3', + 'NP_000050.2', + 'NM_000059.3(BRCA2_v001):c.670G>T', + 'NM_000059.3(BRCA2_i001):p.(Asp224Tyr)', + '', + 'BspHI,CviAII,FatI,Hpy188III,NlaIII']] + _batch_job_plain_text(variants, expected, 'name-checker') + + +def test_name_checker_altered(): + """ + Name checker job with altered entries. 
+ """ + variants = ['NM_000059:c.670dup', + 'NM_000059:c.670G>T', + 'NM_000059.3:c.670G>T'] + expected = [['NM_000059:c.670dup', + '|'.join(['(Retriever): No version number is given, ' + 'using NM_000059.3. Please use this number to ' + 'reduce downloading overhead.', + '(Scheduler): All further occurrences of ' + 'NM_000059 will be substituted by ' + 'NM_000059.3']), + 'NM_000059', + 'BRCA2_v001', + 'c.670dup', + 'n.897dup', + 'c.670dup', + 'p.(Asp224Glyfs*5)', + 'BRCA2_v001:c.670dup', + 'BRCA2_v001:p.(Asp224Glyfs*5)', + '', + 'NM_000059.3', + 'NP_000050.2', + 'NM_000059(BRCA2_v001):c.670dup', + 'NM_000059(BRCA2_i001):p.(Asp224Glyfs*5)', + 'BciVI', + 'BspHI,Hpy188III'], + ['NM_000059.3:c.670G>T', + '(Scheduler): Entry altered before execution', + 'NM_000059.3', + 'BRCA2_v001', + 'c.670G>T', + 'n.897G>T', + 'c.670G>T', + 'p.(Asp224Tyr)', + 'BRCA2_v001:c.670G>T', + 'BRCA2_v001:p.(Asp224Tyr)', + '', + 'NM_000059.3', + 'NP_000050.2', + 'NM_000059.3(BRCA2_v001):c.670G>T', + 'NM_000059.3(BRCA2_i001):p.(Asp224Tyr)', + '', + 'BspHI,CviAII,FatI,Hpy188III,NlaIII'], + ['NM_000059.3:c.670G>T', + '', + 'NM_000059.3', + 'BRCA2_v001', + 'c.670G>T', + 'n.897G>T', + 'c.670G>T', + 'p.(Asp224Tyr)', + 'BRCA2_v001:c.670G>T', + 'BRCA2_v001:p.(Asp224Tyr)', + '', + 'NM_000059.3', + 'NP_000050.2', + 'NM_000059.3(BRCA2_v001):c.670G>T', + 'NM_000059.3(BRCA2_i001):p.(Asp224Tyr)', + '', + 'BspHI,CviAII,FatI,Hpy188III,NlaIII']] + + # Patch GenBankRetriever.fetch to return the contents of NM_000059.3 + # for NM_000059. + def mock_efetch(*args, **kwargs): + if kwargs.get('id') != 'NM_000059': + return Entrez.efetch(*args, **kwargs) path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', - 'image.zip') - batch_file = open(path, 'rb') - - file_instance = File.File(output.Output('test')) - job, columns = file_instance.parseBatchFile(batch_file) - assert job is None - - def test_unicode_input(self): - """ - Simple input with some non-ASCII unicode characters. 
- """ - variants = ['\u2026AB026906.1:c.274G>T', - '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] - expected = [['\u2026AB026906.1:c.274G>T', - '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], - ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', - '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)']] - self._batch_job_plain_text(variants, expected, 'syntax-checker') - - def test_windows_1252_input(self): - """ - Simple input encoded as WINDOWS-1252. - """ - variants = ['AB026906.1:c.274G>T', - # Encoded as WINDOWS-1252, the following is not valid UTF8. - 'NM_000052.4:c.2407\u20132A>G', - 'AL449423.14(CDKN2A_v002):c.5_400del'] - batch_file = io.BytesIO(('\n'.join(variants) + '\n').encode('WINDOWS-1252')) - expected = [['AB026906.1:c.274G>T', - 'OK'], - ['NM_000052.4:c.2407\u20132A>G', - '(grammar): Expected W:(acgt...) (at char 18), (line:1, col:19)'], - ['AL449423.14(CDKN2A_v002):c.5_400del', - 'OK']] - - self._batch_job(batch_file, expected, 'syntax-checker') + 'NM_000059.3.gb.bz2') + return bz2.BZ2File(path) + + with patch.object(Entrez, 'efetch', mock_efetch): + _batch_job_plain_text(variants, expected, 'name-checker') + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NM_000059.3']], indirect=True) +def test_name_checker_skipped(): + """ + Name checker job with skipped entries. 
+ """ + variants = ['NM_1234567890.3:c.670G>T', + 'NM_1234567890.3:c.570G>T', + 'NM_000059.3:c.670G>T'] + expected = [['NM_1234567890.3:c.670G>T', + '(Retriever): Could not retrieve NM_1234567890.3.|' + '(Scheduler): All further occurrences with ' + '\'NM_1234567890.3\' will be skipped'], + ['NM_1234567890.3:c.570G>T', + '(Scheduler): Skipping entry'], + ['NM_000059.3:c.670G>T', + '', + 'NM_000059.3', + 'BRCA2_v001', + 'c.670G>T', + 'n.897G>T', + 'c.670G>T', + 'p.(Asp224Tyr)', + 'BRCA2_v001:c.670G>T', + 'BRCA2_v001:p.(Asp224Tyr)', + '', + 'NM_000059.3', + 'NP_000050.2', + 'NM_000059.3(BRCA2_v001):c.670G>T', + 'NM_000059.3(BRCA2_i001):p.(Asp224Tyr)', + '', + 'BspHI,CviAII,FatI,Hpy188III,NlaIII']] + + # Patch GenBankRetriever.fetch to fail on NM_1234567890.3. + def mock_efetch(*args, **kwargs): + if kwargs.get('id') != 'NM_1234567890.3': + return Entrez.efetch(*args, **kwargs) + raise IOError() + + with patch.object(Entrez, 'efetch', mock_efetch): + _batch_job_plain_text(variants, expected, 'name-checker') + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_position_converter(): + """ + Simple position converter batch job. + """ + variants = ['chr11:g.111959695G>T'] + expected = [['chr11:g.111959695G>T', + '', + 'NC_000011.9:g.111959695G>T', + 'NM_003002.2:c.274G>T', + 'NM_012459.2:c.-2203C>A', + 'NR_028383.1:n.-2173C>A']] + _batch_job_plain_text(variants, expected, 'position-converter', 'hg19') + + +def test_ods_file(): + """ + OpenDocument Spreadsheet input for batch job. + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'batch_input.ods') + batch_file = open(path, 'rb') + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + + _batch_job(batch_file, expected, 'syntax-checker') + + +def test_sxc_file(): + """ + OpenOffice.org 1.x Calc spreadsheet input for batch job. 
+ """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'batch_input.sxc') + batch_file = open(path, 'rb') + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + + _batch_job(batch_file, expected, 'syntax-checker') + + +def test_xls_file(): + """ + Microsoft Excel 97/2000/XP/2003 input for batch job. + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'batch_input.xls') + batch_file = open(path, 'rb') + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + + _batch_job(batch_file, expected, 'syntax-checker') + + +def test_xlsx_file(): + """ + Office Open XML Spreadsheet input for batch job. + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'batch_input.xlsx') + batch_file = open(path, 'rb') + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + + _batch_job(batch_file, expected, 'syntax-checker') + + +def test_invalid_zip_file(): + """ + Random zip file input for batch job (invalid). + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'image.zip') + batch_file = open(path, 'rb') + + file_instance = File.File(output.Output('test')) + job, columns = file_instance.parseBatchFile(batch_file) + assert job is None + + +def test_unicode_input(): + """ + Simple input with some non-ASCII unicode characters. + """ + variants = ['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)']] + _batch_job_plain_text(variants, expected, 'syntax-checker') + + +def test_windows_1252_input(): + """ + Simple input encoded as WINDOWS-1252. 
+ """ + variants = ['AB026906.1:c.274G>T', + # Encoded as WINDOWS-1252, the following is not valid UTF8. + 'NM_000052.4:c.2407\u20132A>G', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + batch_file = io.BytesIO(('\n'.join(variants) + '\n').encode('WINDOWS-1252')) + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['NM_000052.4:c.2407\u20132A>G', + '(grammar): Expected W:(acgt...) (at char 18), (line:1, col:19)'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + + _batch_job(batch_file, expected, 'syntax-checker') diff --git a/tests/test_services_json.py b/tests/test_services_json.py index 259fad6b..0e27947c 100644 --- a/tests/test_services_json.py +++ b/tests/test_services_json.py @@ -1,12 +1,12 @@ """ -Tests for the JSON interface to Mutalyzer. +Tests for the mutalyzer.services.json module. """ from __future__ import unicode_literals +import json import pytest -import simplejson as json from spyne.model.fault import Fault from spyne.server.null import NullServer @@ -16,10 +16,6 @@ from mutalyzer.config import settings from mutalyzer import Scheduler from mutalyzer.services.json import application -from fixtures import database, hg19, hg19_transcript_mappings -from utils import MutalyzerTest -from utils import fix - # Todo: We currently have no way of testing POST requests to the JSON API. We # had some tests for this, but they were removed with the new setup [1]. @@ -29,291 +25,306 @@ from utils import fix # [2] https://github.com/LUMC/spyne/commit/58660dec28d47b1c3bf1e46d20f55a913ad036cd -class TestServicesJson(MutalyzerTest): +@pytest.fixture +def server(): + return NullServer(application, ostr=True) + + +@pytest.fixture +def api(server): + def call(method, *args, **kwargs): + r = getattr(server.service, method)(*args, **kwargs) + return json.loads(''.join(r)) + return call + + +def test_checksyntax_valid(api): """ - Test the Mutalyzer HTTP/RPC+JSON interface. + Running checkSyntax with a valid variant name should return True. 
""" - def setup(self): - super(TestServicesJson, self).setup() - self.server = NullServer(application, ostr=True) + r = api('checkSyntax', variant='AB026906.1:c.274G>T') + assert r == {'valid': True, 'messages': []} - def _call(self, method, *args, **kwargs): - r = getattr(self.server.service, method)(*args, **kwargs) - return json.loads(''.join(r)) - def test_checksyntax_valid(self): - """ - Running checkSyntax with a valid variant name should return True. - """ - r = self._call('checkSyntax', variant='AB026906.1:c.274G>T') - assert r == {'valid': True, 'messages': []} - - def test_checksyntax_invalid(self): - """ - Running checkSyntax with an invalid variant name should return False - and give at least one error message. - """ - r = self._call('checkSyntax', variant='0:abcd') - assert r['valid'] == False - assert len(r['messages']) >= 1 - - def test_checksyntax_empty(self): - """ - Running checkSyntax with no variant name should raise exception. - """ - # The validator doesn't work with NullServer, so we cannot do this - # test. See https://github.com/arskom/spyne/issues/318 - #r = self._call('checkSyntax') - #assert r['faultcode'] == 'Client.ValidationError' - pass - - @fix(database, hg19, hg19_transcript_mappings) - def test_transcriptinfo_valid(self): - """ - Running transcriptInfo with valid arguments should get us a Transcript - object. - """ - r = self._call('transcriptInfo', LOVD_ver='123', build='hg19', - accNo='NM_002001.2') - assert r['trans_start'] == -99 - assert r['trans_stop'] == 1066 - assert r['CDS_stop'] == 774 - - def test_info(self): - """ - Running the info method should give us some version information. 
- """ - r = self._call('info') - assert isinstance(r['versionParts'], list) - assert r['version'] == mutalyzer.__version__ - - def test_info_announcement(self): - """ - Running the info method should show us the current announcement - """ - announce.set_announcement('Test announcement') - r = self._call('info') - assert isinstance(r['announcement'], unicode) - assert r['announcement'] == 'Test announcement' - - announce.set_announcement('New announcement') - r = self._call('info') - assert isinstance(r['announcement'], unicode) - assert r['announcement'] == 'New announcement' - - announce.unset_announcement() - r = self._call('info') - assert not r.get('announcement') - - def test_checksyntax_unicode(self): - """ - Run checkSyntax with an invalid variant description containing - non-ASCII unicode characters. - """ - r = self._call('checkSyntax', 'La Pe\xf1a') - assert r['valid'] == False - assert len(r['messages']) == 1 - assert r['messages'][0]['errorcode'] == 'EPARSE' - assert r['messages'][0]['message'] == 'Expected W:(0123...) (at char 2), (line:1, col:3)' - - @fix(database) - def test_batchjob_unicode(self): - """ - Submit a batch job with non-ASCII unicode characters in the input - file. - """ - variants = ['\u2026AB026906.1:c.274G>T', - '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] - expected = [['\u2026AB026906.1:c.274G>T', - '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], - ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', - '(grammar): Expected W:(0123...) 
(at char 0), (line:1, col:1)']] - - data = '\n'.join(variants) + '\n' #.encode('base64') - - result = self._call('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') - job_id = unicode(result) - - result = self._call('monitorBatchJob', job_id) - assert int(result) == len(variants) - - scheduler = Scheduler.Scheduler() - scheduler.process() - - result = self._call('monitorBatchJob', job_id) - assert int(result) == 0 - - result = self._call('getBatchJob', job_id) - result = result.decode('base64').decode('utf-8').strip().split('\n')[1:] - assert expected == [line.split('\t') for line in result] - - @fix(database, hg19, hg19_transcript_mappings) - def test_gene_location(self): - """ - Get outer coordinates for gene. - """ - r = self._call('getGeneLocation', 'SDHD', 'hg19') - - assert r == {'gene': 'SDHD', - 'start': 111957571, - 'stop': 111966518, - 'orientation': 'forward', - 'chromosome_name': 'chr11', - 'chromosome_accession': 'NC_000011.9', - 'assembly_name': 'GRCh37', - 'assembly_alias': 'hg19'} - - @fix(database, hg19, hg19_transcript_mappings) - def test_gene_location_reverse(self): - """ - Get outer coordinates for gene on the reverse strand. - """ - r = self._call('getGeneLocation', 'DMD', 'hg19') - - assert r == {'gene': 'DMD', - 'start': 31137345, - 'stop': 33038317, - 'orientation': 'reverse', - 'chromosome_name': 'chrX', - 'chromosome_accession': 'NC_000023.10', - 'assembly_name': 'GRCh37', - 'assembly_alias': 'hg19'} - - @fix(database, hg19, hg19_transcript_mappings) - def test_gene_location_default_build(self): - """ - Get outer coordinates for gene without specifying the build. 
- """ - r = self._call('getGeneLocation', 'SDHD') - - assert r == {'gene': 'SDHD', - 'start': 111957571, - 'stop': 111966518, - 'orientation': 'forward', - 'chromosome_name': 'chr11', - 'chromosome_accession': 'NC_000011.9', - 'assembly_name': 'GRCh37', - 'assembly_alias': 'hg19'} - - @fix(database, hg19, hg19_transcript_mappings) - def test_gene_location_invalid_gene(self): - """ - Get outer coordinates for gene that does not exist. - """ - with pytest.raises(Fault): - self._call('getGeneLocation', 'THISISNOTAGENE', 'hg19') - - @fix(database, hg19, hg19_transcript_mappings) - def test_get_transcripts_mapping(self): - """ - Test output of getTranscriptsMapping. - """ - r = self._call('getTranscriptsMapping', 'hg19', 'chr11', - 111955524, 111966518) - assert r == [{'cds_start': 111957632, - 'cds_stop': 111965694, - 'name': 'NM_003002', - 'stop': 111966518, - 'start': 111957571, - 'version': 2, - 'gene': 'SDHD', - 'orientation': '+'}, - {'cds_start': 111957492, - 'cds_stop': 111956019, - 'name': 'NM_012459', - 'stop': 111955524, - 'start': 111957522, - 'version': 2, - 'gene': 'TIMM8B', - 'orientation': '-'}, - {'cds_start': None, - 'cds_stop': None, - 'name': 'NR_028383', - 'stop': 111955524, - 'start': 111957522, - 'version': 1, - 'gene': 'TIMM8B', - 'orientation': '-'}] - - def test_description_extract(self): - """ - Test output of descriptionExtract. 
- """ - r = self._call('descriptionExtract', - 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', - 'ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA') - assert r == {'allele': [{'end': 6, - 'deleted': '', - 'weight': 8, - 'inserted': 'TT', - 'start_offset': 0, - 'start': 5, - 'description': '5_6insTT', - 'shift': 1, - 'end_offset': 0, - 'type': 'ins', - 'sample_start': 6, - 'sample_end': 7, - 'sample_start_offset': 0, - 'sample_end_offset': 0}, - {'end': 17, - 'deleted': 'G', - 'weight': 7, - 'inserted': '', - 'start_offset': 0, - 'start': 17, - 'description': '17del', - 'shift': 0, - 'end_offset': 0, - 'type': 'del', - 'sample_start': 18, - 'sample_end': 19, - 'sample_start_offset': 0, - 'sample_end_offset': 0}, - {'end': 26, - 'deleted': 'A', - 'weight': 3, - 'inserted': 'C', - 'start_offset': 0, - 'start': 26, - 'description': '26A>C', - 'shift': 0, - 'end_offset': 0, - 'type': 'subst', - 'sample_start': 27, - 'sample_end': 27, - 'sample_start_offset': 0, - 'sample_end_offset': 0}, - {'end': 35, - 'deleted': '', - 'weight': 5, - 'inserted': 'G', - 'start_offset': 0, - 'start': 35, - 'description': '35dup', - 'shift': 1, - 'end_offset': 0, - 'type': 'dup', - 'sample_start': 37, - 'sample_end': 37, - 'sample_start_offset': 0, - 'sample_end_offset': 0}], - 'description': '[5_6insTT;17del;26A>C;35dup]'} - - def test_description_extract_ref_too_long(self): - """ - Test output of descriptionExtract with too long reference sequence. - """ - with pytest.raises(Fault): - self._call('descriptionExtract', - 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH + 1), - 'A') - - def test_description_extract_sample_too_long(self): - """ - Test output of descriptionExtract with too long sample sequence. 
- """ - with pytest.raises(Fault): - self._call('descriptionExtract', - 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH), - 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH + 1)) +def test_checksyntax_invalid(api): + """ + Running checkSyntax with an invalid variant name should return False + and give at least one error message. + """ + r = api('checkSyntax', variant='0:abcd') + assert not r['valid'] + assert len(r['messages']) >= 1 + + +def test_checksyntax_empty(api): + """ + Running checkSyntax with no variant name should raise exception. + """ + # The validator doesn't work with NullServer, so we cannot do this + # test. See https://github.com/arskom/spyne/issues/318 + # r = api('checkSyntax') + # assert r['faultcode'] == 'Client.ValidationError' + pass + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_transcriptinfo_valid(api): + """ + Running transcriptInfo with valid arguments should get us a Transcript + object. + """ + r = api('transcriptInfo', LOVD_ver='123', build='hg19', + accNo='NM_002001.2') + assert r['trans_start'] == -99 + assert r['trans_stop'] == 1066 + assert r['CDS_stop'] == 774 + + +def test_info(api): + """ + Running the info method should give us some version information. 
+ """ + r = api('info') + assert isinstance(r['versionParts'], list) + assert r['version'] == mutalyzer.__version__ + + +def test_info_announcement(api): + """ + Running the info method should show us the current announcement + """ + announce.set_announcement('Test announcement') + r = api('info') + assert isinstance(r['announcement'], unicode) + assert r['announcement'] == 'Test announcement' + + announce.set_announcement('New announcement') + r = api('info') + assert isinstance(r['announcement'], unicode) + assert r['announcement'] == 'New announcement' + + announce.unset_announcement() + r = api('info') + assert not r.get('announcement') + + +def test_checksyntax_unicode(api): + """ + Run checkSyntax with an invalid variant description containing + non-ASCII unicode characters. + """ + r = api('checkSyntax', 'La Pe\xf1a') + assert not r['valid'] + assert len(r['messages']) == 1 + assert r['messages'][0]['errorcode'] == 'EPARSE' + assert r['messages'][0]['message'] == 'Expected W:(0123...) (at char 2), (line:1, col:3)' + + +@pytest.mark.usefixtures('db') +def test_batchjob_unicode(api): + """ + Submit a batch job with non-ASCII unicode characters in the input + file. + """ + variants = ['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) 
(at char 0), (line:1, col:1)']] + + data = '\n'.join(variants) + '\n' # .encode('base64') + + result = api('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') + job_id = unicode(result) + + result = api('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = api('monitorBatchJob', job_id) + assert int(result) == 0 + + result = api('getBatchJob', job_id) + result = result.decode('base64').decode('utf-8').strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_gene_location(api): + """ + Get outer coordinates for gene. + """ + r = api('getGeneLocation', 'SDHD', 'hg19') + + assert r == {'gene': 'SDHD', + 'start': 111957571, + 'stop': 111966518, + 'orientation': 'forward', + 'chromosome_name': 'chr11', + 'chromosome_accession': 'NC_000011.9', + 'assembly_name': 'GRCh37', + 'assembly_alias': 'hg19'} + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_gene_location_reverse(api): + """ + Get outer coordinates for gene on the reverse strand. + """ + r = api('getGeneLocation', 'DMD', 'hg19') + + assert r == {'gene': 'DMD', + 'start': 31137345, + 'stop': 33038317, + 'orientation': 'reverse', + 'chromosome_name': 'chrX', + 'chromosome_accession': 'NC_000023.10', + 'assembly_name': 'GRCh37', + 'assembly_alias': 'hg19'} + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_gene_location_default_build(api): + """ + Get outer coordinates for gene without specifying the build. 
+ """ + r = api('getGeneLocation', 'SDHD') + + assert r == {'gene': 'SDHD', + 'start': 111957571, + 'stop': 111966518, + 'orientation': 'forward', + 'chromosome_name': 'chr11', + 'chromosome_accession': 'NC_000011.9', + 'assembly_name': 'GRCh37', + 'assembly_alias': 'hg19'} + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_gene_location_invalid_gene(api): + """ + Get outer coordinates for gene that does not exist. + """ + with pytest.raises(Fault): + api('getGeneLocation', 'THISISNOTAGENE', 'hg19') + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_get_transcripts_mapping(api): + """ + Test output of getTranscriptsMapping. + """ + r = api('getTranscriptsMapping', 'hg19', 'chr11', 111955524, 111966518) + assert r == [{'cds_start': 111957632, + 'cds_stop': 111965694, + 'name': 'NM_003002', + 'stop': 111966518, + 'start': 111957571, + 'version': 2, + 'gene': 'SDHD', + 'orientation': '+'}, + {'cds_start': 111957492, + 'cds_stop': 111956019, + 'name': 'NM_012459', + 'stop': 111955524, + 'start': 111957522, + 'version': 2, + 'gene': 'TIMM8B', + 'orientation': '-'}, + {'cds_start': None, + 'cds_stop': None, + 'name': 'NR_028383', + 'stop': 111955524, + 'start': 111957522, + 'version': 1, + 'gene': 'TIMM8B', + 'orientation': '-'}] + + +def test_description_extract(api): + """ + Test output of descriptionExtract. 
+ """ + r = api('descriptionExtract', + 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', + 'ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA') + assert r == {'allele': [{'end': 6, + 'deleted': '', + 'weight': 8, + 'inserted': 'TT', + 'start_offset': 0, + 'start': 5, + 'description': '5_6insTT', + 'shift': 1, + 'end_offset': 0, + 'type': 'ins', + 'sample_start': 6, + 'sample_end': 7, + 'sample_start_offset': 0, + 'sample_end_offset': 0}, + {'end': 17, + 'deleted': 'G', + 'weight': 7, + 'inserted': '', + 'start_offset': 0, + 'start': 17, + 'description': '17del', + 'shift': 0, + 'end_offset': 0, + 'type': 'del', + 'sample_start': 18, + 'sample_end': 19, + 'sample_start_offset': 0, + 'sample_end_offset': 0}, + {'end': 26, + 'deleted': 'A', + 'weight': 3, + 'inserted': 'C', + 'start_offset': 0, + 'start': 26, + 'description': '26A>C', + 'shift': 0, + 'end_offset': 0, + 'type': 'subst', + 'sample_start': 27, + 'sample_end': 27, + 'sample_start_offset': 0, + 'sample_end_offset': 0}, + {'end': 35, + 'deleted': '', + 'weight': 5, + 'inserted': 'G', + 'start_offset': 0, + 'start': 35, + 'description': '35dup', + 'shift': 1, + 'end_offset': 0, + 'type': 'dup', + 'sample_start': 37, + 'sample_end': 37, + 'sample_start_offset': 0, + 'sample_end_offset': 0}], + 'description': '[5_6insTT;17del;26A>C;35dup]'} + + +def test_description_extract_ref_too_long(api): + """ + Test output of descriptionExtract with too long reference sequence. + """ + with pytest.raises(Fault): + api('descriptionExtract', + 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH + 1), + 'A') + + +def test_description_extract_sample_too_long(api): + """ + Test output of descriptionExtract with too long sample sequence. 
+ """ + with pytest.raises(Fault): + api('descriptionExtract', + 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH), + 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH + 1)) diff --git a/tests/test_services_soap.py b/tests/test_services_soap.py index e60470ad..f1964b30 100644 --- a/tests/test_services_soap.py +++ b/tests/test_services_soap.py @@ -1,5 +1,5 @@ """ -Tests for the SOAP interface to Mutalyzer. +Tests for the mutalyzer.services.soap module. """ @@ -7,9 +7,7 @@ from __future__ import unicode_literals import bz2 import datetime -import logging import os -import tempfile from Bio import Entrez from mock import patch @@ -19,615 +17,637 @@ from spyne.model.fault import Fault from suds.client import Client import mutalyzer -from mutalyzer.config import settings -from mutalyzer.output import Output from mutalyzer.services.soap import application -from mutalyzer.sync import CacheSync from mutalyzer import Scheduler -from fixtures import database, cache, hg19, hg19_transcript_mappings -from utils import MutalyzerTest -from utils import fix +@pytest.fixture +def server(): + return NullServer(application, ostr=True) -# Suds logs an awful lot of things with level=DEBUG, including entire WSDL -# files and SOAP responses. On any error, this is all dumped to the console, -# which is very unconvenient. The following suppresses most of this. 
-logging.raiseExceptions = 0 -logging.basicConfig(level=logging.INFO) -for logger in ('suds.metrics', 'suds.wsdl', 'suds.xsd.schema', - 'suds.xsd.sxbasic', 'suds.xsd.sxbase', 'suds.xsd.query', - 'suds.transport.http', 'suds.xsd.deplist', 'suds.mx.core', - 'suds.mx.literal', 'suds.resolver', 'suds.client', - 'suds.umx.typed'): - logging.getLogger(logger).setLevel(logging.ERROR) - -def _write_wsdl(server): +@pytest.fixture +def wsdl(tmpdir, server): + wsdl_file = tmpdir.join('wsdl').ensure() server.doc.wsdl11.build_interface_document('/') - wsdl = tempfile.NamedTemporaryFile(mode='w', delete=False) - wsdl_filename = wsdl.name - wsdl.write(server.doc.wsdl11.get_interface_document()) - wsdl.close() - return wsdl_filename + wsdl_file.write(server.doc.wsdl11.get_interface_document()) + return unicode(wsdl_file) -class TestServicesSoap(MutalyzerTest): - """ - Test the Mutalyzer SOAP interface. - """ - def setup(self): - super(TestServicesSoap, self).setup() - self.server = NullServer(application, ostr=True) - # Unfortunately there's no easy way to just give a SUDS client a - # complete WSDL string, it only accepts a URL to it. So we create one. - self.wsdl = _write_wsdl(self.server) - self.client = Client('file://%s' % self.wsdl, cache=None) +@pytest.fixture +def client(wsdl): + return Client('file://%s' % wsdl, cache=None) - def teardown(self): - super(TestServicesSoap, self).teardown() - os.unlink(self.wsdl) - def _call(self, method, *args, **kwargs): - r = getattr(self.server.service, method)(*args, **kwargs) +@pytest.fixture +def api(server, client): + def call(method, *args, **kwargs): + r = getattr(server.service, method)(*args, **kwargs) # This seems to be the way to feed raw SOAP response strings to a # SUDS client, without having it talking to a real server. - return getattr(self.client.service, method)(__inject={'reply': ''.join(r)}) - - def test_ping(self): - """ - Running the ping method should return 'pong'. 
- """ - r = self._call('ping') - assert r == 'pong' - - def test_checksyntax_valid(self): - """ - Running checkSyntax with a valid variant name should return True. - """ - r = self._call('checkSyntax', 'AB026906.1:c.274G>T') - assert r.valid == True - - def test_checksyntax_invalid(self): - """ - Running checkSyntax with an invalid variant name should return False - and give at least one error message. - """ - r = self._call('checkSyntax', '0:abcd') - assert r.valid == False - assert len(r.messages.SoapMessage) >= 1 - - def test_checksyntax_empty(self): - """ - Running checkSyntax with no variant name should raise exception. - """ - # The validator doesn't work with NullServer, so we cannot really do - # these type of tests. However, in this case we implemented our own - # check instead of relying on the validator. - # See https://github.com/arskom/spyne/issues/318 - with pytest.raises(Fault): - self._call('checkSyntax') - - @fix(database, hg19, hg19_transcript_mappings) - def test_transcriptinfo_valid(self): - """ - Running transcriptInfo with valid arguments should get us a Transcript - object. - """ - r = self._call('transcriptInfo', - LOVD_ver='123', build='hg19', accNo='NM_002001.2') - assert r.trans_start == -99 - assert r.trans_stop == 1066 - assert r.CDS_stop == 774 - - @fix(database, hg19, hg19_transcript_mappings) - def test_numberconversion_gtoc_valid(self): - """ - Running numberConversion with valid g variant should give a list of - c variant names. - """ - r = self._call('numberConversion', - build='hg19', variant='NC_000001.10:g.159272155del') - assert type(r.string) == list - assert 'NM_002001.2:c.1del' in r.string - - @fix(database, hg19, hg19_transcript_mappings) - def test_numberconversion_ctog_valid(self): - """ - Running numberConversion with valid c variant should give a list of - g variant names. 
- """ - r = self._call('numberConversion', - build='hg19', variant='NM_002001.2:c.1del') - assert type(r.string) == list - assert 'NC_000001.10:g.159272155del' in r.string - - @fix(database, hg19, hg19_transcript_mappings) - def test_numberconversion_gtoc_gene(self): - """ - Running numberConversion with valid g variant and a gene name should - give a list of c variant names on transcripts for the given gene. - """ - r = self._call('numberConversion', - build='hg19', variant='NC_000023.10:g.32827640G>A', gene='DMD') - assert type(r.string) == list - assert 'NM_004007.2:c.250C>T' in r.string - assert 'NM_004011.3:c.-397314C>T' in r.string - assert 'NM_004019.2:c.-1542694C>T' in r.string - - @fix(database, hg19, hg19_transcript_mappings) - def test_numberconversion_gtoc_no_transcripts(self): - """ - Running numberConversion with valid g variant but no transcripts - close to it should give an empty list. - """ - r = self._call('numberConversion', - build='hg19', variant='chr7:g.345T>C') - assert not r - - @fix(database, hg19, hg19_transcript_mappings) - def test_numberconversion_gtoc_required_gene(self): - """ - Running numberConversion with valid g variant but no transcripts - close to it, but with a gene name, should give a list of c variant - names on transcripts for the given gene. - """ - r = self._call('numberConversion', - build='hg19', variant='chr7:g.345T>C', gene='LOC100132858') - assert type(r.string) == list - # Fix for r536: disable the -u and +d convention. - #assert 'XM_001715131.2:c.1155+d19483A>G' in r.string - assert 'XM_001715131.2:c.*19483A>G' in r.string - - @fix(database, hg19, hg19_transcript_mappings) - def test_gettranscriptsbygenename_valid(self): - """ - Running getTranscriptsByGeneName with valid gene name should give a - list of transcripts. 
- """ - r = self._call('getTranscriptsByGeneName', - build='hg19', name='DMD') - assert type(r.string) == list - for t in ['NM_004011.3', - 'NM_004019.2', - 'NM_004007.2']: - assert t in r.string - - @fix(database, hg19, hg19_transcript_mappings) - def test_gettranscriptsbygenename_invalid(self): - """ - Running getTranscriptsByGeneName with invalid gene name should not - give a result. - """ - r = self._call('getTranscriptsByGeneName', - build='hg19', name='BOGUSGENE') - assert not r - - @fix(database, cache('AF230870.1')) - def test_gettranscriptsandinfo_valid(self): - """ - Running getTranscriptsAndInfo with a valid genomic reference should - give a list of TranscriptInfo objects. - """ - r = self._call('getTranscriptsAndInfo', 'AF230870.1') - assert type(r.TranscriptInfo) == list - names = [t.name for t in r.TranscriptInfo] - for t in ['mtmC2_v001', - 'mtmB2_v001']: - assert t in names - - @fix(database, cache('AL449423.14')) - def test_gettranscriptsandinfo_restricted_valid(self): - """ - Running getTranscriptsAndInfo with a valid genomic reference and a - gene name should give a list of TranscriptInfo objects restricted - to the gene. - """ - r = self._call('getTranscriptsAndInfo', 'AL449423.14', 'CDKN2A') - assert type(r.TranscriptInfo) == list - names = [t.name for t in r.TranscriptInfo] - for t in ['CDKN2A_v008', - 'CDKN2A_v007']: - assert t in names - for t in ['CDKN2B_v002', - 'CDKN2B_v001', - 'MTAP_v005', - 'C9orf53_v001']: - assert t not in names - - @fix(database, hg19, hg19_transcript_mappings) - def test_gettranscriptsmapping(self): - """ - Running getTranscriptsMapping should give a list of - TranscriptMappingInfo objects. 
- """ - r = self._call('getTranscriptsMapping', - 'hg19', 'chrX', 31200000, 31210000, 1) - assert type(r.TranscriptMappingInfo) == list - names = [t.name for t in r.TranscriptMappingInfo] - for t in ('NM_004011', - 'NM_004019', - 'NM_004007'): - assert t in names - - @fix(database, hg19, hg19_transcript_mappings) - def test_mappinginfo(self): - """ - Running mappingInfo should give a Mapping object. - """ - r = self._call('mappingInfo', - '3.0-beta-06', 'hg19', 'NM_001100.3', 'g.112037014G>T') - assert r.endoffset == 117529978 - assert r.start_g == 112037014 - assert r.startoffset == 117529978 - assert r.mutationType == "subst" - assert r.end_g == 112037014 - assert r.startmain == 1388 - assert r.endmain == 1388 - - @fix(database, hg19, hg19_transcript_mappings) - def test_mappinginfo(self): - """ - Running mappingInfo should give a Mapping object. - """ - r = self._call('mappingInfo', - '3.0-beta-06', 'hg19', 'NM_002001.2', 'g.159272168G>T') - assert r.endoffset == 0 - assert r.start_g == 159272168 - assert r.startoffset == 0 - assert r.mutationType == 'subst' - assert r.end_g == 159272168 - assert r.startmain == 14 - assert r.endmain == 14 - - @fix(database, hg19, hg19_transcript_mappings) - def test_mappinginfo_compound(self): - """ - Running mappingInfo with compound variant should give a Mapping - object. - """ - r = self._call('mappingInfo', - '3.0-beta-06', 'hg19', 'NM_002001.2', 'g.[159272168G>T;159272174T>A]') - assert r.endoffset == 0 - assert r.start_g == 159272168 - assert r.startoffset == 0 - assert r.mutationType == 'compound' - assert r.end_g == 159272174 - assert r.startmain == 14 - assert r.endmain == 20 - - @fix(database, hg19, hg19_transcript_mappings) - def test_mappinginfo_reverse(self): - """ - Running mappingInfo on a reverse transcript should give a Mapping - object. 
- """ - r = self._call('mappingInfo', - '3.0-beta-06', 'hg19', 'NM_004011.3', 'g.31152229_31152239del') - assert r.endoffset == 0 - assert r.start_g == 31152229 - assert r.startoffset == 0 - assert r.mutationType == 'del' - assert r.end_g == 31152239 - assert r.startmain == 6981 - assert r.endmain == 6971 - - @fix(database, hg19, hg19_transcript_mappings) - def test_mappinginfo_compound_reverse(self): - """ - Running mappingInfo with compound variant on a reverse transcript - should give a Mapping object. - """ - r = self._call('mappingInfo', - '3.0-beta-06', 'hg19', 'NM_004011.3', 'g.[31152229_31152232del;31152235_31152239del]') - assert r.endoffset == 0 - assert r.start_g == 31152229 - assert r.startoffset == 0 - assert r.mutationType == 'compound' - assert r.end_g == 31152239 - assert r.startmain == 6981 - assert r.endmain == 6971 - - def test_info(self): - """ - Running the info method should give us some version information. - """ - r = self._call('info') - assert type(r.versionParts.string) == list - assert r.version == mutalyzer.__version__ - - @fix(database, cache('AB026906.1', 'AL449423.14', 'NM_003002.2')) - def test_getcache(self): - """ - Running the getCache method should give us the expected number of - cache entries. - """ - created_since = datetime.datetime.today() - datetime.timedelta(days=14) - - output = Output(__file__) - sync = CacheSync(output) - - r = self._call('getCache', created_since) - assert len(r.CacheEntry) == 3 - - def test_getdbsnpdescriptions(self): - """ - Running getdbSNPDescriptions method should give us the expected HGVS - descriptions for the given dbSNP id. - """ - # Patch Retriever.snpConvert to return rs9919552. 
- def mock_efetch(*args, **kwargs): - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', - 'rs9919552.xml.bz2') - return bz2.BZ2File(path) - - with patch.object(Entrez, 'efetch', mock_efetch): - r = self._call('getdbSNPDescriptions', 'rs9919552') - - assert 'NC_000011.9:g.111959625C>T' in r.string - assert 'NG_012337.2:g.7055C>T' in r.string - assert 'NM_003002.3:c.204C>T' in r.string - assert 'NP_002993.1:p.Ser68=' in r.string - - @fix(database, hg19, hg19_transcript_mappings) - def test_gettranscripts(self): - """ - Running getTranscripts should give a list of transcripts. - """ - r = self._call('getTranscripts', - build='hg19', chrom='chrX', pos=32237295) - assert type(r.string) == list - for t in ['NM_004011', - 'NM_004007']: - assert t in r.string - - @fix(database, hg19, hg19_transcript_mappings) - def test_gettranscripts_with_versions(self): - """ - Running getTranscripts with versions=True should give a list - of transcripts with version numbers. - """ - r = self._call('getTranscripts', - build='hg19', chrom='chrX', pos=32237295, versions=True) - assert type(r.string) == list - for t in ['NM_004011.3', - 'NM_004007.2']: - assert t in r.string - - @fix(database, cache('NM_003002.2')) - def test_runmutalyzer(self): - """ - Just a runMutalyzer test. - """ - r = self._call('runMutalyzer', 'NM_003002.2:c.274G>T') - assert r.errors == 0 - assert r.genomicDescription == 'NM_003002.2:n.335G>T' - assert 'NM_003002.2(SDHD_v001):c.274G>T' in r.transcriptDescriptions.string - - @fix(database) - def test_runmutalyzer_reference_info_nm(self): - """ - Get reference info for an NM variant without version. - """ - # Patch GenBankRetriever.fetch to return the contents of NM_003002.2 - # for NM_003002. 
- def mock_efetch(*args, **kwargs): - if kwargs.get('id') != 'NM_003002': - return Entrez.efetch(*args, **kwargs) - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', - 'NM_003002.2.gb.bz2') - return bz2.BZ2File(path) - - with patch.object(Entrez, 'efetch', mock_efetch): - r = self._call('runMutalyzer', 'NM_003002:c.274G>T') - - assert r.errors == 0 - assert r.referenceId == 'NM_003002.2' - assert r.sourceId == 'NM_003002.2' - assert r.sourceAccession == 'NM_003002' - assert r.sourceVersion == '2' - assert r.sourceGi == '222352156' - assert r.molecule == 'n' - - @fix(database, cache('NM_003002.2')) - def test_runmutalyzer_reference_info_nm_version(self): - """ - Get reference info for an NM variant with version. - """ - r = self._call('runMutalyzer', 'NM_003002.2:c.274G>T') - assert r.errors == 0 - assert r.referenceId == 'NM_003002.2' - assert r.sourceId == 'NM_003002.2' - assert r.sourceAccession == 'NM_003002' - assert r.sourceVersion == '2' - assert r.sourceGi == '222352156' - assert r.molecule == 'n' - - @fix(database, cache('LRG_1')) - def test_runmutalyzer_reference_info_lrg(self): - """ - Get reference info for an LRG variant. - """ - r = self._call('runMutalyzer', 'LRG_1t1:c.266G>T') - assert r.errors == 0 - assert r.referenceId == 'LRG_1' - assert r.sourceId == 'LRG_1' - assert r.molecule == 'g' - - @fix(database, cache('NG_012772.1')) - def test_runmutalyzer_reference_info_ng(self): - """ - Get reference info for an NG variant without version. - """ - # Patch GenBankRetriever.fetch to return the contents of NG_012772.1 - # for NG_012772. 
- def mock_efetch(*args, **kwargs): - if kwargs.get('id') != 'NG_012772': - return Entrez.efetch(*args, **kwargs) - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', - 'NG_012772.1.gb.bz2') - return bz2.BZ2File(path) - - with patch.object(Entrez, 'efetch', mock_efetch): - r = self._call('runMutalyzer', 'NG_012772:g.18964del') - - assert r.errors == 0 - assert r.referenceId == 'NG_012772.1' - assert r.sourceId == 'NG_012772.1' - assert r.sourceAccession == 'NG_012772' - assert r.sourceVersion == '1' - assert r.sourceGi == '256574794' - assert r.molecule == 'g' - - @fix(database, cache('NG_009105.1')) - def test_runmutalyzer_reference_info_ng_version(self): - """ - Get reference info for an NG variant with version. - """ - r = self._call('runMutalyzer', 'NG_009105.1:g.18964del') - assert r.errors == 0 - assert r.referenceId == 'NG_009105.1' - assert r.sourceId == 'NG_009105.1' - assert r.sourceAccession == 'NG_009105' - assert r.sourceVersion == '1' - assert r.sourceGi == '216548283' - assert r.molecule == 'g' - - @fix(database, cache('NG_012772.1')) - def test_runmutalyzer_reference_info_gi(self): - """ - Get reference info for a GI variant. - """ - r = self._call('runMutalyzer', 'gi256574794:g.18964del') - assert r.errors == 0 - assert r.referenceId == 'NG_012772.1' - assert r.sourceId == 'NG_012772.1' - assert r.sourceAccession == 'NG_012772' - assert r.sourceVersion == '1' - assert r.sourceGi == '256574794' - assert r.molecule == 'g' - - @fix(database, cache('NM_000143.3')) - def test_runmutalyzer_exons(self): - """ - Exon table in runMutalyzer output. 
- """ - r = self._call('runMutalyzer', 'NM_000143.3:c.630_636del') - assert r.errors == 0 - expected_exons = [(1, 195, '-63', '132'), - (196, 330, '133', '267'), - (331, 441, '268', '378'), - (442, 618, '379', '555'), - (619, 801, '556', '738'), - (802, 967, '739', '904'), - (968, 1171, '905', '1108'), - (1172, 1299, '1109', '1236'), - (1300, 1453, '1237', '1390'), - (1454, 1867, '1391', '*271')] - assert len(r.exons.ExonInfo) == len(expected_exons) - for exon, expected_exon in zip(r.exons.ExonInfo, expected_exons): - assert (exon.gStart, exon.gStop, exon.cStart, exon.cStop) == expected_exon - - @fix(database, cache('AB026906.1', 'NM_003002.2', 'AL449423.14')) - def test_batchjob(self): - """ - Submit a batch job. - """ - variants = ['AB026906.1(SDHD):g.7872G>T', - 'NM_003002.2:c.3_4insG', - 'AL449423.14(CDKN2A_v002):c.5_400del'] - data = '\n'.join(variants) + '\n' #.encode('base64') - - result = self._call('submitBatchJob', data.encode('utf-8'), 'NameChecker') - job_id = unicode(result) - - result = self._call('monitorBatchJob', job_id) - assert int(result) == len(variants) - - scheduler = Scheduler.Scheduler() - scheduler.process() - - result = self._call('monitorBatchJob', job_id) - assert int(result) == 0 - - result = self._call('getBatchJob', job_id) - assert len(result.decode('base64').strip().split('\n')) - 1 == len(variants) - - @fix(database) - def test_batchjob_newlines_unix(self): - """ - Submit a batch job with UNIX newlines. 
- """ - variants = ['AB026906.1(SDHD):g.7872G>T', - 'NM_003002.2:c.3_4insG', - 'AL449423.14(CDKN2A_v002):c.5_400del'] - data = '\n'.join(variants) + '\n' - - result = self._call('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') - job_id = unicode(result) - - result = self._call('monitorBatchJob', job_id) - assert int(result) == len(variants) - - scheduler = Scheduler.Scheduler() - scheduler.process() - - result = self._call('monitorBatchJob', job_id) - assert int(result) == 0 - - @fix(database) - def test_batchjob_newlines_mac(self): - """ - Submit a batch job with Mac newlines. - """ - variants = ['AB026906.1(SDHD):g.7872G>T', - 'NM_003002.2:c.3_4insG', - 'AL449423.14(CDKN2A_v002):c.5_400del'] - data = '\r'.join(variants) + '\r' - - result = self._call('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') - job_id = unicode(result) - - result = self._call('monitorBatchJob', job_id) - assert int(result) == len(variants) - - scheduler = Scheduler.Scheduler() - scheduler.process() - - result = self._call('monitorBatchJob', job_id) - assert int(result) == 0 - - @fix(database) - def test_batchjob_newlines_windows(self): - """ - Submit a batch job with Windows newlines. - """ - variants = ['AB026906.1(SDHD):g.7872G>T', - 'NM_003002.2:c.3_4insG', - 'AL449423.14(CDKN2A_v002):c.5_400del'] - data = '\r\n'.join(variants) + '\r\n' - - result = self._call('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') - job_id = unicode(result) - - result = self._call('monitorBatchJob', job_id) - assert int(result) == len(variants) - - scheduler = Scheduler.Scheduler() - scheduler.process() - - result = self._call('monitorBatchJob', job_id) - assert int(result) == 0 - - @fix(database) - def test_batchjob_toobig(self): - """ - Submit the batch name checker with a too big input file. - """ - seed = """ + return getattr(client.service, method)(__inject={'reply': ''.join(r)}) + return call + + +def test_ping(api): + """ + Running the ping method should return 'pong'. 
+ """ + r = api('ping') + assert r == 'pong' + + +def test_checksyntax_valid(api): + """ + Running checkSyntax with a valid variant name should return True. + """ + r = api('checkSyntax', 'AB026906.1:c.274G>T') + assert r.valid + + +def test_checksyntax_invalid(api): + """ + Running checkSyntax with an invalid variant name should return False + and give at least one error message. + """ + r = api('checkSyntax', '0:abcd') + assert not r.valid + assert len(r.messages.SoapMessage) >= 1 + + +def test_checksyntax_empty(api): + """ + Running checkSyntax with no variant name should raise exception. + """ + # The validator doesn't work with NullServer, so we cannot really do + # these type of tests. However, in this case we implemented our own + # check instead of relying on the validator. + # See https://github.com/arskom/spyne/issues/318 + with pytest.raises(Fault): + api('checkSyntax') + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_transcriptinfo_valid(api): + """ + Running transcriptInfo with valid arguments should get us a Transcript + object. + """ + r = api('transcriptInfo', + LOVD_ver='123', build='hg19', accNo='NM_002001.2') + assert r.trans_start == -99 + assert r.trans_stop == 1066 + assert r.CDS_stop == 774 + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_numberconversion_gtoc_valid(api): + """ + Running numberConversion with valid g variant should give a list of + c variant names. + """ + r = api('numberConversion', + build='hg19', variant='NC_000001.10:g.159272155del') + assert type(r.string) == list + assert 'NM_002001.2:c.1del' in r.string + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_numberconversion_ctog_valid(api): + """ + Running numberConversion with valid c variant should give a list of + g variant names. 
+ """ + r = api('numberConversion', + build='hg19', variant='NM_002001.2:c.1del') + assert type(r.string) == list + assert 'NC_000001.10:g.159272155del' in r.string + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_numberconversion_gtoc_gene(api): + """ + Running numberConversion with valid g variant and a gene name should + give a list of c variant names on transcripts for the given gene. + """ + r = api('numberConversion', + build='hg19', variant='NC_000023.10:g.32827640G>A', gene='DMD') + assert type(r.string) == list + assert 'NM_004007.2:c.250C>T' in r.string + assert 'NM_004011.3:c.-397314C>T' in r.string + assert 'NM_004019.2:c.-1542694C>T' in r.string + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_numberconversion_gtoc_no_transcripts(api): + """ + Running numberConversion with valid g variant but no transcripts + close to it should give an empty list. + """ + r = api('numberConversion', + build='hg19', variant='chr7:g.345T>C') + assert not r + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_numberconversion_gtoc_required_gene(api): + """ + Running numberConversion with valid g variant but no transcripts + close to it, but with a gene name, should give a list of c variant + names on transcripts for the given gene. + """ + r = api('numberConversion', + build='hg19', variant='chr7:g.345T>C', gene='LOC100132858') + assert type(r.string) == list + # Fix for r536: disable the -u and +d convention. + # assert 'XM_001715131.2:c.1155+d19483A>G' in r.string + assert 'XM_001715131.2:c.*19483A>G' in r.string + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_gettranscriptsbygenename_valid(api): + """ + Running getTranscriptsByGeneName with valid gene name should give a + list of transcripts. 
+ """ + r = api('getTranscriptsByGeneName', build='hg19', name='DMD') + assert type(r.string) == list + for t in ['NM_004011.3', + 'NM_004019.2', + 'NM_004007.2']: + assert t in r.string + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_gettranscriptsbygenename_invalid(api): + """ + Running getTranscriptsByGeneName with invalid gene name should not + give a result. + """ + r = api('getTranscriptsByGeneName', build='hg19', name='BOGUSGENE') + assert not r + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['AF230870.1']], indirect=True) +def test_gettranscriptsandinfo_valid(api): + """ + Running getTranscriptsAndInfo with a valid genomic reference should + give a list of TranscriptInfo objects. + """ + r = api('getTranscriptsAndInfo', 'AF230870.1') + assert type(r.TranscriptInfo) == list + names = [t.name for t in r.TranscriptInfo] + for t in ['mtmC2_v001', + 'mtmB2_v001']: + assert t in names + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_gettranscriptsandinfo_restricted_valid(api): + """ + Running getTranscriptsAndInfo with a valid genomic reference and a + gene name should give a list of TranscriptInfo objects restricted + to the gene. + """ + r = api('getTranscriptsAndInfo', 'AL449423.14', 'CDKN2A') + assert type(r.TranscriptInfo) == list + names = [t.name for t in r.TranscriptInfo] + for t in ['CDKN2A_v008', + 'CDKN2A_v007']: + assert t in names + for t in ['CDKN2B_v002', + 'CDKN2B_v001', + 'MTAP_v005', + 'C9orf53_v001']: + assert t not in names + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_gettranscriptsmapping(api): + """ + Running getTranscriptsMapping should give a list of + TranscriptMappingInfo objects. 
+ """ + r = api('getTranscriptsMapping', + 'hg19', 'chrX', 31200000, 31210000, 1) + assert type(r.TranscriptMappingInfo) == list + names = [t.name for t in r.TranscriptMappingInfo] + for t in ('NM_004011', + 'NM_004019', + 'NM_004007'): + assert t in names + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_mappinginfo(api): + """ + Running mappingInfo should give a Mapping object. + """ + r = api('mappingInfo', + '3.0-beta-06', 'hg19', 'NM_001100.3', 'g.112037014G>T') + assert r.endoffset == 117529978 + assert r.start_g == 112037014 + assert r.startoffset == 117529978 + assert r.mutationType == "subst" + assert r.end_g == 112037014 + assert r.startmain == 1388 + assert r.endmain == 1388 + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_mappinginfo(api): + """ + Running mappingInfo should give a Mapping object. + """ + r = api('mappingInfo', + '3.0-beta-06', 'hg19', 'NM_002001.2', 'g.159272168G>T') + assert r.endoffset == 0 + assert r.start_g == 159272168 + assert r.startoffset == 0 + assert r.mutationType == 'subst' + assert r.end_g == 159272168 + assert r.startmain == 14 + assert r.endmain == 14 + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_mappinginfo_compound(api): + """ + Running mappingInfo with compound variant should give a Mapping + object. + """ + r = api('mappingInfo', + '3.0-beta-06', 'hg19', 'NM_002001.2', 'g.[159272168G>T;159272174T>A]') + assert r.endoffset == 0 + assert r.start_g == 159272168 + assert r.startoffset == 0 + assert r.mutationType == 'compound' + assert r.end_g == 159272174 + assert r.startmain == 14 + assert r.endmain == 20 + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_mappinginfo_reverse(api): + """ + Running mappingInfo on a reverse transcript should give a Mapping + object. 
+ """ + r = api('mappingInfo', + '3.0-beta-06', 'hg19', 'NM_004011.3', 'g.31152229_31152239del') + assert r.endoffset == 0 + assert r.start_g == 31152229 + assert r.startoffset == 0 + assert r.mutationType == 'del' + assert r.end_g == 31152239 + assert r.startmain == 6981 + assert r.endmain == 6971 + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_mappinginfo_compound_reverse(api): + """ + Running mappingInfo with compound variant on a reverse transcript + should give a Mapping object. + """ + r = api('mappingInfo', + '3.0-beta-06', 'hg19', 'NM_004011.3', 'g.[31152229_31152232del;31152235_31152239del]') + assert r.endoffset == 0 + assert r.start_g == 31152229 + assert r.startoffset == 0 + assert r.mutationType == 'compound' + assert r.end_g == 31152239 + assert r.startmain == 6981 + assert r.endmain == 6971 + + +def test_info(api): + """ + Running the info method should give us some version information. + """ + r = api('info') + assert type(r.versionParts.string) == list + assert r.version == mutalyzer.__version__ + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize( + 'references', [['AB026906.1', 'AL449423.14', 'NM_003002.2']], indirect=True) +def test_getcache(output, api): + """ + Running the getCache method should give us the expected number of + cache entries. + """ + created_since = datetime.datetime.today() - datetime.timedelta(days=14) + r = api('getCache', created_since) + assert len(r.CacheEntry) == 3 + + +def test_getdbsnpdescriptions(api): + """ + Running getdbSNPDescriptions method should give us the expected HGVS + descriptions for the given dbSNP id. + """ + # Patch Retriever.snpConvert to return rs9919552. 
+ def mock_efetch(*args, **kwargs): + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'rs9919552.xml.bz2') + return bz2.BZ2File(path) + + with patch.object(Entrez, 'efetch', mock_efetch): + r = api('getdbSNPDescriptions', 'rs9919552') + + assert 'NC_000011.9:g.111959625C>T' in r.string + assert 'NG_012337.2:g.7055C>T' in r.string + assert 'NM_003002.3:c.204C>T' in r.string + assert 'NP_002993.1:p.Ser68=' in r.string + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_gettranscripts(api): + """ + Running getTranscripts should give a list of transcripts. + """ + r = api('getTranscripts', + build='hg19', chrom='chrX', pos=32237295) + assert type(r.string) == list + for t in ['NM_004011', + 'NM_004007']: + assert t in r.string + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_gettranscripts_with_versions(api): + """ + Running getTranscripts with versions=True should give a list + of transcripts with version numbers. + """ + r = api('getTranscripts', + build='hg19', chrom='chrX', pos=32237295, versions=True) + assert type(r.string) == list + for t in ['NM_004011.3', + 'NM_004007.2']: + assert t in r.string + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NM_003002.2']], indirect=True) +def test_runmutalyzer(api): + """ + Just a runMutalyzer test. + """ + r = api('runMutalyzer', 'NM_003002.2:c.274G>T') + assert r.errors == 0 + assert r.genomicDescription == 'NM_003002.2:n.335G>T' + assert 'NM_003002.2(SDHD_v001):c.274G>T' in r.transcriptDescriptions.string + + +@pytest.mark.usefixtures('db') +def test_runmutalyzer_reference_info_nm(api): + """ + Get reference info for an NM variant without version. + """ + # Patch GenBankRetriever.fetch to return the contents of NM_003002.2 + # for NM_003002. 
+ def mock_efetch(*args, **kwargs): + if kwargs.get('id') != 'NM_003002': + return Entrez.efetch(*args, **kwargs) + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'NM_003002.2.gb.bz2') + return bz2.BZ2File(path) + + with patch.object(Entrez, 'efetch', mock_efetch): + r = api('runMutalyzer', 'NM_003002:c.274G>T') + + assert r.errors == 0 + assert r.referenceId == 'NM_003002.2' + assert r.sourceId == 'NM_003002.2' + assert r.sourceAccession == 'NM_003002' + assert r.sourceVersion == '2' + assert r.sourceGi == '222352156' + assert r.molecule == 'n' + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NM_003002.2']], indirect=True) +def test_runmutalyzer_reference_info_nm_version(api): + """ + Get reference info for an NM variant with version. + """ + r = api('runMutalyzer', 'NM_003002.2:c.274G>T') + assert r.errors == 0 + assert r.referenceId == 'NM_003002.2' + assert r.sourceId == 'NM_003002.2' + assert r.sourceAccession == 'NM_003002' + assert r.sourceVersion == '2' + assert r.sourceGi == '222352156' + assert r.molecule == 'n' + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['LRG_1']], indirect=True) +def test_runmutalyzer_reference_info_lrg(api): + """ + Get reference info for an LRG variant. + """ + r = api('runMutalyzer', 'LRG_1t1:c.266G>T') + assert r.errors == 0 + assert r.referenceId == 'LRG_1' + assert r.sourceId == 'LRG_1' + assert r.molecule == 'g' + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True) +def test_runmutalyzer_reference_info_ng(api): + """ + Get reference info for an NG variant without version. + """ + # Patch GenBankRetriever.fetch to return the contents of NG_012772.1 + # for NG_012772. 
+ def mock_efetch(*args, **kwargs): + if kwargs.get('id') != 'NG_012772': + return Entrez.efetch(*args, **kwargs) + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'NG_012772.1.gb.bz2') + return bz2.BZ2File(path) + + with patch.object(Entrez, 'efetch', mock_efetch): + r = api('runMutalyzer', 'NG_012772:g.18964del') + + assert r.errors == 0 + assert r.referenceId == 'NG_012772.1' + assert r.sourceId == 'NG_012772.1' + assert r.sourceAccession == 'NG_012772' + assert r.sourceVersion == '1' + assert r.sourceGi == '256574794' + assert r.molecule == 'g' + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NG_009105.1']], indirect=True) +def test_runmutalyzer_reference_info_ng_version(api): + """ + Get reference info for an NG variant with version. + """ + r = api('runMutalyzer', 'NG_009105.1:g.18964del') + assert r.errors == 0 + assert r.referenceId == 'NG_009105.1' + assert r.sourceId == 'NG_009105.1' + assert r.sourceAccession == 'NG_009105' + assert r.sourceVersion == '1' + assert r.sourceGi == '216548283' + assert r.molecule == 'g' + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True) +def test_runmutalyzer_reference_info_gi(api): + """ + Get reference info for a GI variant. + """ + r = api('runMutalyzer', 'gi256574794:g.18964del') + assert r.errors == 0 + assert r.referenceId == 'NG_012772.1' + assert r.sourceId == 'NG_012772.1' + assert r.sourceAccession == 'NG_012772' + assert r.sourceVersion == '1' + assert r.sourceGi == '256574794' + assert r.molecule == 'g' + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NM_000143.3']], indirect=True) +def test_runmutalyzer_exons(api): + """ + Exon table in runMutalyzer output. 
+ """ + r = api('runMutalyzer', 'NM_000143.3:c.630_636del') + assert r.errors == 0 + expected_exons = [(1, 195, '-63', '132'), + (196, 330, '133', '267'), + (331, 441, '268', '378'), + (442, 618, '379', '555'), + (619, 801, '556', '738'), + (802, 967, '739', '904'), + (968, 1171, '905', '1108'), + (1172, 1299, '1109', '1236'), + (1300, 1453, '1237', '1390'), + (1454, 1867, '1391', '*271')] + assert len(r.exons.ExonInfo) == len(expected_exons) + for exon, expected_exon in zip(r.exons.ExonInfo, expected_exons): + assert (exon.gStart, exon.gStop, exon.cStart, exon.cStop) == expected_exon + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize( + 'references', [['AB026906.1', 'NM_003002.2', 'AL449423.14']], + indirect=True) +def test_batchjob(api): + """ + Submit a batch job. + """ + variants = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.2:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + data = '\n'.join(variants) + '\n' #.encode('base64') + + result = api('submitBatchJob', data.encode('utf-8'), 'NameChecker') + job_id = unicode(result) + + result = api('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = api('monitorBatchJob', job_id) + assert int(result) == 0 + + result = api('getBatchJob', job_id) + assert len(result.decode('base64').strip().split('\n')) - 1 == len(variants) + + +@pytest.mark.usefixtures('db') +def test_batchjob_newlines_unix(api): + """ + Submit a batch job with UNIX newlines. 
+ """ + variants = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.2:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + data = '\n'.join(variants) + '\n' + + result = api('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') + job_id = unicode(result) + + result = api('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = api('monitorBatchJob', job_id) + assert int(result) == 0 + + +@pytest.mark.usefixtures('db') +def test_batchjob_newlines_mac(api): + """ + Submit a batch job with Mac newlines. + """ + variants = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.2:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + data = '\r'.join(variants) + '\r' + + result = api('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') + job_id = unicode(result) + + result = api('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = api('monitorBatchJob', job_id) + assert int(result) == 0 + + +@pytest.mark.usefixtures('db') +def test_batchjob_newlines_windows(api): + """ + Submit a batch job with Windows newlines. + """ + variants = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.2:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + data = '\r\n'.join(variants) + '\r\n' + + result = api('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') + job_id = unicode(result) + + result = api('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = api('monitorBatchJob', job_id) + assert int(result) == 0 + + +@pytest.mark.usefixtures('db') +def test_batchjob_toobig(settings, api): + """ + Submit the batch name checker with a too big input file. + """ + seed = """ Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. 
Ut wisi enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis @@ -636,183 +656,188 @@ hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi.""" - data = seed - # Very crude way of creating something big. - while len(data) <= settings.MAX_FILE_SIZE: - data += data - - try: - self._call('submitBatchJob', data.encode('utf-8'), 'NameChecker') - assert False - except Fault as e: - # - senv:Client.RequestTooLong: Raised by Spyne, depending on - # the max_content_length argument to the HttpBase constructor. - # - EMAXSIZE: Raised by Mutalyzer, depending on the - # batchInputMaxSize configuration setting. - assert e.faultcode in ('senv:Client.RequestTooLong', 'EMAXSIZE') - - @fix(database) - def test_upload_local_genbank(self): - """ - Upload local genbank file. - """ - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', - 'AB026906.1.gb.bz2') - with bz2.BZ2File(path) as f: - data = f.read() - - result = self._call('uploadGenBankLocalFile', data) - ud = unicode(result) - - r = self._call('runMutalyzer', ud + '(SDHD):g.7872G>T') - assert r.errors == 0 - assert r.genomicDescription == ud + ':g.7872G>T' - assert ud + '(SDHD_v001):c.274G>T' in r.transcriptDescriptions.string - - def test_checksyntax_unicode(self): - """ - Run checkSyntax with an invalid variant description containing - non-ASCII unicode characters. - """ - r = self._call('checkSyntax', 'La Pe\xf1a') - assert r.valid == False - assert len(r.messages.SoapMessage) == 1 - assert r.messages.SoapMessage[0]['errorcode'] == 'EPARSE' - assert r.messages.SoapMessage[0]['message'] == 'Expected W:(0123...) (at char 2), (line:1, col:3)' - - @fix(database) - def test_batchjob_unicode(self): - """ - Submit a batch job with non-ASCII unicode characters in the input - file. 
- """ - variants = ['\u2026AB026906.1:c.274G>T', - '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] - expected = [['\u2026AB026906.1:c.274G>T', - '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], - ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', - '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)']] - - data = '\n'.join(variants) + '\n' #.encode('base64') - - result = self._call('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') - job_id = unicode(result) - - result = self._call('monitorBatchJob', job_id) - assert int(result) == len(variants) - - scheduler = Scheduler.Scheduler() - scheduler.process() - - result = self._call('monitorBatchJob', job_id) - assert int(result) == 0 - - result = self._call('getBatchJob', job_id) - result = result.decode('base64').decode('utf-8').strip().split('\n')[1:] - assert expected == [line.split('\t') for line in result] - - @fix(database, hg19, hg19_transcript_mappings) - def test_get_transcripts_mapping(self): - """ - Test output of getTranscriptsMapping. - """ - r = self._call('getTranscriptsMapping', 'hg19', 'chr11', - 111955524, 111966518) - assert len(r.TranscriptMappingInfo) == 3 - assert all(all(t_real[k] == t_expected[k] for k in t_expected) - for t_real, t_expected in - zip(r.TranscriptMappingInfo, [{'cds_start': 111957632, - 'cds_stop': 111965694, - 'name': 'NM_003002', - 'stop': 111966518, - 'start': 111957571, - 'version': 2, - 'gene': 'SDHD', - 'orientation': '+'}, - {'cds_start': 111957492, - 'cds_stop': 111956019, - 'name': 'NM_012459', - 'stop': 111955524, - 'start': 111957522, - 'version': 2, - 'gene': 'TIMM8B', - 'orientation': '-'}, - {'cds_start': None, - 'cds_stop': None, - 'name': 'NR_028383', - 'stop': 111955524, - 'start': 111957522, - 'version': 1, - 'gene': 'TIMM8B', - 'orientation': '-'}])) - - def test_description_extract(self): - """ - Test output of descriptionExtract. 
- """ - r = self._call('descriptionExtract', - 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', - 'ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA') - assert r['description'] == '[5_6insTT;17del;26A>C;35dup]' - assert len(r['allele'].RawVar) == 4 - # For some reason, we get the empty string as `None` in SOAP. - assert all(all((v_real[k] == v_expected[k]) or not(v_real[k] or v_expected[k]) - for k in v_expected) - for v_real, v_expected in - zip(r['allele'].RawVar, [{'end': 6, - 'deleted': '', - 'weight': 8, - 'inserted': 'TT', - 'start_offset': 0, - 'start': 5, - 'description': '5_6insTT', - 'shift': 1, - 'end_offset': 0, - 'type': 'ins', - 'sample_start': 6, - 'sample_end': 7, - 'sample_start_offset': 0, - 'sample_end_offset': 0}, - {'end': 17, - 'deleted': 'G', - 'weight': 7, - 'inserted': '', - 'start_offset': 0, - 'start': 17, - 'description': '17del', - 'shift': 0, - 'end_offset': 0, - 'type': 'del', - 'sample_start': 18, - 'sample_end': 19, - 'sample_start_offset': 0, - 'sample_end_offset': 0}, - {'end': 26, - 'deleted': 'A', - 'weight': 3, - 'inserted': 'C', - 'start_offset': 0, - 'start': 26, - 'description': '26A>C', - 'shift': 0, - 'end_offset': 0, - 'type': 'subst', - 'sample_start': 27, - 'sample_end': 27, - 'sample_start_offset': 0, - 'sample_end_offset': 0}, - {'end': 35, - 'deleted': '', - 'weight': 5, - 'inserted': 'G', - 'start_offset': 0, - 'start': 35, - 'description': '35dup', - 'shift': 1, - 'end_offset': 0, - 'type': 'dup', - 'sample_start': 37, - 'sample_end': 37, - 'sample_start_offset': 0, - 'sample_end_offset': 0}])) + data = seed + # Very crude way of creating something big. + while len(data) <= settings.MAX_FILE_SIZE: + data += data + + try: + api('submitBatchJob', data.encode('utf-8'), 'NameChecker') + assert False + except Fault as e: + # - senv:Client.RequestTooLong: Raised by Spyne, depending on + # the max_content_length argument to the HttpBase constructor. 
+ # - EMAXSIZE: Raised by Mutalyzer, depending on the + # batchInputMaxSize configuration setting. + assert e.faultcode in ('senv:Client.RequestTooLong', 'EMAXSIZE') + + +@pytest.mark.usefixtures('db') +def test_upload_local_genbank(api): + """ + Upload local genbank file. + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'AB026906.1.gb.bz2') + with bz2.BZ2File(path) as f: + data = f.read() + + result = api('uploadGenBankLocalFile', data) + ud = unicode(result) + + r = api('runMutalyzer', ud + '(SDHD):g.7872G>T') + assert r.errors == 0 + assert r.genomicDescription == ud + ':g.7872G>T' + assert ud + '(SDHD_v001):c.274G>T' in r.transcriptDescriptions.string + + +def test_checksyntax_unicode(api): + """ + Run checkSyntax with an invalid variant description containing + non-ASCII unicode characters. + """ + r = api('checkSyntax', 'La Pe\xf1a') + assert not r.valid + assert len(r.messages.SoapMessage) == 1 + assert r.messages.SoapMessage[0]['errorcode'] == 'EPARSE' + assert r.messages.SoapMessage[0]['message'] == 'Expected W:(0123...) (at char 2), (line:1, col:3)' + + +@pytest.mark.usefixtures('db') +def test_batchjob_unicode(api): + """ + Submit a batch job with non-ASCII unicode characters in the input + file. + """ + variants = ['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del'] + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) 
(at char 0), (line:1, col:1)']] + + data = '\n'.join(variants) + '\n' # .encode('base64') + + result = api('submitBatchJob', data.encode('utf-8'), 'SyntaxChecker') + job_id = unicode(result) + + result = api('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = api('monitorBatchJob', job_id) + assert int(result) == 0 + + result = api('getBatchJob', job_id) + result = result.decode('base64').decode('utf-8').strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_get_transcripts_mapping(api): + """ + Test output of getTranscriptsMapping. + """ + r = api('getTranscriptsMapping', 'hg19', 'chr11', + 111955524, 111966518) + assert len(r.TranscriptMappingInfo) == 3 + assert all(all(t_real[k] == t_expected[k] for k in t_expected) + for t_real, t_expected in + zip(r.TranscriptMappingInfo, [{'cds_start': 111957632, + 'cds_stop': 111965694, + 'name': 'NM_003002', + 'stop': 111966518, + 'start': 111957571, + 'version': 2, + 'gene': 'SDHD', + 'orientation': '+'}, + {'cds_start': 111957492, + 'cds_stop': 111956019, + 'name': 'NM_012459', + 'stop': 111955524, + 'start': 111957522, + 'version': 2, + 'gene': 'TIMM8B', + 'orientation': '-'}, + {'cds_start': None, + 'cds_stop': None, + 'name': 'NR_028383', + 'stop': 111955524, + 'start': 111957522, + 'version': 1, + 'gene': 'TIMM8B', + 'orientation': '-'}])) + + +def test_description_extract(api): + """ + Test output of descriptionExtract. + """ + r = api('descriptionExtract', + 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', + 'ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA') + assert r['description'] == '[5_6insTT;17del;26A>C;35dup]' + assert len(r['allele'].RawVar) == 4 + # For some reason, we get the empty string as `None` in SOAP. 
+ assert all(all((v_real[k] == v_expected[k]) or not(v_real[k] or v_expected[k]) + for k in v_expected) + for v_real, v_expected in + zip(r['allele'].RawVar, [{'end': 6, + 'deleted': '', + 'weight': 8, + 'inserted': 'TT', + 'start_offset': 0, + 'start': 5, + 'description': '5_6insTT', + 'shift': 1, + 'end_offset': 0, + 'type': 'ins', + 'sample_start': 6, + 'sample_end': 7, + 'sample_start_offset': 0, + 'sample_end_offset': 0}, + {'end': 17, + 'deleted': 'G', + 'weight': 7, + 'inserted': '', + 'start_offset': 0, + 'start': 17, + 'description': '17del', + 'shift': 0, + 'end_offset': 0, + 'type': 'del', + 'sample_start': 18, + 'sample_end': 19, + 'sample_start_offset': 0, + 'sample_end_offset': 0}, + {'end': 26, + 'deleted': 'A', + 'weight': 3, + 'inserted': 'C', + 'start_offset': 0, + 'start': 26, + 'description': '26A>C', + 'shift': 0, + 'end_offset': 0, + 'type': 'subst', + 'sample_start': 27, + 'sample_end': 27, + 'sample_start_offset': 0, + 'sample_end_offset': 0}, + {'end': 35, + 'deleted': '', + 'weight': 5, + 'inserted': 'G', + 'start_offset': 0, + 'start': 35, + 'description': '35dup', + 'shift': 1, + 'end_offset': 0, + 'type': 'dup', + 'sample_start': 37, + 'sample_end': 37, + 'sample_start_offset': 0, + 'sample_end_offset': 0}])) diff --git a/tests/test_variantchecker.py b/tests/test_variantchecker.py index 3fac19f9..f1159049 100644 --- a/tests/test_variantchecker.py +++ b/tests/test_variantchecker.py @@ -1,1475 +1,1581 @@ """ -Tests for the variantchecker module. +Tests for the mutalyzer.variantchecker module. 
""" from __future__ import unicode_literals -#import logging; logging.basicConfig() +import pytest -from mutalyzer.output import Output from mutalyzer.variantchecker import check_variant -from fixtures import REFERENCES -from fixtures import database, cache, hg19, hg19_transcript_mappings -from utils import MutalyzerTest -from utils import fix - # Todo: We had a test for checking a variant on a CONTIG RefSeq reference # (NG_005990.1), but instead we should have separate tests for the retriever # module, including a test for fetching a CONTIG RefSeq reference. -class TestVariantchecker(MutalyzerTest): - """ - Test the variantchecker module. - """ - fixtures = (database, ) - - def setup(self): - """ - Initialize test variantchecker module. - """ - super(TestVariantchecker, self).setup() - self.output = Output(__file__) - - @fix(cache('AL449423.14')) - def test_deletion_in_frame(self): - """ - Simple in-frame deletion should give a simple description on protein - level. - """ - check_variant('AL449423.14(CDKN2A_v001):c.161_163del', self.output) - assert (self.output.getIndexedOutput('genomicDescription', 0) == - 'AL449423.14:g.61937_61939del') - assert 'AL449423.14(CDKN2A_v001):c.161_163del' \ - in self.output.getOutput('descriptions') - assert 'AL449423.14(CDKN2A_i001):p.(Met54_Gly55delinsSer)' \ - in self.output.getOutput('protDescriptions') - assert self.output.getOutput('newProtein') - - @fix(cache('AL449423.14')) - def test_insertion_in_frame(self): - """ - Simple in-frame insertion should give a simple description on protein - level. 
- """ - check_variant('AL449423.14(CDKN2A_v001):c.161_162insATC', self.output) - assert (self.output.getIndexedOutput('genomicDescription', 0) == - 'AL449423.14:g.61938_61939insGAT') - assert 'AL449423.14(CDKN2A_v001):c.161_162insATC' \ - in self.output.getOutput('descriptions') - assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsIleSer)' \ - in self.output.getOutput('protDescriptions') - assert self.output.getOutput('newProtein') - - @fix(cache('AL449423.14')) - def test_insertion_list_in_frame(self): - """ - Simple in-frame insertion of a list should give a simple description - on protein level. - """ - check_variant('AL449423.14(CDKN2A_v001):c.161_162ins[ATC]', self.output) - assert (self.output.getIndexedOutput('genomicDescription', 0) == - 'AL449423.14:g.61938_61939insGAT') - assert 'AL449423.14(CDKN2A_v001):c.161_162insATC' \ - in self.output.getOutput('descriptions') - assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsIleSer)' \ - in self.output.getOutput('protDescriptions') - assert self.output.getOutput('newProtein') - - @fix(cache('AL449423.14')) - def test_deletion_insertion_in_frame(self): - """ - Simple in-frame deletion/insertion should give a simple description on - protein level. - """ - check_variant('AL449423.14(CDKN2A_v001):c.161_162delinsATCCC', - self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.61938_61939delinsGGGAT' - assert 'AL449423.14(CDKN2A_v001):c.161_162delinsATCCC' \ - in self.output.getOutput('descriptions') - assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsAsnPro)' \ - in self.output.getOutput('protDescriptions') - assert self.output.getOutput('newProtein') - - @fix(cache('AL449423.14')) - def test_deletion_insertion_list_in_frame(self): - """ - Simple in-frame deletion-insertion of a list should give a simple - description on protein level. 
- """ - check_variant('AL449423.14(CDKN2A_v001):c.161_162delins[ATCCC]', - self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.61938_61939delinsGGGAT' - assert 'AL449423.14(CDKN2A_v001):c.161_162delinsATCCC' \ - in self.output.getOutput('descriptions') - assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsAsnPro)' \ - in self.output.getOutput('protDescriptions') - assert self.output.getOutput('newProtein') - - @fix(cache('AL449423.14')) - def test_deletion_insertion_in_frame_complete(self): - """ - Simple in-frame deletion/insertion should give a simple description on - protein level, also with the optional deleted sequence argument. - """ - check_variant('AL449423.14(CDKN2A_v001):c.161_162delTGinsATCCC', - self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.61938_61939delinsGGGAT' - assert 'AL449423.14(CDKN2A_v001):c.161_162delinsATCCC' \ - in self.output.getOutput('descriptions') - assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsAsnPro)' \ - in self.output.getOutput('protDescriptions') - assert self.output.getOutput('newProtein') - - @fix(cache('AL449423.14')) - def test_deletion_insertion_list_in_frame_complete(self): - """ - Simple in-frame deletion-insertion of a list should give a simple - description on protein level, also with the optional deleted sequence - argument. - """ - check_variant('AL449423.14(CDKN2A_v001):c.161_162delTGins[ATCCC]', - self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.61938_61939delinsGGGAT' - assert 'AL449423.14(CDKN2A_v001):c.161_162delinsATCCC' \ - in self.output.getOutput('descriptions') - assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsAsnPro)' \ - in self.output.getOutput('protDescriptions') - assert self.output.getOutput('newProtein') - - @fix(cache('NM_003002.2')) - def test_est_warning_nm_est(self): - """ - Warning for EST positioning on NM reference. 
- """ - check_variant('NM_003002.2:274del', self.output) - west = self.output.getMessagesWithErrorCode('WEST') - assert len(west) == 1 - - @fix(cache('NM_003002.2')) - def test_no_est_warning_nm_c(self): - """ - No EST warning for c. positioning on NM reference. - """ - check_variant('NM_003002.2:c.274del', self.output) - west = self.output.getMessagesWithErrorCode('WEST') - assert len(west) == 0 - - @fix(cache('NM_003002.2')) - def test_no_est_warning_nm_n(self): - """ - No EST warning for n. positioning on NM reference. - """ - check_variant('NM_003002.2:n.274del', self.output) - west = self.output.getMessagesWithErrorCode('WEST') - assert len(west) == 0 - - @fix(cache('NG_012772.1')) - def test_est_warning_ng_est(self): - """ - Warning for EST positioning on NG reference. - """ - check_variant('NG_012772.1:128del', self.output) - west = self.output.getMessagesWithErrorCode('WEST') - assert len(west) == 1 - - @fix(cache('NG_012772.1')) - def test_no_est_warning_ng_g(self): - """ - No EST warning for g. positioning on NG reference. - """ - check_variant('NG_012772.1:g.128del', self.output) - west = self.output.getMessagesWithErrorCode('WEST') - assert len(west) == 0 - - @fix(cache('AA010203.1')) - def test_no_est_warning_est_est(self): - """ - No warning for EST positioning on EST reference. - """ - check_variant('AA010203.1:54_55insG', self.output) - west = self.output.getMessagesWithErrorCode('WEST') - assert len(west) == 0 - - @fix(cache('NM_003002.2')) - def test_roll(self): - """ - Just a variant where we should roll. - """ - check_variant('NM_003002.2:c.273del', self.output) - wroll = self.output.getMessagesWithErrorCode('WROLLFORWARD') - assert len(wroll) > 0 - - @fix(cache('NM_003002.2')) - def test_no_roll(self): - """ - Just a variant where we cannot roll. 
- """ - check_variant('NM_003002.2:c.274del', self.output) - wroll = self.output.getMessagesWithErrorCode('WROLLFORWARD') - assert len(wroll) == 0 - - @fix(cache('NM_000088.3')) - def test_no_roll_splice(self): - """ - Here we can roll but should not, because it is over a splice site. - """ - check_variant('NM_000088.3:g.459del', self.output) - wrollback = self.output.getMessagesWithErrorCode('IROLLBACK') - assert len(wrollback) > 0 - wroll = self.output.getMessagesWithErrorCode('WROLLFORWARD') - assert len(wroll) == 0 - - @fix(cache('NM_000088.3')) - def test_partial_roll_splice(self): - """ - Here we can roll two positions, but should roll only one because - otherwise it is over a splice site. - """ - check_variant('NM_000088.3:g.494del', self.output) - wrollback = self.output.getMessagesWithErrorCode('IROLLBACK') - assert len(wrollback) > 0 - wroll = self.output.getMessagesWithErrorCode('WROLLFORWARD') - assert len(wroll) > 0 - - @fix(cache('NM_000088.3')) - def test_roll_after_splice(self): - """ - Here we can roll and should, we stay in the same exon. - """ - check_variant('NM_000088.3:g.460del', self.output) - wroll = self.output.getMessagesWithErrorCode('WROLLFORWARD') - assert len(wroll) > 0 - - @fix(cache('AL449423.14')) - def test_roll_both_ins(self): - """ - Insertion that rolls should not use the same inserted sequence in - descriptions on forward and reverse strands. 
- - Here we have the following situation on the forward strand: - - 65470 (genomic) - | - CGGTGCGTTGGGCAGCGCCCCCGCCTCCAGCAGCGCCCGCACCTCCTCTA - - Now, an insertion of TAC after 65470 should be rolled to an insertion - of ACT after 65471: - - CGGTGCGTTGGGCAGCGCCCCCGCC --- TCCAGCAGCGCCCGCACCTCCTCTA - CGGTGCGTTGGGCAGCGCCCCCGCC TAC TCCAGCAGCGCCCGCACCTCCTCTA => - - CGGTGCGTTGGGCAGCGCCCCCGCCT --- CCAGCAGCGCCCGCACCTCCTCTA - CGGTGCGTTGGGCAGCGCCCCCGCCT ACT CCAGCAGCGCCCGCACCTCCTCTA - - However, in CDKN2A_v001 (on the reverse strand), this insertion should - roll the other direction and the inserted sequence should be the reverse - complement of CTA, which is TAG, and not that of ACT, which is AGT. - - The next test (test_roll_reverse_ins) tests the situation for an input - of AL449423.14:g.65471_65472insACT, where only the reverse roll should - be done. - """ - check_variant('AL449423.14:g.65470_65471insTAC', self.output) - assert 'AL449423.14(CDKN2A_v001):c.99_100insTAG' in self.output.getOutput('descriptions') - assert 'AL449423.14:g.65471_65472insACT' == self.output.getIndexedOutput('genomicDescription', 0, '') - assert len(self.output.getMessagesWithErrorCode('WROLLFORWARD')) == 1 - - @fix(cache('AL449423.14')) - def test_roll_reverse_ins(self): - """ - Insertion that rolls on the reverse strand should not use the same - inserted sequence in descriptions on forward and reverse strands. - """ - check_variant('AL449423.14:g.65471_65472insACT', self.output) - assert 'AL449423.14(CDKN2A_v001):c.99_100insTAG' in self.output.getOutput('descriptions') - assert 'AL449423.14:g.65471_65472insACT' == self.output.getIndexedOutput('genomicDescription', 0, '') - assert len(self.output.getMessagesWithErrorCode('WROLLFORWARD')) == 0 - - @fix(cache('AL449423.14')) - def test_roll_message_forward(self): - """ - Roll warning message should only be shown for currently selected - strand (forward). 
- """ - check_variant('AL449423.14:g.65470_65471insTAC', self.output) - assert len(self.output.getMessagesWithErrorCode('WROLLFORWARD')) == 1 - assert len(self.output.getMessagesWithErrorCode('WROLLREVERSE')) == 0 - - @fix(cache('AL449423.14')) - def test_roll_message_reverse(self): - """ - Roll warning message should only be shown for currently selected - strand (reverse). - """ - check_variant('AL449423.14(CDKN2A_v001):c.98_99insGTA', self.output) - assert len(self.output.getMessagesWithErrorCode('WROLLFORWARD')) == 0 - assert len(self.output.getMessagesWithErrorCode('WROLLREVERSE')) == 1 - - @fix(cache('NM_000143.3')) - def test_ins_cds_start(self): - """ - Insertion on CDS start boundary should not be included in CDS. - """ - check_variant('NM_000143.3:c.-1_1insCAT', self.output) - assert self.output.getIndexedOutput("newProtein", 0) == None - # Todo: Is this a good test? - - @fix(cache('NM_000143.3')) - def test_ins_cds_start_after(self): - """ - Insertion after CDS start boundary should be included in CDS. - """ - check_variant('NM_000143.3:c.1_2insCAT', self.output) - assert self.output.getIndexedOutput("newProtein", 0) == '?' - # Todo: Is this a good test? - - @fix(cache('NG_012772.1')) - def test_del_splice_site(self): - """ - Deletion hitting one splice site should not do a protein prediction. - """ - check_variant('NG_012772.1(BRCA2_v001):c.632-5_670del', self.output) - assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert self.output.getOutput('removedSpliceSites') == [] - # Todo: For now, the following is how to check if no protein - # prediction is done. - assert not self.output.getOutput('newProtein') - - @fix(cache('NG_012772.1')) - def test_del_exon(self): - """ - Deletion of an entire exon should be possible. 
- """ - check_variant('NG_012772.1(BRCA2_v001):c.632-5_681+7del', self.output) - assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert self.output.getOutput('removedSpliceSites') == [2] - # Todo: For now, the following is how to check if protein - # prediction is done. - assert self.output.getOutput('newProtein') - - @fix(cache('NG_012772.1')) - def test_del_exon_exact(self): - """ - Deletion of exactly an exon should be possible. - """ - check_variant('NG_012772.1(BRCA2_v001):c.632_681del', self.output) - assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) == 0 - assert self.output.getOutput('removedSpliceSites') == [2] - # Todo: For now, the following is how to check if protein - # prediction is done. - assert self.output.getOutput('newProtein') - - @fix(cache('NG_012772.1')) - def test_del_exon_in_frame(self): - """ - Deletion of an entire exon with length a triplicate should give a - proteine product with just this deletion (and possibly substitutions - directly before and after). - - NG_012772.1(BRCA2_v001):c.68-7_316+7del is such a variant, since - positions 68 through 316 are exactly one exon and (316-68+1)/3 = 83. - """ - check_variant('NG_012772.1(BRCA2_v001):c.68-7_316+7del', self.output) - assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert self.output.getOutput('removedSpliceSites') == [2] - # Todo: For now, the following is how to check if protein - # prediction is done. - assert self.output.getOutput('newProtein') - # Todo: assert that protein products indeed have only this difference. - - @fix(cache('NG_012772.1')) - def test_del_exons(self): - """ - Deletion of two entire exons should be possible. - """ - check_variant('NG_012772.1(BRCA2_v001):c.632-5_793+7del', self.output) - assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert self.output.getOutput('removedSpliceSites') == [4] - # Todo: For now, the following is how to check if protein - # prediction is done. 
- assert self.output.getOutput('newProtein') - - @fix(cache('NG_012772.1')) - def test_del_intron(self): - """ - Deletion of an entire intron should be possible (fusion of remaining - exonic parts). - """ - check_variant('NG_012772.1(BRCA2_v001):c.622_674del', self.output) - assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert self.output.getOutput('removedSpliceSites') == [2] - # Todo: For now, the following is how to check if protein - # prediction is done. - assert self.output.getOutput('newProtein') - - @fix(cache('NG_012772.1')) - def test_del_intron_exact(self): - """ - Deletion of exactly an intron should be possible (fusion of flanking - exons). - """ - check_variant('NG_012772.1(BRCA2_v001):c.681+1_682-1del', self.output) - assert self.output.getMessagesWithErrorCode('WOVERSPLICE') == [] - assert self.output.getOutput('removedSpliceSites') == [2] - # Note: The protein prediction is done, but 'newProtein' is not set - # because we have no change. So to check if the prediction is done, we - # check if 'oldProtein' is set and to check if the prediction is - # correct, we check if 'newProtein' is not set. - assert self.output.getOutput('oldProtein') - assert not self.output.getOutput('newProtein') - - @fix(cache('NG_012772.1')) - def test_del_intron_in_frame(self): - """ - Deletion of an entire intron should be possible (fusion of remaining - exonic parts). - """ - check_variant('NG_012772.1(BRCA2_v001):c.622_672del', self.output) - assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert self.output.getOutput('removedSpliceSites') == [2] - # Todo: For now, the following is how to check if protein - # prediction is done. - assert self.output.getOutput('newProtein') - # Todo: assert that protein products indeed have only this difference. - - @fix(cache('NG_012772.1')) - def test_del_exon_unknown_offsets(self): - """ - Deletion of an entire exon with unknown offsets should be possible. 
- """ - check_variant('NG_012772.1(BRCA2_v001):c.632-?_681+?del', self.output) - assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) > 0 - # Todo: For now, the following is how to check if protein - # prediction is done. - assert self.output.getOutput('newProtein') - # Genomic positions should be centered in flanking introns and unsure. - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_012772.1:g.(17550_19725)del' - assert 'NG_012772.1(BRCA2_v001):c.632-?_681+?del' \ - in self.output.getOutput('descriptions') - assert 'NG_012772.1(BRCA2_i001):p.(Val211Glufs*10)' \ - in self.output.getOutput('protDescriptions') - # Todo: .c notation should still be c.632-?_681+?del, but what about - # other transcripts? - - @fix(cache('NG_012772.1')) - def test_del_exon_unknown_offsets_in_frame(self): - """ - Deletion of an entire exon with unknown offsets and length a - triplicate should give a proteine product with just this deletion - (and possibly substitutions directly before and after). - - NG_012772.1(BRCA2_v001):c.68-?_316+?del is such a variant, since - positions 68 through 316 are exactly one exon and (316-68+1)/3 = 83. - """ - check_variant('NG_012772.1(BRCA2_v001):c.68-?_316+?del', self.output) - assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) > 0 - # Todo: For now, the following is how to check if protein - # prediction is done. - assert self.output.getOutput('newProtein') - # Genomic positions should be centered in flanking introns and unsure. - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_012772.1:g.(7324_11720)del' - assert 'NG_012772.1(BRCA2_v001):c.68-?_316+?del' \ - in self.output.getOutput('descriptions') - # Todo: .c notation should still be c.632-?_681+?del, but what about - # other transcripts? 
- - @fix(cache('NG_012772.1')) - def test_del_exon_unknown_offsets_composed(self): - """ - Deletion of an entire exon with unknown offsets and another composed - variant with exact positioning should be possible. - """ - check_variant('NG_012772.1(BRCA2_v001):c.[632-?_681+?del;681+4del]', - self.output) - assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) > 0 - # Todo: For now, the following is how to check if protein - # prediction is done. - assert self.output.getOutput('newProtein') - # Genomic positions should be centered in flanking introns and unsure. - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_012772.1:g.[(17550_19725)del;19017del]' - assert 'NG_012772.1(BRCA2_v001):c.[632-?_681+?del;681+4del]' \ - in self.output.getOutput('descriptions') - # Todo: .c notation should still be c.632-?_681+?del, but what about - # other transcripts? - - @fix(cache('AL449423.14')) - def test_del_exon_unknown_offsets_reverse(self): - """ - Deletion of an entire exon with unknown offsets should be possible, - also on the reverse strand. - """ - check_variant('AL449423.14(CDKN2A_v001):c.151-?_457+?del', - self.output) - assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 - assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) > 0 - # Todo: For now, the following is how to check if protein - # prediction is done. - assert self.output.getOutput('newProtein') - # Genomic positions should be centered in flanking introns and unsure. - assert self.output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.(60314_63683)del' - assert 'AL449423.14(CDKN2A_v001):c.151-?_457+?del' \ - in self.output.getOutput('descriptions') - # Todo: .c notation should still be c.632-?_681+?del, but what about - # other transcripts? 
- - @fix(cache('NM_000143.3')) - def test_del_exon_transcript_reference(self): - """ - Deletion of entire exon on a transcript reference should remove the - expected splice sites (only that of the deleted exon), and not those - of the flanking exons (as would happen using the mechanism for genomic - references). - """ - #check_variant('NM_018723.3:c.758_890del', self.output) - check_variant('NM_000143.3:c.739_904del', self.output) - assert len(self.output.getMessagesWithErrorCode('WOVERSPLICE')) == 0 - assert self.output.getOutput('removedSpliceSites') == [2] - # Todo: For now, the following is how to check if protein - # prediction is done. - assert self.output.getOutput('newProtein') - - @fix(cache('NG_008939.1')) - def test_ins_seq(self): - """ - Insertion of a sequence. - """ - check_variant('NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - - @fix(cache('NG_012337.1')) - def test_ins_seq_reverse(self): - """ - Insertion of a sequence on reverse strand. - """ - check_variant('NG_012337.1(TIMM8B_v001):c.12_13insGATC', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_012337.1:g.4911_4912insATCG' - assert 'NG_012337.1(TIMM8B_v001):c.12_13insGATC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_ins_range(self): - """ - Insertion of a range. 
- """ - check_variant('NG_008939.1:g.5207_5208ins4300_4320', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 - - @fix(cache('NG_008939.1')) - def test_ins_range_inv(self): - """ - Insertion of an inverse range. - """ - check_variant('NG_008939.1:g.5207_5208ins4300_4320inv', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGCCAGATAATGAGCACAGGAC' - assert 'NG_008939.1(PCCB_v001):c.156_157insGCCAGATAATGAGCACAGGAC' \ - in self.output.getOutput('descriptions') - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 - - @fix(cache('NG_008939.1')) - def test_ins_seq_list(self): - """ - Insertion of a sequence as a list. - """ - check_variant('NG_008939.1:g.5207_5208ins[GTCCTGTGCTCATTATCTGGC]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_012337.1')) - def test_ins_seq_list_reverse(self): - """ - Insertion of a sequence as a list on reverse strand. - """ - check_variant('NG_012337.1(TIMM8B_v001):c.12_13ins[GATC]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_012337.1:g.4911_4912insATCG' - assert 'NG_012337.1(TIMM8B_v001):c.12_13insGATC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_ins_range_list(self): - """ - Insertion of a range as a list. 
- """ - check_variant('NG_008939.1:g.5207_5208ins[4300_4320]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 - - @fix(cache('NG_008939.1')) - def test_ins_range_inv_list(self): - """ - Insertion of an inverse range as a list. - """ - check_variant('NG_008939.1:g.5207_5208ins[4300_4320inv]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGCCAGATAATGAGCACAGGAC' - assert 'NG_008939.1(PCCB_v001):c.156_157insGCCAGATAATGAGCACAGGAC' \ - in self.output.getOutput('descriptions') - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 - - @fix(cache('NG_008939.1')) - def test_ins_seq_seq(self): - """ - Insertion of two sequences. - """ - check_variant('NG_008939.1:g.5207_5208ins[GTCCTGTGCTC;ATTATCTGGC]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_012337.1')) - def test_ins_seq_seq_reverse(self): - """ - Insertion of two sequences on reverse strand. - """ - check_variant('NG_012337.1(TIMM8B_v001):c.12_13ins[TTT;GATC]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_012337.1:g.4911_4912insATCAAAG' - assert 'NG_012337.1(TIMM8B_v001):c.12_13insTTTGATC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_ins_range_range(self): - """ - Insertion of two ranges. 
- """ - check_variant('NG_008939.1:g.5207_5208ins[4300_4309;4310_4320]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 - - @fix(cache('NG_008939.1')) - def test_ins_range_range_inv(self): - """ - Insertion of a range and an inverse range. - """ - check_variant('NG_008939.1:g.5207_5208ins[4300_4309;4310_4320inv]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTGCCAGATAATG' - assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTGCCAGATAATG' \ - in self.output.getOutput('descriptions') - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 - - @fix(cache('NG_008939.1')) - def test_ins_seq_range(self): - """ - Insertion of a sequence and a range. - """ - check_variant('NG_008939.1:g.5207_5208ins[GTCCTGTGCT;4310_4320]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_ins_seq_range_inv(self): - """ - Insertion of a sequence and an inverse range. - """ - check_variant('NG_008939.1:g.5207_5208ins[GTCCTGTGCT;4310_4320inv]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTGCCAGATAATG' - assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTGCCAGATAATG' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_ins_range_seq(self): - """ - Insertion of a range and a sequence. 
- """ - check_variant('NG_008939.1:g.5207_5208ins[4300_4309;CATTATCTGGC]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_ins_range_inv_seq(self): - """ - Insertion of an inverse range and a sequence. - """ - check_variant('NG_008939.1:g.5207_5208ins[4300_4309inv;CATTATCTGGC]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insAGCACAGGACCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_157insAGCACAGGACCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_ins_seq_coding(self): - """ - Insertion of a sequence (coding). - """ - check_variant('NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_ins_seq_list_coding(self): - """ - Insertion of a sequence as a list (coding). - """ - check_variant('NG_008939.1(PCCB_v001):c.156_157ins[GTCCTGTGCTCATTATCTGGC]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_ins_seq_seq_coding(self): - """ - Insertion of two sequences (coding). 
- """ - check_variant('NG_008939.1(PCCB_v001):c.156_157ins[GTCCTGTGCTC;ATTATCTGGC]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_ins_range_coding(self): - """ - Insertion of a range (coding). - """ - check_variant('NG_008939.1(PCCB_v001):c.156_157ins180_188', self.output) - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 - - @fix(cache('NG_008939.1')) - def test_ins_range_inv_coding(self): - """ - Insertion of an inverse range (coding). - """ - check_variant('NG_008939.1(PCCB_v001):c.156_157ins180_188inv', self.output) - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 - - @fix(cache('NG_008939.1')) - def test_ins_range_list_coding(self): - """ - Insertion of a range as a list (coding). - """ - check_variant('NG_008939.1(PCCB_v001):c.156_157ins[180_188]', self.output) - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 - - @fix(cache('NG_008939.1')) - def test_ins_range_inv_list_coding(self): - """ - Insertion of an inverse range as a list (coding). - """ - check_variant('NG_008939.1(PCCB_v001):c.156_157ins[180_188inv]', self.output) - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 - - @fix(cache('NG_008939.1')) - def test_delins_seq(self): - """ - Insertion-deletion of a sequence. - """ - check_variant('NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_delins_range(self): - """ - Insertion-deletion of a range. 
- """ - check_variant('NG_008939.1:g.5207_5212delins4300_4320', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 - - @fix(cache('NG_008939.1')) - def test_delins_range_inv(self): - """ - Insertion-deletion of an inverse range. - """ - check_variant('NG_008939.1:g.5207_5212delins4300_4320inv', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGCCAGATAATGAGCACAGGAC' - assert 'NG_008939.1(PCCB_v001):c.156_161delinsGCCAGATAATGAGCACAGGAC' \ - in self.output.getOutput('descriptions') - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 - - @fix(cache('NG_008939.1')) - def test_delins_seq_list(self): - """ - Insertion-deletion of a sequence as a list. - """ - check_variant('NG_008939.1:g.5207_5212delins[GTCCTGTGCTCATTATCTGGC]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_delins_range_list(self): - """ - Insertion-deletion of a range as a list. - """ - check_variant('NG_008939.1:g.5207_5212delins[4300_4320]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 - - @fix(cache('NG_008939.1')) - def test_delins_range_inv_list(self): - """ - Insertion-deletion of an inverse range as a list. 
- """ - check_variant('NG_008939.1:g.5207_5212delins[4300_4320inv]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGCCAGATAATGAGCACAGGAC' - assert 'NG_008939.1(PCCB_v001):c.156_161delinsGCCAGATAATGAGCACAGGAC' \ - in self.output.getOutput('descriptions') - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 - - @fix(cache('NG_008939.1')) - def test_delins_seq_seq(self): - """ - Insertion-deletion of two sequences. - """ - check_variant('NG_008939.1:g.5207_5212delins[GTCCTGTGCT;CATTATCTGGC]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_delins_range_range(self): - """ - Insertion-deletion of two ranges. - """ - check_variant('NG_008939.1:g.5207_5212delins[4300_4309;4310_4320]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 - - @fix(cache('NG_008939.1')) - def test_delins_range_inv_range(self): - """ - Insertion-deletion of an inverse range and a range. - - Note that the delins is also shortened by one position here. 
- """ - check_variant('NG_008939.1:g.5207_5212delins[4300_4309inv;4310_4320]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5208_5212delinsGCACAGGACCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.157_161delinsGCACAGGACCATTATCTGGC' \ - in self.output.getOutput('descriptions') - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 - - @fix(cache('NG_008939.1')) - def test_delins_seq_range(self): - """ - Insertion-deletion of a sequence and a range. - """ - check_variant('NG_008939.1:g.5207_5212delins[GTCCTGTGCT;4310_4320]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_delins_seq_range_inv(self): - """ - Insertion-deletion of a sequence and an inverse range. - - Note that the delins is also shortened by one position here. - """ - check_variant('NG_008939.1:g.5207_5212delins[GTCCTGTGCT;4310_4320inv]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5211delinsGTCCTGTGCTGCCAGATAAT' - assert 'NG_008939.1(PCCB_v001):c.156_160delinsGTCCTGTGCTGCCAGATAAT' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_delins_range_seq(self): - """ - Insertion-deletion of a range and a sequence. - """ - check_variant('NG_008939.1:g.5207_5212delins[4300_4309;CATTATCTGGC]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_delins_range_inv_seq(self): - """ - Insertion-deletion of an inverse range and a sequence. 
- - Note that the delins is also shortened by one position here. - """ - check_variant('NG_008939.1:g.5207_5212delins[4300_4309inv;CATTATCTGGC]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5208_5212delinsGCACAGGACCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.157_161delinsGCACAGGACCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_delins_seq_coding(self): - """ - Insertion-deletion of a sequence (coding). - """ - check_variant('NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_delins_seq_list_coding(self): - """ - Insertion-deletion of a sequence as a list (coding). - """ - check_variant('NG_008939.1(PCCB_v001):c.156_161delins[GTCCTGTGCTCATTATCTGGC]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_delins_seq_seq_coding(self): - """ - Insertion-deletion of two sequences (coding). - """ - check_variant('NG_008939.1(PCCB_v001):c.156_161delins[GTCCTGTGCT;CATTATCTGGC]', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' - assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_delins_range_coding(self): - """ - Insertion-deletion of a range (coding). 
- """ - check_variant('NG_008939.1(PCCB_v001):c.156_161delins180_188', self.output) - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 - - @fix(cache('NG_008939.1')) - def test_delins_range_inv_coding(self): - """ - Insertion-deletion of an inverse range (coding). - """ - check_variant('NG_008939.1(PCCB_v001):c.156_161delins180_188inv', self.output) - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 - - @fix(cache('NG_008939.1')) - def test_delins_range_list_coding(self): - """ - Insertion-deletion of a range as a list (coding). - """ - check_variant('NG_008939.1(PCCB_v001):c.156_161delins[180_188]', self.output) - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 - - @fix(cache('NG_008939.1')) - def test_delins_range_inv_list_coding(self): - """ - Insertion-deletion of an inverse range as a list (coding). - """ - check_variant('NG_008939.1(PCCB_v001):c.156_161delins[180_188inv]', self.output) - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 - - def test_no_reference(self): - """ - Variant description without a reference. - """ - check_variant('g.244355733del', self.output) - assert len(self.output.getMessagesWithErrorCode('ENOREF')) == 1 - - @fix(cache('NM_003002.2'), hg19, hg19_transcript_mappings) - def test_chromosomal_positions(self): - """ - Variants on transcripts in c. notation should have chromosomal positions - defined. - """ - check_variant('NM_003002.2:c.274G>T', self.output) - assert self.output.getIndexedOutput('rawVariantsChromosomal', 0) == ('chr11', '+', [('274G>T', (111959695, 111959695))]) - - @fix(cache('NM_002001.2')) - def test_ex_notation(self): - """ - Variant description using EX notation should not crash but deletion of - one exon should delete two splice sites. 
- """ - check_variant('NM_002001.2:c.EX1del', self.output) - assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) == 1 - - @fix(cache('LRG_1')) - def test_lrg_reference(self): - """ - We should be able to use LRG reference sequence without error. - """ - check_variant('LRG_1t1:c.266G>T', self.output) - error_count, _, _ = self.output.Summary() - assert error_count == 0 - assert self.output.getIndexedOutput('genomicDescription', 0) == 'LRG_1:g.6855G>T' - - @fix(cache('NM_002001.2')) - def test_gi_reference_plain(self): - """ - Test reference sequence notation with GI number. - """ - check_variant('31317229:c.6del', self.output) - error_count, _, _ = self.output.Summary() - assert error_count == 0 - assert self.output.getIndexedOutput('genomicDescription', 0) == '31317229:n.105del' - assert '31317229(FCER1A_v001):c.6del' \ - in self.output.getOutput('descriptions') - - @fix(cache('NM_002001.2')) - def test_gi_reference_prefix(self): - """ - Test reference sequence notation with GI number and prefix. - """ - check_variant('GI31317229:c.6del', self.output) - error_count, _, _ = self.output.Summary() - assert error_count == 0 - assert self.output.getIndexedOutput('genomicDescription', 0) == '31317229:n.105del' - assert '31317229(FCER1A_v001):c.6del' \ - in self.output.getOutput('descriptions') - - @fix(cache('NM_002001.2')) - def test_gi_reference_prefix_colon(self): - """ - Test reference sequence notation with GI number and prefix with colon. - """ - check_variant('GI:31317229:c.6del', self.output) - error_count, _, _ = self.output.Summary() - assert error_count == 0 - assert self.output.getIndexedOutput('genomicDescription', 0) == '31317229:n.105del' - assert '31317229(FCER1A_v001):c.6del' \ - in self.output.getOutput('descriptions') - - @fix(cache('NM_002001.2')) - def test_nop_nm(self): - """ - Variant on NM without effect should be described as '='. 
- """ - check_variant('NM_002001.2:c.1_3delinsATG', self.output) - error_count, _, _ = self.output.Summary() - assert error_count == 0 - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NM_002001.2:n.=' - assert 'NM_002001.2(FCER1A_v001):c.=' \ - in self.output.getOutput('descriptions') - - @fix(cache('DMD')) - def test_nop_ud(self): - """ - Variant on UD without effect should be described as '='. - """ - ud = REFERENCES['DMD']['accession'] - check_variant(ud + ':g.5T>T', self.output) - error_count, _, _ = self.output.Summary() - assert error_count == 0 - assert self.output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000023.11:g.=' - assert self.output.getIndexedOutput('genomicDescription', 0) == ud + ':g.=' - assert ud + '(DMD_v001):c.=' \ - in self.output.getOutput('descriptions') - - @fix(cache('DPYD')) - def test_ud_reverse_sequence(self): - """ - Variant on UD from reverse strand should have reverse complement - sequence. - """ - ud = REFERENCES['DPYD']['accession'] - check_variant(ud + '(DPYD_v1):c.85C>T', self.output) - error_count, _, _ = self.output.Summary() - assert error_count == 0 - assert self.output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000001.10:g.98348885G>A' - assert self.output.getIndexedOutput('genomicDescription', 0) == ud + ':g.42731C>T' - assert ud + '(DPYD_v001):c.85C>T' \ - in self.output.getOutput('descriptions') - - @fix(cache('MARK1')) - def test_ud_forward_sequence(self): - """ - Variant on UD from forward strand should have forward sequence. 
- """ - ud = REFERENCES['MARK1']['accession'] - check_variant(ud + '(MARK1_v001):c.400T>C', self.output) - error_count, _, _ = self.output.Summary() - assert error_count == 0 - assert self.output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000001.10:g.220773181T>C' - assert self.output.getIndexedOutput('genomicDescription', 0) == ud + ':g.76614T>C' - assert ud + '(MARK1_v001):c.400T>C' \ - in self.output.getOutput('descriptions') - - @fix(cache('chr9_reverse')) - def test_ud_reverse_range(self): - """ - Variant on UD from reverse strand should have reversed range - positions. - """ - # This is just some slice on from the reverse strand of hg19 chr9. - ud = REFERENCES['chr9_reverse']['accession'] - check_variant(ud + ':g.10624_78132del', self.output) - error_count, _, _ = self.output.Summary() - assert error_count == 0 - assert self.output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000009.11:g.32928508_32996016del' - assert self.output.getIndexedOutput('genomicDescription', 0) == ud + ':g.10624_78132del' - - @fix(cache('MARK1')) - def test_ud_forward_range(self): - """ - Variant on UD from forward strand should have forward range positions. - """ - ud = REFERENCES['MARK1']['accession'] - check_variant(ud + '(MARK1_v001):c.400_415del', self.output) - error_count, _, _ = self.output.Summary() - assert error_count == 0 - assert self.output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000001.10:g.220773181_220773196del' - assert self.output.getIndexedOutput('genomicDescription', 0) == ud + ':g.76614_76629del' - - @fix(cache('chr9_reverse')) - def test_ud_reverse_del_length(self): - """ - Variant on UD from reverse strand should have reversed range - positions, but not reverse complement of first argument (it is not a - sequence, but a length). - """ - # This is just some slice on from the reverse strand of hg19 chr9. 
- ud = REFERENCES['chr9_reverse']['accession'] - check_variant(ud + ':g.10624_78132del67509', self.output) - error_count, _, _ = self.output.Summary() - assert error_count == 0 - assert self.output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000009.11:g.32928508_32996016del' - assert self.output.getIndexedOutput('genomicDescription', 0) == ud + ':g.10624_78132del' - - @fix(cache('DPYD')) - def test_ud_reverse_roll(self): - """ - Variant on UD from reverse strand should roll the oposite direction. - - The situation is as follows: - - G A A A T T - c. 102 103 104 105 106 107 - g. 748 749 750 751 752 753 - chr g. 868 867 866 865 864 863 - """ - ud = REFERENCES['DPYD']['accession'] - check_variant(ud + '(DPYD_v001):c.104del', self.output) - error_count, _, _ = self.output.Summary() - assert error_count == 0 - assert self.output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000001.10:g.98348867del' - assert self.output.getIndexedOutput('genomicDescription', 0) == ud + ':g.42751del' - assert ud + '(DPYD_v001):c.105del' \ - in self.output.getOutput('descriptions') - - @fix(cache('MARK1')) - def test_ud_forward_roll(self): - """ - Variant on UD from forward strand should roll the same. - - The situation is as follows: - - A T T T A - c. 398 399 400 401 402 - g. 612 613 614 615 616 - chr g. 179 180 181 182 183 - """ - ud = REFERENCES['MARK1']['accession'] - check_variant(ud + '(MARK1_v001):c.400del', self.output) - error_count, _, _ = self.output.Summary() - assert error_count == 0 - assert self.output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000001.10:g.220773182del' - assert self.output.getIndexedOutput('genomicDescription', 0) == ud + ':g.76615del' - assert ud + '(MARK1_v001):c.401del' \ - in self.output.getOutput('descriptions') - - @fix(cache('AL449423.14')) - def test_deletion_with_sequence_forward_genomic(self): - """ - Specify the deleted sequence in a deletion. 
- """ - check_variant('AL449423.14:g.65471_65472delTC', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.65471_65472del' - assert 'AL449423.14(CDKN2A_v001):c.98_99del' \ - in self.output.getOutput('descriptions') - - @fix(cache('AL449423.14')) - def test_deletion_with_length_forward_genomic(self): - """ - Specify the deleted sequence length in a deletion. - """ - check_variant('AL449423.14:g.65471_65472del2', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.65471_65472del' - assert 'AL449423.14(CDKN2A_v001):c.98_99del' \ - in self.output.getOutput('descriptions') - - @fix(cache('AL449423.14')) - def test_deletion_with_sequence_reverse_coding(self): - """ - Specify the deleted sequence in a deletion on the reverse strand. - """ - check_variant('AL449423.14(CDKN2A_v001):c.161_163delTGG', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.61937_61939del' - assert 'AL449423.14(CDKN2A_v001):c.161_163del' \ - in self.output.getOutput('descriptions') - - @fix(cache('AL449423.14')) - def test_deletion_with_length_reverse_coding(self): - """ - Specify the deleted sequence length in a deletion on the reverse strand. - """ - check_variant('AL449423.14(CDKN2A_v001):c.161_163del3', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.61937_61939del' - assert 'AL449423.14(CDKN2A_v001):c.161_163del' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_deletion_with_sequence_reverse_ng_coding(self): - """ - Specify the deleted sequence in a deletion on the reverse strand - using a genomic reference. 
- """ - check_variant('NG_008939.1:c.155_157delAAC', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5206_5208del' - assert 'NG_008939.1(PCCB_v001):c.155_157del' \ - in self.output.getOutput('descriptions') - - @fix(cache('NG_008939.1')) - def test_deletion_with_length_reverse_ng_coding(self): - """ - Specify the deleted sequence length in a deletion on the reverse strand - using a genomic reference. - """ - check_variant('NG_008939.1:c.155_157del3', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5206_5208del' - assert 'NG_008939.1(PCCB_v001):c.155_157del' \ - in self.output.getOutput('descriptions') - - @fix(cache('AB026906.1')) - def test_inversion(self): - """ - Inversion variant. - """ - check_variant('AB026906.1:c.274_275inv', self.output) - assert self.output.getIndexedOutput('genomicDescription', 0) == 'AB026906.1:g.7872_7873inv' - assert 'AB026906.1(SDHD_v001):c.274_275inv' \ - in self.output.getOutput('descriptions') - - @fix(cache('NM_000193.2')) - def test_delins_with_length(self): - """ - Delins with explicit length of deleted sequence (bug #108). - """ - check_variant('NM_000193.2:c.108_109del2insG', self.output) - assert 'NM_000193.2(SHH_i001):p.(Lys38Serfs*2)' in self.output.getOutput('protDescriptions') - - @fix(cache('NG_009105.1')) - def test_protein_level_description(self): - """ - Currently protein level descriptions are not implemented. - """ - check_variant('NG_009105.1(OPN1LW):p.=', self.output) - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 - - @fix(cache('NP_064445.1')) - def test_protein_reference(self): - """ - Currently protein references are not implemented. 
- """ - check_variant('NP_064445.1:p.=', self.output) - assert len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 - - @fix(cache('AF230870.1')) - def test_wnomrna_other(self): - """ - Warning for no mRNA field on other than currently selected transcript - should give WNOMRNA_OTHER warning. - """ - # Contains mtmC2 and mtmB2, both without mRNA - check_variant('AF230870.1(mtmC2_v001):c.13del', self.output) - wnomrna_other = self.output.getMessagesWithErrorCode('WNOMRNA_OTHER') - assert len(wnomrna_other) == 1 - - @fix(cache('AF230870.1')) - def test_wnomrna(self): - """ - Warning for no mRNA field on currently selected transcript should give - WNOMRNA warning. - """ - # Contains mtmC2 and mtmB2, both without mRNA - check_variant('AF230870.1(mtmC2_v001):c.13del', self.output) - wnomrna = self.output.getMessagesWithErrorCode('WNOMRNA') - wnomrna_other = self.output.getMessagesWithErrorCode('WNOMRNA_OTHER') - assert len(wnomrna) == 1 - assert len(wnomrna_other) == 1 - - @fix(cache('L41870.1')) - def test_mrna_ref_adjacent_exons_warn(self): - """ - Warning for mRNA reference where exons are not adjacent. - - In L41870.1 exon 15 ends on 1558 and 16 starts on 1636. - """ - check_variant('L41870.1:c.1del', self.output) - w_exon_annotation = self.output.getMessagesWithErrorCode('WEXON_ANNOTATION') - assert len(w_exon_annotation) == 1 - - @fix(cache('NM_003002.2')) - def test_mrna_ref_adjacent_exons_no_warn(self): - """ - No warning for mRNA reference where exons are adjacent. - """ - check_variant('NM_003002.2:c.1del', self.output) - w_exon_annotation = self.output.getMessagesWithErrorCode('WEXON_ANNOTATION') - assert len(w_exon_annotation) == 0 - - @fix(cache('NM_001199.3')) - def test_fs_no_stop(self): - """ - Frame shift yielding no stop codon should be described with - uncertainty of the stop codon. 
- - http://www.hgvs.org/mutnomen/FAQ.html#nostop - """ - check_variant('NM_001199.3(BMP1):c.2188dup', self.output) - assert 'NM_001199.3(BMP1_i001):p.(Gln730Profs*?)' in self.output.getOutput('protDescriptions') - - @fix(cache('NM_000193.2')) - def test_ext_no_stop(self): - """ - Extension yielding no stop codon should be described with - uncertainty of the stop codon. - - http://www.hgvs.org/mutnomen/FAQ.html#nostop - """ - check_variant('NM_000193.2:c.1388G>C', self.output) - assert 'NM_000193.2(SHH_i001):p.(*463Serext*?)' in self.output.getOutput('protDescriptions') - - @fix(cache('NM_000193.2')) - def test_fs_ext_no_stop(self): - """ - Extension yielding no stop codon should be described with - uncertainty of the stop codon. - - http://www.hgvs.org/mutnomen/FAQ.html#nostop - """ - check_variant('NM_000193.2:c.1388_1389insC', self.output) - assert 'NM_000193.2(SHH_i001):p.(*463Cysext*?)' in self.output.getOutput('protDescriptions') - - @fix(cache('AB026906.1')) - def test_synonymous_p_is(self): - """ - Synonymous mutation should yield a p.(=) description. - """ - check_variant('AB026906.1:c.276C>T', self.output) - assert 'AB026906.1(SDHD_i001):p.(=)' in self.output.getOutput('protDescriptions') - assert not self.output.getOutput('newProteinFancy') - - @fix(cache('NM_024426.4')) - def test_synonymous_p_is_alt_start(self): - """ - Synonymous mutation should yield a p.(=) description, also with an - alternative start codon. 
- """ - check_variant('NM_024426.4:c.1107A>G', self.output) - assert 'NM_024426.4(WT1_i001):p.(=)' in self.output.getOutput('protDescriptions') - assert not self.output.getOutput('newProteinFancy') - waltstart = self.output.getMessagesWithErrorCode('WALTSTART') - assert len(waltstart) == 1 - assert self.output.getOutput('oldProtein')[0].startswith('M') - assert not self.output.getOutput('newProtein') - assert not self.output.getOutput('altStart') - assert not self.output.getOutput('altProteinFancy') - - @fix(cache('AB026906.1')) - def test_start_codon(self): - """ - Mutation of start codon should yield a p.? description. - """ - check_variant('AB026906.1:c.1A>G', self.output) - assert 'AB026906.1(SDHD_i001):p.?' in self.output.getOutput('protDescriptions') - wstart = self.output.getMessagesWithErrorCode('WSTART') - assert len(wstart) == 1 - assert self.output.getOutput('newProtein')[0] == '?' - waltstart = self.output.getMessagesWithErrorCode('WALTSTART') - assert len(waltstart) == 0 - assert not self.output.getOutput('altStart') - - @fix(cache('NM_024426.4')) - def test_start_codon_alt_start(self): - """ - Mutation of start codon should yield a p.? description, also with an - alternative start codon. - """ - check_variant('NM_024426.4:c.1C>G', self.output) - assert 'NM_024426.4(WT1_i001):p.?' in self.output.getOutput('protDescriptions') - west = self.output.getMessagesWithErrorCode('WSTART') - assert len(west) == 1 - assert self.output.getOutput('newProtein')[0] == '?' - waltstart = self.output.getMessagesWithErrorCode('WALTSTART') - assert len(waltstart) == 1 - assert not self.output.getOutput('altStart') - - @fix(cache('AB026906.1')) - def test_start_codon_yield_start_p_is(self): - """ - Silent mutation creating new start codon should yield a p.? - description. The visualisation should also render the case for the new - start codon. - """ - check_variant('AB026906.1:c.1A>T', self.output) # yields TTG start codon - assert 'AB026906.1(SDHD_i001):p.?' 
in self.output.getOutput('protDescriptions') - wstart = self.output.getMessagesWithErrorCode('WSTART') - assert len(wstart) == 1 - assert self.output.getOutput('newProtein')[0] == '?' - waltstart = self.output.getMessagesWithErrorCode('WALTSTART') - assert len(waltstart) == 0 - assert self.output.getOutput('oldProtein')[0].startswith('M') - assert 'TTG' in self.output.getOutput('altStart') - assert not self.output.getOutput('altProteinFancy') - - @fix(cache('NM_024426.4')) - def test_start_codon_alt_start_yield_start_p_is(self): - """ - Silent mutation creating new start codon should yield a p.? - description, also with an alternative start codon. The visualisation - should also render the case for the new start codon. - """ - check_variant('NM_024426.4:c.1C>A', self.output) # yields ATG start codon - assert 'NM_024426.4(WT1_i001):p.?' in self.output.getOutput('protDescriptions') - west = self.output.getMessagesWithErrorCode('WSTART') - assert len(west) == 1 - assert self.output.getOutput('newProtein')[0] == '?' - waltstart = self.output.getMessagesWithErrorCode('WALTSTART') - assert len(waltstart) == 1 - assert self.output.getOutput('oldProtein')[0].startswith('M') - assert 'ATG' in self.output.getOutput('altStart') - assert not self.output.getOutput('altProteinFancy') - - @fix(cache('AB026906.1')) - def test_start_codon_yield_start(self): - """ - Mutation creating new start codon should yield a p.? description. The - visualisation should also render the case for the new start codon. - """ - check_variant('AB026906.1:c.1_4delinsTTGA', self.output) # yields TTG start codon - assert 'AB026906.1(SDHD_i001):p.?' in self.output.getOutput('protDescriptions') - wstart = self.output.getMessagesWithErrorCode('WSTART') - assert len(wstart) == 1 - assert self.output.getOutput('newProtein')[0] == '?' 
- waltstart = self.output.getMessagesWithErrorCode('WALTSTART') - assert len(waltstart) == 0 - assert 'TTG' in self.output.getOutput('altStart') - assert self.output.getOutput('altProtein')[0].startswith('M') - - @fix(cache('NM_024426.4')) - def test_start_codon_alt_start_yield_start(self): - """ - Mutation creating new start codon should yield a p.? description, also - with an alternative start codon. The visualisation should also render - the new start codon. - """ - check_variant('NM_024426.4:c.1_4delinsATGA', self.output) # yields ATG start codon - assert 'NM_024426.4(WT1_i001):p.?' in self.output.getOutput('protDescriptions') - west = self.output.getMessagesWithErrorCode('WSTART') - assert len(west) == 1 - assert self.output.getOutput('newProtein')[0] == '?' - waltstart = self.output.getMessagesWithErrorCode('WALTSTART') - assert len(waltstart) == 1 - assert self.output.getOutput('oldProtein')[0].startswith('M') - assert 'ATG' in self.output.getOutput('altStart') - assert self.output.getOutput('altProtein')[0].startswith('M') +pytestmark = pytest.mark.usefixtures('references') + + +@pytest.fixture +def checker(output): + def check(description): + check_variant(description, output) + return check + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_deletion_in_frame(output, checker): + """ + Simple in-frame deletion should give a simple description on protein + level. 
+ """ + checker('AL449423.14(CDKN2A_v001):c.161_163del') + assert (output.getIndexedOutput('genomicDescription', 0) == + 'AL449423.14:g.61937_61939del') + assert 'AL449423.14(CDKN2A_v001):c.161_163del' \ + in output.getOutput('descriptions') + assert 'AL449423.14(CDKN2A_i001):p.(Met54_Gly55delinsSer)' \ + in output.getOutput('protDescriptions') + assert output.getOutput('newProtein') + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_insertion_in_frame(output, checker): + """ + Simple in-frame insertion should give a simple description on protein + level. + """ + checker('AL449423.14(CDKN2A_v001):c.161_162insATC') + assert (output.getIndexedOutput('genomicDescription', 0) == + 'AL449423.14:g.61938_61939insGAT') + assert 'AL449423.14(CDKN2A_v001):c.161_162insATC' \ + in output.getOutput('descriptions') + assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsIleSer)' \ + in output.getOutput('protDescriptions') + assert output.getOutput('newProtein') + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_insertion_list_in_frame(output, checker): + """ + Simple in-frame insertion of a list should give a simple description + on protein level. + """ + checker('AL449423.14(CDKN2A_v001):c.161_162ins[ATC]') + assert (output.getIndexedOutput('genomicDescription', 0) == + 'AL449423.14:g.61938_61939insGAT') + assert 'AL449423.14(CDKN2A_v001):c.161_162insATC' \ + in output.getOutput('descriptions') + assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsIleSer)' \ + in output.getOutput('protDescriptions') + assert output.getOutput('newProtein') + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_deletion_insertion_in_frame(output, checker): + """ + Simple in-frame deletion/insertion should give a simple description on + protein level. 
+ """ + check_variant('AL449423.14(CDKN2A_v001):c.161_162delinsATCCC', + output) + assert output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.61938_61939delinsGGGAT' + assert 'AL449423.14(CDKN2A_v001):c.161_162delinsATCCC' \ + in output.getOutput('descriptions') + assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsAsnPro)' \ + in output.getOutput('protDescriptions') + assert output.getOutput('newProtein') + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_deletion_insertion_list_in_frame(output, checker): + """ + Simple in-frame deletion-insertion of a list should give a simple + description on protein level. + """ + check_variant('AL449423.14(CDKN2A_v001):c.161_162delins[ATCCC]', + output) + assert output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.61938_61939delinsGGGAT' + assert 'AL449423.14(CDKN2A_v001):c.161_162delinsATCCC' \ + in output.getOutput('descriptions') + assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsAsnPro)' \ + in output.getOutput('protDescriptions') + assert output.getOutput('newProtein') + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_deletion_insertion_in_frame_complete(output, checker): + """ + Simple in-frame deletion/insertion should give a simple description on + protein level, also with the optional deleted sequence argument. 
+ """ + check_variant('AL449423.14(CDKN2A_v001):c.161_162delTGinsATCCC', + output) + assert output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.61938_61939delinsGGGAT' + assert 'AL449423.14(CDKN2A_v001):c.161_162delinsATCCC' \ + in output.getOutput('descriptions') + assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsAsnPro)' \ + in output.getOutput('protDescriptions') + assert output.getOutput('newProtein') + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_deletion_insertion_list_in_frame_complete(output, checker): + """ + Simple in-frame deletion-insertion of a list should give a simple + description on protein level, also with the optional deleted sequence + argument. + """ + check_variant('AL449423.14(CDKN2A_v001):c.161_162delTGins[ATCCC]', + output) + assert output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.61938_61939delinsGGGAT' + assert 'AL449423.14(CDKN2A_v001):c.161_162delinsATCCC' \ + in output.getOutput('descriptions') + assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsAsnPro)' \ + in output.getOutput('protDescriptions') + assert output.getOutput('newProtein') + + +@pytest.mark.parametrize('references', [['NM_003002.2']], indirect=True) +def test_est_warning_nm_est(output, checker): + """ + Warning for EST positioning on NM reference. + """ + checker('NM_003002.2:274del') + west = output.getMessagesWithErrorCode('WEST') + assert len(west) == 1 + + +@pytest.mark.parametrize('references', [['NM_003002.2']], indirect=True) +def test_no_est_warning_nm_c(output, checker): + """ + No EST warning for c. positioning on NM reference. + """ + checker('NM_003002.2:c.274del') + west = output.getMessagesWithErrorCode('WEST') + assert len(west) == 0 + + +@pytest.mark.parametrize('references', [['NM_003002.2']], indirect=True) +def test_no_est_warning_nm_n(output, checker): + """ + No EST warning for n. positioning on NM reference. 
+ """ + checker('NM_003002.2:n.274del') + west = output.getMessagesWithErrorCode('WEST') + assert len(west) == 0 + + +@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True) +def test_est_warning_ng_est(output, checker): + """ + Warning for EST positioning on NG reference. + """ + checker('NG_012772.1:128del') + west = output.getMessagesWithErrorCode('WEST') + assert len(west) == 1 + + +@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True) +def test_no_est_warning_ng_g(output, checker): + """ + No EST warning for g. positioning on NG reference. + """ + checker('NG_012772.1:g.128del') + west = output.getMessagesWithErrorCode('WEST') + assert len(west) == 0 + + +@pytest.mark.parametrize('references', [['AA010203.1']], indirect=True) +def test_no_est_warning_est_est(output, checker): + """ + No warning for EST positioning on EST reference. + """ + checker('AA010203.1:54_55insG') + west = output.getMessagesWithErrorCode('WEST') + assert len(west) == 0 + + +@pytest.mark.parametrize('references', [['NM_003002.2']], indirect=True) +def test_roll(output, checker): + """ + Just a variant where we should roll. + """ + checker('NM_003002.2:c.273del') + wroll = output.getMessagesWithErrorCode('WROLLFORWARD') + assert len(wroll) > 0 + + +@pytest.mark.parametrize('references', [['NM_003002.2']], indirect=True) +def test_no_roll(output, checker): + """ + Just a variant where we cannot roll. + """ + checker('NM_003002.2:c.274del') + wroll = output.getMessagesWithErrorCode('WROLLFORWARD') + assert len(wroll) == 0 + + +@pytest.mark.parametrize('references', [['NM_000088.3']], indirect=True) +def test_no_roll_splice(output, checker): + """ + Here we can roll but should not, because it is over a splice site. 
+ """ + checker('NM_000088.3:g.459del') + wrollback = output.getMessagesWithErrorCode('IROLLBACK') + assert len(wrollback) > 0 + wroll = output.getMessagesWithErrorCode('WROLLFORWARD') + assert len(wroll) == 0 + + +@pytest.mark.parametrize('references', [['NM_000088.3']], indirect=True) +def test_partial_roll_splice(output, checker): + """ + Here we can roll two positions, but should roll only one because + otherwise it is over a splice site. + """ + checker('NM_000088.3:g.494del') + wrollback = output.getMessagesWithErrorCode('IROLLBACK') + assert len(wrollback) > 0 + wroll = output.getMessagesWithErrorCode('WROLLFORWARD') + assert len(wroll) > 0 + + +@pytest.mark.parametrize('references', [['NM_000088.3']], indirect=True) +def test_roll_after_splice(output, checker): + """ + Here we can roll and should, we stay in the same exon. + """ + checker('NM_000088.3:g.460del') + wroll = output.getMessagesWithErrorCode('WROLLFORWARD') + assert len(wroll) > 0 + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_roll_both_ins(output, checker): + """ + Insertion that rolls should not use the same inserted sequence in + descriptions on forward and reverse strands. + + Here we have the following situation on the forward strand: + + 65470 (genomic) + | + CGGTGCGTTGGGCAGCGCCCCCGCCTCCAGCAGCGCCCGCACCTCCTCTA + + Now, an insertion of TAC after 65470 should be rolled to an insertion + of ACT after 65471: + + CGGTGCGTTGGGCAGCGCCCCCGCC --- TCCAGCAGCGCCCGCACCTCCTCTA + CGGTGCGTTGGGCAGCGCCCCCGCC TAC TCCAGCAGCGCCCGCACCTCCTCTA => + + CGGTGCGTTGGGCAGCGCCCCCGCCT --- CCAGCAGCGCCCGCACCTCCTCTA + CGGTGCGTTGGGCAGCGCCCCCGCCT ACT CCAGCAGCGCCCGCACCTCCTCTA + + However, in CDKN2A_v001 (on the reverse strand), this insertion should + roll the other direction and the inserted sequence should be the reverse + complement of CTA, which is TAG, and not that of ACT, which is AGT. 
+ + The next test (test_roll_reverse_ins) tests the situation for an input + of AL449423.14:g.65471_65472insACT, where only the reverse roll should + be done. + """ + checker('AL449423.14:g.65470_65471insTAC') + assert 'AL449423.14(CDKN2A_v001):c.99_100insTAG' in output.getOutput('descriptions') + assert 'AL449423.14:g.65471_65472insACT' == output.getIndexedOutput('genomicDescription', 0, '') + assert len(output.getMessagesWithErrorCode('WROLLFORWARD')) == 1 + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_roll_reverse_ins(output, checker): + """ + Insertion that rolls on the reverse strand should not use the same + inserted sequence in descriptions on forward and reverse strands. + """ + checker('AL449423.14:g.65471_65472insACT') + assert 'AL449423.14(CDKN2A_v001):c.99_100insTAG' in output.getOutput('descriptions') + assert 'AL449423.14:g.65471_65472insACT' == output.getIndexedOutput('genomicDescription', 0, '') + assert len(output.getMessagesWithErrorCode('WROLLFORWARD')) == 0 + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_roll_message_forward(output, checker): + """ + Roll warning message should only be shown for currently selected + strand (forward). + """ + checker('AL449423.14:g.65470_65471insTAC') + assert len(output.getMessagesWithErrorCode('WROLLFORWARD')) == 1 + assert len(output.getMessagesWithErrorCode('WROLLREVERSE')) == 0 + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_roll_message_reverse(output, checker): + """ + Roll warning message should only be shown for currently selected + strand (reverse). 
+ """ + checker('AL449423.14(CDKN2A_v001):c.98_99insGTA') + assert len(output.getMessagesWithErrorCode('WROLLFORWARD')) == 0 + assert len(output.getMessagesWithErrorCode('WROLLREVERSE')) == 1 + + +@pytest.mark.parametrize('references', [['NM_000143.3']], indirect=True) +def test_ins_cds_start(output, checker): + """ + Insertion on CDS start boundary should not be included in CDS. + """ + checker('NM_000143.3:c.-1_1insCAT') + assert output.getIndexedOutput("newProtein", 0) is None + # Todo: Is this a good test? + + +@pytest.mark.parametrize('references', [['NM_000143.3']], indirect=True) +def test_ins_cds_start_after(output, checker): + """ + Insertion after CDS start boundary should be included in CDS. + """ + checker('NM_000143.3:c.1_2insCAT') + assert output.getIndexedOutput("newProtein", 0) == '?' + # Todo: Is this a good test? + + +@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True) +def test_del_splice_site(output, checker): + """ + Deletion hitting one splice site should not do a protein prediction. + """ + checker('NG_012772.1(BRCA2_v001):c.632-5_670del') + assert len(output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 + assert output.getOutput('removedSpliceSites') == [] + # Todo: For now, the following is how to check if no protein + # prediction is done. + assert not output.getOutput('newProtein') + + +@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True) +def test_del_exon(output, checker): + """ + Deletion of an entire exon should be possible. + """ + checker('NG_012772.1(BRCA2_v001):c.632-5_681+7del') + assert len(output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 + assert output.getOutput('removedSpliceSites') == [2] + # Todo: For now, the following is how to check if protein + # prediction is done. 
+    assert output.getOutput('newProtein')
+
+
+@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True)
+def test_del_exon_exact(output, checker):
+    """
+    Deletion of exactly an exon should be possible.
+    """
+    checker('NG_012772.1(BRCA2_v001):c.632_681del')
+    assert len(output.getMessagesWithErrorCode('WOVERSPLICE')) == 0
+    assert output.getOutput('removedSpliceSites') == [2]
+    # Todo: For now, the following is how to check if protein
+    # prediction is done.
+    assert output.getOutput('newProtein')
+
+
+@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True)
+def test_del_exon_in_frame(output, checker):
+    """
+    Deletion of an entire exon with length a triplicate should give a
+    protein product with just this deletion (and possibly substitutions
+    directly before and after).
+
+    NG_012772.1(BRCA2_v001):c.68-7_316+7del is such a variant, since
+    positions 68 through 316 are exactly one exon and (316-68+1)/3 = 83.
+    """
+    checker('NG_012772.1(BRCA2_v001):c.68-7_316+7del')
+    assert len(output.getMessagesWithErrorCode('WOVERSPLICE')) > 0
+    assert output.getOutput('removedSpliceSites') == [2]
+    # Todo: For now, the following is how to check if protein
+    # prediction is done.
+    assert output.getOutput('newProtein')
+    # Todo: assert that protein products indeed have only this difference.
+
+
+@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True)
+def test_del_exons(output, checker):
+    """
+    Deletion of two entire exons should be possible.
+    """
+    checker('NG_012772.1(BRCA2_v001):c.632-5_793+7del')
+    assert len(output.getMessagesWithErrorCode('WOVERSPLICE')) > 0
+    assert output.getOutput('removedSpliceSites') == [4]
+    # Todo: For now, the following is how to check if protein
+    # prediction is done.
+ assert output.getOutput('newProtein') + + +@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True) +def test_del_intron(output, checker): + """ + Deletion of an entire intron should be possible (fusion of remaining + exonic parts). + """ + checker('NG_012772.1(BRCA2_v001):c.622_674del') + assert len(output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 + assert output.getOutput('removedSpliceSites') == [2] + # Todo: For now, the following is how to check if protein + # prediction is done. + assert output.getOutput('newProtein') + + +@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True) +def test_del_intron_exact(output, checker): + """ + Deletion of exactly an intron should be possible (fusion of flanking + exons). + """ + checker('NG_012772.1(BRCA2_v001):c.681+1_682-1del') + assert output.getMessagesWithErrorCode('WOVERSPLICE') == [] + assert output.getOutput('removedSpliceSites') == [2] + # Note: The protein prediction is done, but 'newProtein' is not set + # because we have no change. So to check if the prediction is done, we + # check if 'oldProtein' is set and to check if the prediction is + # correct, we check if 'newProtein' is not set. + assert output.getOutput('oldProtein') + assert not output.getOutput('newProtein') + + +@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True) +def test_del_intron_in_frame(output, checker): + """ + Deletion of an entire intron should be possible (fusion of remaining + exonic parts). + """ + checker('NG_012772.1(BRCA2_v001):c.622_672del') + assert len(output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 + assert output.getOutput('removedSpliceSites') == [2] + # Todo: For now, the following is how to check if protein + # prediction is done. + assert output.getOutput('newProtein') + # Todo: assert that protein products indeed have only this difference. 
+
+
+@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True)
+def test_del_exon_unknown_offsets(output, checker):
+    """
+    Deletion of an entire exon with unknown offsets should be possible.
+    """
+    checker('NG_012772.1(BRCA2_v001):c.632-?_681+?del')
+    assert len(output.getMessagesWithErrorCode('WOVERSPLICE')) > 0
+    assert len(output.getMessagesWithErrorCode('IDELSPLICE')) > 0
+    # Todo: For now, the following is how to check if protein
+    # prediction is done.
+    assert output.getOutput('newProtein')
+    # Genomic positions should be centered in flanking introns and unsure.
+    assert output.getIndexedOutput('genomicDescription', 0) == 'NG_012772.1:g.(17550_19725)del'
+    assert 'NG_012772.1(BRCA2_v001):c.632-?_681+?del' \
+        in output.getOutput('descriptions')
+    assert 'NG_012772.1(BRCA2_i001):p.(Val211Glufs*10)' \
+        in output.getOutput('protDescriptions')
+    # Todo: .c notation should still be c.632-?_681+?del, but what about
+    # other transcripts?
+
+
+@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True)
+def test_del_exon_unknown_offsets_in_frame(output, checker):
+    """
+    Deletion of an entire exon with unknown offsets and length a
+    triplicate should give a protein product with just this deletion
+    (and possibly substitutions directly before and after).
+
+    NG_012772.1(BRCA2_v001):c.68-?_316+?del is such a variant, since
+    positions 68 through 316 are exactly one exon and (316-68+1)/3 = 83.
+    """
+    checker('NG_012772.1(BRCA2_v001):c.68-?_316+?del')
+    assert len(output.getMessagesWithErrorCode('WOVERSPLICE')) > 0
+    assert len(output.getMessagesWithErrorCode('IDELSPLICE')) > 0
+    # Todo: For now, the following is how to check if protein
+    # prediction is done.
+    assert output.getOutput('newProtein')
+    # Genomic positions should be centered in flanking introns and unsure.
+ assert output.getIndexedOutput('genomicDescription', 0) == 'NG_012772.1:g.(7324_11720)del' + assert 'NG_012772.1(BRCA2_v001):c.68-?_316+?del' \ + in output.getOutput('descriptions') + # Todo: .c notation should still be c.632-?_681+?del, but what about + # other transcripts? + + +@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True) +def test_del_exon_unknown_offsets_composed(output, checker): + """ + Deletion of an entire exon with unknown offsets and another composed + variant with exact positioning should be possible. + """ + check_variant('NG_012772.1(BRCA2_v001):c.[632-?_681+?del;681+4del]', + output) + assert len(output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 + assert len(output.getMessagesWithErrorCode('IDELSPLICE')) > 0 + # Todo: For now, the following is how to check if protein + # prediction is done. + assert output.getOutput('newProtein') + # Genomic positions should be centered in flanking introns and unsure. + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_012772.1:g.[(17550_19725)del;19017del]' + assert 'NG_012772.1(BRCA2_v001):c.[632-?_681+?del;681+4del]' \ + in output.getOutput('descriptions') + # Todo: .c notation should still be c.632-?_681+?del, but what about + # other transcripts? + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_del_exon_unknown_offsets_reverse(output, checker): + """ + Deletion of an entire exon with unknown offsets should be possible, + also on the reverse strand. + """ + check_variant('AL449423.14(CDKN2A_v001):c.151-?_457+?del', + output) + assert len(output.getMessagesWithErrorCode('WOVERSPLICE')) > 0 + assert len(output.getMessagesWithErrorCode('IDELSPLICE')) > 0 + # Todo: For now, the following is how to check if protein + # prediction is done. + assert output.getOutput('newProtein') + # Genomic positions should be centered in flanking introns and unsure. 
+ assert output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.(60314_63683)del' + assert 'AL449423.14(CDKN2A_v001):c.151-?_457+?del' \ + in output.getOutput('descriptions') + # Todo: .c notation should still be c.632-?_681+?del, but what about + # other transcripts? + + +@pytest.mark.parametrize('references', [['NM_000143.3']], indirect=True) +def test_del_exon_transcript_reference(output, checker): + """ + Deletion of entire exon on a transcript reference should remove the + expected splice sites (only that of the deleted exon), and not those + of the flanking exons (as would happen using the mechanism for genomic + references). + """ + # checker('NM_018723.3:c.758_890del') + checker('NM_000143.3:c.739_904del') + assert len(output.getMessagesWithErrorCode('WOVERSPLICE')) == 0 + assert output.getOutput('removedSpliceSites') == [2] + # Todo: For now, the following is how to check if protein + # prediction is done. + assert output.getOutput('newProtein') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_seq(output, checker): + """ + Insertion of a sequence. + """ + checker('NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_012337.1']], indirect=True) +def test_ins_seq_reverse(output, checker): + """ + Insertion of a sequence on reverse strand. + """ + checker('NG_012337.1(TIMM8B_v001):c.12_13insGATC') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_012337.1:g.4911_4912insATCG' + assert 'NG_012337.1(TIMM8B_v001):c.12_13insGATC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_range(output, checker): + """ + Insertion of a range. 
+ """ + checker('NG_008939.1:g.5207_5208ins4300_4320') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_range_inv(output, checker): + """ + Insertion of an inverse range. + """ + checker('NG_008939.1:g.5207_5208ins4300_4320inv') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGCCAGATAATGAGCACAGGAC' + assert 'NG_008939.1(PCCB_v001):c.156_157insGCCAGATAATGAGCACAGGAC' \ + in output.getOutput('descriptions') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_seq_list(output, checker): + """ + Insertion of a sequence as a list. + """ + checker('NG_008939.1:g.5207_5208ins[GTCCTGTGCTCATTATCTGGC]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_012337.1']], indirect=True) +def test_ins_seq_list_reverse(output, checker): + """ + Insertion of a sequence as a list on reverse strand. + """ + checker('NG_012337.1(TIMM8B_v001):c.12_13ins[GATC]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_012337.1:g.4911_4912insATCG' + assert 'NG_012337.1(TIMM8B_v001):c.12_13insGATC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_range_list(output, checker): + """ + Insertion of a range as a list. 
+ """ + checker('NG_008939.1:g.5207_5208ins[4300_4320]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_range_inv_list(output, checker): + """ + Insertion of an inverse range as a list. + """ + checker('NG_008939.1:g.5207_5208ins[4300_4320inv]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGCCAGATAATGAGCACAGGAC' + assert 'NG_008939.1(PCCB_v001):c.156_157insGCCAGATAATGAGCACAGGAC' \ + in output.getOutput('descriptions') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_seq_seq(output, checker): + """ + Insertion of two sequences. + """ + checker('NG_008939.1:g.5207_5208ins[GTCCTGTGCTC;ATTATCTGGC]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_012337.1']], indirect=True) +def test_ins_seq_seq_reverse(output, checker): + """ + Insertion of two sequences on reverse strand. + """ + checker('NG_012337.1(TIMM8B_v001):c.12_13ins[TTT;GATC]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_012337.1:g.4911_4912insATCAAAG' + assert 'NG_012337.1(TIMM8B_v001):c.12_13insTTTGATC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_range_range(output, checker): + """ + Insertion of two ranges. 
+ """ + checker('NG_008939.1:g.5207_5208ins[4300_4309;4310_4320]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_range_range_inv(output, checker): + """ + Insertion of a range and an inverse range. + """ + checker('NG_008939.1:g.5207_5208ins[4300_4309;4310_4320inv]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTGCCAGATAATG' + assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTGCCAGATAATG' \ + in output.getOutput('descriptions') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_seq_range(output, checker): + """ + Insertion of a sequence and a range. + """ + checker('NG_008939.1:g.5207_5208ins[GTCCTGTGCT;4310_4320]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_seq_range_inv(output, checker): + """ + Insertion of a sequence and an inverse range. + """ + checker('NG_008939.1:g.5207_5208ins[GTCCTGTGCT;4310_4320inv]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTGCCAGATAATG' + assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTGCCAGATAATG' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_range_seq(output, checker): + """ + Insertion of a range and a sequence. 
+ """ + checker('NG_008939.1:g.5207_5208ins[4300_4309;CATTATCTGGC]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_range_inv_seq(output, checker): + """ + Insertion of an inverse range and a sequence. + """ + checker('NG_008939.1:g.5207_5208ins[4300_4309inv;CATTATCTGGC]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insAGCACAGGACCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_157insAGCACAGGACCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_seq_coding(output, checker): + """ + Insertion of a sequence (coding). + """ + checker('NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_seq_list_coding(output, checker): + """ + Insertion of a sequence as a list (coding). + """ + checker('NG_008939.1(PCCB_v001):c.156_157ins[GTCCTGTGCTCATTATCTGGC]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_seq_seq_coding(output, checker): + """ + Insertion of two sequences (coding). 
+ """ + checker('NG_008939.1(PCCB_v001):c.156_157ins[GTCCTGTGCTC;ATTATCTGGC]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5208insGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_157insGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_range_coding(output, checker): + """ + Insertion of a range (coding). + """ + checker('NG_008939.1(PCCB_v001):c.156_157ins180_188') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_range_inv_coding(output, checker): + """ + Insertion of an inverse range (coding). + """ + checker('NG_008939.1(PCCB_v001):c.156_157ins180_188inv') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_range_list_coding(output, checker): + """ + Insertion of a range as a list (coding). + """ + checker('NG_008939.1(PCCB_v001):c.156_157ins[180_188]') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_ins_range_inv_list_coding(output, checker): + """ + Insertion of an inverse range as a list (coding). + """ + checker('NG_008939.1(PCCB_v001):c.156_157ins[180_188inv]') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_seq(output, checker): + """ + Insertion-deletion of a sequence. 
+ """ + checker('NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_range(output, checker): + """ + Insertion-deletion of a range. + """ + checker('NG_008939.1:g.5207_5212delins4300_4320') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_range_inv(output, checker): + """ + Insertion-deletion of an inverse range. + """ + checker('NG_008939.1:g.5207_5212delins4300_4320inv') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGCCAGATAATGAGCACAGGAC' + assert 'NG_008939.1(PCCB_v001):c.156_161delinsGCCAGATAATGAGCACAGGAC' \ + in output.getOutput('descriptions') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_seq_list(output, checker): + """ + Insertion-deletion of a sequence as a list. + """ + checker('NG_008939.1:g.5207_5212delins[GTCCTGTGCTCATTATCTGGC]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_range_list(output, checker): + """ + Insertion-deletion of a range as a list. 
+ """ + checker('NG_008939.1:g.5207_5212delins[4300_4320]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_range_inv_list(output, checker): + """ + Insertion-deletion of an inverse range as a list. + """ + checker('NG_008939.1:g.5207_5212delins[4300_4320inv]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGCCAGATAATGAGCACAGGAC' + assert 'NG_008939.1(PCCB_v001):c.156_161delinsGCCAGATAATGAGCACAGGAC' \ + in output.getOutput('descriptions') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_seq_seq(output, checker): + """ + Insertion-deletion of two sequences. + """ + checker('NG_008939.1:g.5207_5212delins[GTCCTGTGCT;CATTATCTGGC]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_range_range(output, checker): + """ + Insertion-deletion of two ranges. 
+ """ + checker('NG_008939.1:g.5207_5212delins[4300_4309;4310_4320]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_range_inv_range(output, checker): + """ + Insertion-deletion of an inverse range and a range. + + Note that the delins is also shortened by one position here. + """ + checker('NG_008939.1:g.5207_5212delins[4300_4309inv;4310_4320]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5208_5212delinsGCACAGGACCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.157_161delinsGCACAGGACCATTATCTGGC' \ + in output.getOutput('descriptions') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 0 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_seq_range(output, checker): + """ + Insertion-deletion of a sequence and a range. + """ + checker('NG_008939.1:g.5207_5212delins[GTCCTGTGCT;4310_4320]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_seq_range_inv(output, checker): + """ + Insertion-deletion of a sequence and an inverse range. + + Note that the delins is also shortened by one position here. 
+ """ + checker('NG_008939.1:g.5207_5212delins[GTCCTGTGCT;4310_4320inv]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5211delinsGTCCTGTGCTGCCAGATAAT' + assert 'NG_008939.1(PCCB_v001):c.156_160delinsGTCCTGTGCTGCCAGATAAT' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_range_seq(output, checker): + """ + Insertion-deletion of a range and a sequence. + """ + checker('NG_008939.1:g.5207_5212delins[4300_4309;CATTATCTGGC]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_range_inv_seq(output, checker): + """ + Insertion-deletion of an inverse range and a sequence. + + Note that the delins is also shortened by one position here. + """ + checker('NG_008939.1:g.5207_5212delins[4300_4309inv;CATTATCTGGC]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5208_5212delinsGCACAGGACCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.157_161delinsGCACAGGACCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_seq_coding(output, checker): + """ + Insertion-deletion of a sequence (coding). + """ + checker('NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_seq_list_coding(output, checker): + """ + Insertion-deletion of a sequence as a list (coding). 
+ """ + checker('NG_008939.1(PCCB_v001):c.156_161delins[GTCCTGTGCTCATTATCTGGC]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_seq_seq_coding(output, checker): + """ + Insertion-deletion of two sequences (coding). + """ + checker('NG_008939.1(PCCB_v001):c.156_161delins[GTCCTGTGCT;CATTATCTGGC]') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5207_5212delinsGTCCTGTGCTCATTATCTGGC' + assert 'NG_008939.1(PCCB_v001):c.156_161delinsGTCCTGTGCTCATTATCTGGC' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_range_coding(output, checker): + """ + Insertion-deletion of a range (coding). + """ + checker('NG_008939.1(PCCB_v001):c.156_161delins180_188') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_range_inv_coding(output, checker): + """ + Insertion-deletion of an inverse range (coding). + """ + checker('NG_008939.1(PCCB_v001):c.156_161delins180_188inv') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_range_list_coding(output, checker): + """ + Insertion-deletion of a range as a list (coding). + """ + checker('NG_008939.1(PCCB_v001):c.156_161delins[180_188]') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_delins_range_inv_list_coding(output, checker): + """ + Insertion-deletion of an inverse range as a list (coding). 
+ """ + checker('NG_008939.1(PCCB_v001):c.156_161delins[180_188inv]') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 + + +def test_no_reference(output, checker): + """ + Variant description without a reference. + """ + checker('g.244355733del') + assert len(output.getMessagesWithErrorCode('ENOREF')) == 1 + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +@pytest.mark.parametrize('references', [['NM_003002.2']], indirect=True) +def test_chromosomal_positions(output, checker): + """ + Variants on transcripts in c. notation should have chromosomal positions + defined. + """ + checker('NM_003002.2:c.274G>T') + assert output.getIndexedOutput('rawVariantsChromosomal', 0) == ('chr11', '+', [('274G>T', (111959695, 111959695))]) + + +@pytest.mark.parametrize('references', [['NM_002001.2']], indirect=True) +def test_ex_notation(output, checker): + """ + Variant description using EX notation should not crash but deletion of + one exon should delete two splice sites. + """ + checker('NM_002001.2:c.EX1del') + assert len(output.getMessagesWithErrorCode('IDELSPLICE')) == 1 + + +@pytest.mark.parametrize('references', [['LRG_1']], indirect=True) +def test_lrg_reference(output, checker): + """ + We should be able to use LRG reference sequence without error. + """ + checker('LRG_1t1:c.266G>T') + error_count, _, _ = output.Summary() + assert error_count == 0 + assert output.getIndexedOutput('genomicDescription', 0) == 'LRG_1:g.6855G>T' + + +@pytest.mark.parametrize('references', [['NM_002001.2']], indirect=True) +def test_gi_reference_plain(output, checker): + """ + Test reference sequence notation with GI number. 
+ """ + checker('31317229:c.6del') + error_count, _, _ = output.Summary() + assert error_count == 0 + assert output.getIndexedOutput('genomicDescription', 0) == '31317229:n.105del' + assert '31317229(FCER1A_v001):c.6del' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NM_002001.2']], indirect=True) +def test_gi_reference_prefix(output, checker): + """ + Test reference sequence notation with GI number and prefix. + """ + checker('GI31317229:c.6del') + error_count, _, _ = output.Summary() + assert error_count == 0 + assert output.getIndexedOutput('genomicDescription', 0) == '31317229:n.105del' + assert '31317229(FCER1A_v001):c.6del' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NM_002001.2']], indirect=True) +def test_gi_reference_prefix_colon(output, checker): + """ + Test reference sequence notation with GI number and prefix with colon. + """ + checker('GI:31317229:c.6del') + error_count, _, _ = output.Summary() + assert error_count == 0 + assert output.getIndexedOutput('genomicDescription', 0) == '31317229:n.105del' + assert '31317229(FCER1A_v001):c.6del' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NM_002001.2']], indirect=True) +def test_nop_nm(output, checker): + """ + Variant on NM without effect should be described as '='. + """ + checker('NM_002001.2:c.1_3delinsATG') + error_count, _, _ = output.Summary() + assert error_count == 0 + assert output.getIndexedOutput('genomicDescription', 0) == 'NM_002001.2:n.=' + assert 'NM_002001.2(FCER1A_v001):c.=' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['DMD']], indirect=True) +def test_nop_ud(output, references, checker): + """ + Variant on UD without effect should be described as '='. 
+ """ + ud = references[0].accession + checker(ud + ':g.5T>T') + error_count, _, _ = output.Summary() + assert error_count == 0 + assert output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000023.11:g.=' + assert output.getIndexedOutput('genomicDescription', 0) == ud + ':g.=' + assert ud + '(DMD_v001):c.=' in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['DPYD']], indirect=True) +def test_ud_reverse_sequence(output, references, checker): + """ + Variant on UD from reverse strand should have reverse complement + sequence. + """ + ud = references[0].accession + checker(ud + '(DPYD_v1):c.85C>T') + error_count, _, _ = output.Summary() + assert error_count == 0 + assert output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000001.10:g.98348885G>A' + assert output.getIndexedOutput('genomicDescription', 0) == ud + ':g.42731C>T' + assert ud + '(DPYD_v001):c.85C>T' in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['MARK1']], indirect=True) +def test_ud_forward_sequence(output, references, checker): + """ + Variant on UD from forward strand should have forward sequence. + """ + ud = references[0].accession + checker(ud + '(MARK1_v001):c.400T>C') + error_count, _, _ = output.Summary() + assert error_count == 0 + assert output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000001.10:g.220773181T>C' + assert output.getIndexedOutput('genomicDescription', 0) == ud + ':g.76614T>C' + assert ud + '(MARK1_v001):c.400T>C' in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['chr9_reverse']], indirect=True) +def test_ud_reverse_range(output, references, checker): + """ + Variant on UD from reverse strand should have reversed range + positions. + """ + # This is just some slice on from the reverse strand of hg19 chr9. 
+ ud = references[0].accession
+ checker(ud + ':g.10624_78132del')
+ error_count, _, _ = output.Summary()
+ assert error_count == 0
+ assert output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000009.11:g.32928508_32996016del'
+ assert output.getIndexedOutput('genomicDescription', 0) == ud + ':g.10624_78132del'
+
+
+@pytest.mark.parametrize('references', [['MARK1']], indirect=True)
+def test_ud_forward_range(output, references, checker):
+ """
+ Variant on UD from forward strand should have forward range positions.
+ """
+ ud = references[0].accession
+ checker(ud + '(MARK1_v001):c.400_415del')
+ error_count, _, _ = output.Summary()
+ assert error_count == 0
+ assert output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000001.10:g.220773181_220773196del'
+ assert output.getIndexedOutput('genomicDescription', 0) == ud + ':g.76614_76629del'
+
+
+@pytest.mark.parametrize('references', [['chr9_reverse']], indirect=True)
+def test_ud_reverse_del_length(output, references, checker):
+ """
+ Variant on UD from reverse strand should have reversed range
+ positions, but not reverse complement of first argument (it is not a
+ sequence, but a length).
+ """
+ # This is just some slice on from the reverse strand of hg19 chr9.
+ ud = references[0].accession
+ checker(ud + ':g.10624_78132del67509')
+ error_count, _, _ = output.Summary()
+ assert error_count == 0
+ assert output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000009.11:g.32928508_32996016del'
+ assert output.getIndexedOutput('genomicDescription', 0) == ud + ':g.10624_78132del'
+
+
+@pytest.mark.parametrize('references', [['DPYD']], indirect=True)
+def test_ud_reverse_roll(output, references, checker):
+ """
+ Variant on UD from reverse strand should roll the opposite direction.
+
+ The situation is as follows:
+
+ G A A A T T
+ c. 102 103 104 105 106 107
+ g. 748 749 750 751 752 753
+ chr g. 
868 867 866 865 864 863 + """ + ud = references[0].accession + checker(ud + '(DPYD_v001):c.104del') + error_count, _, _ = output.Summary() + assert error_count == 0 + assert output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000001.10:g.98348867del' + assert output.getIndexedOutput('genomicDescription', 0) == ud + ':g.42751del' + assert ud + '(DPYD_v001):c.105del' in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['MARK1']], indirect=True) +def test_ud_forward_roll(output, references, checker): + """ + Variant on UD from forward strand should roll the same. + + The situation is as follows: + + A T T T A + c. 398 399 400 401 402 + g. 612 613 614 615 616 + chr g. 179 180 181 182 183 + """ + ud = references[0].accession + checker(ud + '(MARK1_v001):c.400del') + error_count, _, _ = output.Summary() + assert error_count == 0 + assert output.getIndexedOutput('genomicChromDescription', 0) == 'NC_000001.10:g.220773182del' + assert output.getIndexedOutput('genomicDescription', 0) == ud + ':g.76615del' + assert ud + '(MARK1_v001):c.401del' in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_deletion_with_sequence_forward_genomic(output, checker): + """ + Specify the deleted sequence in a deletion. + """ + checker('AL449423.14:g.65471_65472delTC') + assert output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.65471_65472del' + assert 'AL449423.14(CDKN2A_v001):c.98_99del' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_deletion_with_length_forward_genomic(output, checker): + """ + Specify the deleted sequence length in a deletion. 
+ """ + checker('AL449423.14:g.65471_65472del2') + assert output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.65471_65472del' + assert 'AL449423.14(CDKN2A_v001):c.98_99del' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_deletion_with_sequence_reverse_coding(output, checker): + """ + Specify the deleted sequence in a deletion on the reverse strand. + """ + checker('AL449423.14(CDKN2A_v001):c.161_163delTGG') + assert output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.61937_61939del' + assert 'AL449423.14(CDKN2A_v001):c.161_163del' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['AL449423.14']], indirect=True) +def test_deletion_with_length_reverse_coding(output, checker): + """ + Specify the deleted sequence length in a deletion on the reverse strand. + """ + checker('AL449423.14(CDKN2A_v001):c.161_163del3') + assert output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.61937_61939del' + assert 'AL449423.14(CDKN2A_v001):c.161_163del' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_deletion_with_sequence_reverse_ng_coding(output, checker): + """ + Specify the deleted sequence in a deletion on the reverse strand + using a genomic reference. + """ + checker('NG_008939.1:c.155_157delAAC') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5206_5208del' + assert 'NG_008939.1(PCCB_v001):c.155_157del' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_deletion_with_length_reverse_ng_coding(output, checker): + """ + Specify the deleted sequence length in a deletion on the reverse strand + using a genomic reference. 
+ """ + checker('NG_008939.1:c.155_157del3') + assert output.getIndexedOutput('genomicDescription', 0) == 'NG_008939.1:g.5206_5208del' + assert 'NG_008939.1(PCCB_v001):c.155_157del' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['AB026906.1']], indirect=True) +def test_inversion(output, checker): + """ + Inversion variant. + """ + checker('AB026906.1:c.274_275inv') + assert output.getIndexedOutput('genomicDescription', 0) == 'AB026906.1:g.7872_7873inv' + assert 'AB026906.1(SDHD_v001):c.274_275inv' \ + in output.getOutput('descriptions') + + +@pytest.mark.parametrize('references', [['NM_000193.2']], indirect=True) +def test_delins_with_length(output, checker): + """ + Delins with explicit length of deleted sequence (bug #108). + """ + checker('NM_000193.2:c.108_109del2insG') + assert 'NM_000193.2(SHH_i001):p.(Lys38Serfs*2)' in output.getOutput('protDescriptions') + + +@pytest.mark.parametrize('references', [['NG_009105.1']], indirect=True) +def test_protein_level_description(output, checker): + """ + Currently protein level descriptions are not implemented. + """ + checker('NG_009105.1(OPN1LW):p.=') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 + + +@pytest.mark.parametrize('references', [['NP_064445.1']], indirect=True) +def test_protein_reference(output, checker): + """ + Currently protein references are not implemented. + """ + checker('NP_064445.1:p.=') + assert len(output.getMessagesWithErrorCode('ENOTIMPLEMENTED')) == 1 + + +@pytest.mark.parametrize('references', [['AF230870.1']], indirect=True) +def test_wnomrna_other(output, checker): + """ + Warning for no mRNA field on other than currently selected transcript + should give WNOMRNA_OTHER warning. 
+ """ + # Contains mtmC2 and mtmB2, both without mRNA + checker('AF230870.1(mtmC2_v001):c.13del') + wnomrna_other = output.getMessagesWithErrorCode('WNOMRNA_OTHER') + assert len(wnomrna_other) == 1 + + +@pytest.mark.parametrize('references', [['AF230870.1']], indirect=True) +def test_wnomrna(output, checker): + """ + Warning for no mRNA field on currently selected transcript should give + WNOMRNA warning. + """ + # Contains mtmC2 and mtmB2, both without mRNA + checker('AF230870.1(mtmC2_v001):c.13del') + wnomrna = output.getMessagesWithErrorCode('WNOMRNA') + wnomrna_other = output.getMessagesWithErrorCode('WNOMRNA_OTHER') + assert len(wnomrna) == 1 + assert len(wnomrna_other) == 1 + + +@pytest.mark.parametrize('references', [['L41870.1']], indirect=True) +def test_mrna_ref_adjacent_exons_warn(output, checker): + """ + Warning for mRNA reference where exons are not adjacent. + + In L41870.1 exon 15 ends on 1558 and 16 starts on 1636. + """ + checker('L41870.1:c.1del') + w_exon_annotation = output.getMessagesWithErrorCode('WEXON_ANNOTATION') + assert len(w_exon_annotation) == 1 + + +@pytest.mark.parametrize('references', [['NM_003002.2']], indirect=True) +def test_mrna_ref_adjacent_exons_no_warn(output, checker): + """ + No warning for mRNA reference where exons are adjacent. + """ + checker('NM_003002.2:c.1del') + w_exon_annotation = output.getMessagesWithErrorCode('WEXON_ANNOTATION') + assert len(w_exon_annotation) == 0 + + +@pytest.mark.parametrize('references', [['NM_001199.3']], indirect=True) +def test_fs_no_stop(output, checker): + """ + Frame shift yielding no stop codon should be described with + uncertainty of the stop codon. 
+ + http://www.hgvs.org/mutnomen/FAQ.html#nostop + """ + checker('NM_001199.3(BMP1):c.2188dup') + assert 'NM_001199.3(BMP1_i001):p.(Gln730Profs*?)' in output.getOutput('protDescriptions') + + +@pytest.mark.parametrize('references', [['NM_000193.2']], indirect=True) +def test_ext_no_stop(output, checker): + """ + Extension yielding no stop codon should be described with + uncertainty of the stop codon. + + http://www.hgvs.org/mutnomen/FAQ.html#nostop + """ + checker('NM_000193.2:c.1388G>C') + assert 'NM_000193.2(SHH_i001):p.(*463Serext*?)' in output.getOutput('protDescriptions') + + +@pytest.mark.parametrize('references', [['NM_000193.2']], indirect=True) +def test_fs_ext_no_stop(output, checker): + """ + Extension yielding no stop codon should be described with + uncertainty of the stop codon. + + http://www.hgvs.org/mutnomen/FAQ.html#nostop + """ + checker('NM_000193.2:c.1388_1389insC') + assert 'NM_000193.2(SHH_i001):p.(*463Cysext*?)' in output.getOutput('protDescriptions') + + +@pytest.mark.parametrize('references', [['AB026906.1']], indirect=True) +def test_synonymous_p_is(output, checker): + """ + Synonymous mutation should yield a p.(=) description. + """ + checker('AB026906.1:c.276C>T') + assert 'AB026906.1(SDHD_i001):p.(=)' in output.getOutput('protDescriptions') + assert not output.getOutput('newProteinFancy') + + +@pytest.mark.parametrize('references', [['NM_024426.4']], indirect=True) +def test_synonymous_p_is_alt_start(output, checker): + """ + Synonymous mutation should yield a p.(=) description, also with an + alternative start codon. 
+ """ + checker('NM_024426.4:c.1107A>G') + assert 'NM_024426.4(WT1_i001):p.(=)' in output.getOutput('protDescriptions') + assert not output.getOutput('newProteinFancy') + waltstart = output.getMessagesWithErrorCode('WALTSTART') + assert len(waltstart) == 1 + assert output.getOutput('oldProtein')[0].startswith('M') + assert not output.getOutput('newProtein') + assert not output.getOutput('altStart') + assert not output.getOutput('altProteinFancy') + + +@pytest.mark.parametrize('references', [['AB026906.1']], indirect=True) +def test_start_codon(output, checker): + """ + Mutation of start codon should yield a p.? description. + """ + checker('AB026906.1:c.1A>G') + assert 'AB026906.1(SDHD_i001):p.?' in output.getOutput('protDescriptions') + wstart = output.getMessagesWithErrorCode('WSTART') + assert len(wstart) == 1 + assert output.getOutput('newProtein')[0] == '?' + waltstart = output.getMessagesWithErrorCode('WALTSTART') + assert len(waltstart) == 0 + assert not output.getOutput('altStart') + + +@pytest.mark.parametrize('references', [['NM_024426.4']], indirect=True) +def test_start_codon_alt_start(output, checker): + """ + Mutation of start codon should yield a p.? description, also with an + alternative start codon. + """ + checker('NM_024426.4:c.1C>G') + assert 'NM_024426.4(WT1_i001):p.?' in output.getOutput('protDescriptions') + west = output.getMessagesWithErrorCode('WSTART') + assert len(west) == 1 + assert output.getOutput('newProtein')[0] == '?' + waltstart = output.getMessagesWithErrorCode('WALTSTART') + assert len(waltstart) == 1 + assert not output.getOutput('altStart') + + +@pytest.mark.parametrize('references', [['AB026906.1']], indirect=True) +def test_start_codon_yield_start_p_is(output, checker): + """ + Silent mutation creating new start codon should yield a p.? + description. The visualisation should also render the case for the new + start codon. + """ + checker('AB026906.1:c.1A>T') # yields TTG start codon + assert 'AB026906.1(SDHD_i001):p.?' 
in output.getOutput('protDescriptions') + wstart = output.getMessagesWithErrorCode('WSTART') + assert len(wstart) == 1 + assert output.getOutput('newProtein')[0] == '?' + waltstart = output.getMessagesWithErrorCode('WALTSTART') + assert len(waltstart) == 0 + assert output.getOutput('oldProtein')[0].startswith('M') + assert 'TTG' in output.getOutput('altStart') + assert not output.getOutput('altProteinFancy') + + +@pytest.mark.parametrize('references', [['NM_024426.4']], indirect=True) +def test_start_codon_alt_start_yield_start_p_is(output, checker): + """ + Silent mutation creating new start codon should yield a p.? + description, also with an alternative start codon. The visualisation + should also render the case for the new start codon. + """ + checker('NM_024426.4:c.1C>A') # yields ATG start codon + assert 'NM_024426.4(WT1_i001):p.?' in output.getOutput('protDescriptions') + west = output.getMessagesWithErrorCode('WSTART') + assert len(west) == 1 + assert output.getOutput('newProtein')[0] == '?' + waltstart = output.getMessagesWithErrorCode('WALTSTART') + assert len(waltstart) == 1 + assert output.getOutput('oldProtein')[0].startswith('M') + assert 'ATG' in output.getOutput('altStart') + assert not output.getOutput('altProteinFancy') + + +@pytest.mark.parametrize('references', [['AB026906.1']], indirect=True) +def test_start_codon_yield_start(output, checker): + """ + Mutation creating new start codon should yield a p.? description. The + visualisation should also render the case for the new start codon. + """ + checker('AB026906.1:c.1_4delinsTTGA') # yields TTG start codon + assert 'AB026906.1(SDHD_i001):p.?' in output.getOutput('protDescriptions') + wstart = output.getMessagesWithErrorCode('WSTART') + assert len(wstart) == 1 + assert output.getOutput('newProtein')[0] == '?' 
+ waltstart = output.getMessagesWithErrorCode('WALTSTART') + assert len(waltstart) == 0 + assert 'TTG' in output.getOutput('altStart') + assert output.getOutput('altProtein')[0].startswith('M') + + +@pytest.mark.parametrize('references', [['NM_024426.4']], indirect=True) +def test_start_codon_alt_start_yield_start(output, checker): + """ + Mutation creating new start codon should yield a p.? description, also + with an alternative start codon. The visualisation should also render + the new start codon. + """ + checker('NM_024426.4:c.1_4delinsATGA') # yields ATG start codon + assert 'NM_024426.4(WT1_i001):p.?' in output.getOutput('protDescriptions') + west = output.getMessagesWithErrorCode('WSTART') + assert len(west) == 1 + assert output.getOutput('newProtein')[0] == '?' + waltstart = output.getMessagesWithErrorCode('WALTSTART') + assert len(waltstart) == 1 + assert output.getOutput('oldProtein')[0].startswith('M') + assert 'ATG' in output.getOutput('altStart') + assert output.getOutput('altProtein')[0].startswith('M') diff --git a/tests/test_website.py b/tests/test_website.py index faf336ba..57d085c2 100644 --- a/tests/test_website.py +++ b/tests/test_website.py @@ -1,13 +1,10 @@ """ -Tests for the WSGI interface to Mutalyzer. - -@todo: Tests for /upload. +Tests for the mutalyzer.website module. """ from __future__ import unicode_literals -#import logging; logging.basicConfig() import bz2 from mock import patch import os @@ -15,916 +12,1004 @@ from io import BytesIO from Bio import Entrez import lxml.html +import pytest from mutalyzer import announce, Scheduler -from mutalyzer.config import settings -from mutalyzer.db import models +from mutalyzer.db.models import BatchJob from mutalyzer.website import create_app -from fixtures import cache, database, hg19, hg19_transcript_mappings -from utils import MutalyzerTest -from utils import fix + +# TODO: Tests for /upload. 
+ + +@pytest.fixture +def website(): + return create_app().test_client() + + +def test_homepage(website): + """ + Expect the index HTML page. + """ + r = website.get('/') + assert r.status_code == 200 + assert 'Welcome to the Mutalyzer website' in r.data -BATCH_RESULT_URL = 'http://localhost/mutalyzer/Results_{id}.txt' +def test_about(website): + """ + See if people get proper credit. + """ + r = website.get('/about') + assert r.status == '200 OK' + assert 'Jonathan Vis' in r.data -class TestWebsite(MutalyzerTest): +def test_non_existing(website): """ - Test the Mutalyzer WSGI interface. + Expect a 404 response. """ - def setup(self): - super(TestWebsite, self).setup() - self.app = create_app().test_client() + r = website.get('/this/doesnotexist') + assert r.status_code == 404 - def test_homepage(self): - """ - Expect the index HTML page. - """ - r = self.app.get('/') + +@pytest.mark.usefixtures('db') +def test_menu_links(website): + """ + Test all links in the main menu. + """ + # This could contain relative links we want to skip. + ignore = [] + r = website.get('/') + + dom = lxml.html.fromstring(r.data) + + for link in dom.cssselect('nav a'): + href = link.get('href') + if (href.startswith('http://') or + href.startswith('https://') or + href.startswith('mailto:') or + href.startswith('#') or + href in ignore): + continue + if not href.startswith('/'): + href = '/' + href + + r = website.get(href) assert r.status_code == 200 - assert 'Welcome to the Mutalyzer website' in r.data - - def test_about(self): - """ - See if people get proper credit. - """ - r = self.app.get('/about') - assert r.status == '200 OK' - assert 'Jonathan Vis' in r.data - - def test_non_existing(self): - """ - Expect a 404 response. - """ - r = self.app.get('/this/doesnotexist') - assert r.status_code == 404 - - @fix(database) - def test_menu_links(self): - """ - Test all links in the main menu. 
- """ - ignore = [] # This could contain relative links we want to skip - r = self.app.get('/') - - dom = lxml.html.fromstring(r.data) - - for link in dom.cssselect('#menu a'): - href = link.get('href') - if (href.startswith('http://') or - href.startswith('https://') or - href in ignore): - continue - if not href.startswith('/'): - href = '/' + href - - r = self.app.get(href) - assert r.status_code == 200 - - def test_announcement(self): - """ - We should always see the current announcement. - """ - announce.set_announcement('Test announcement') - r = self.app.get('/syntax-checker') - assert r.status == '200 OK' - assert 'Test announcement' in r.data - - announce.set_announcement('New announcement') - r = self.app.get('/syntax-checker') - assert r.status == '200 OK' - assert 'New announcement' in r.data - - announce.unset_announcement() - r = self.app.get('/syntax-checker') - assert r.status == '200 OK' - assert 'nnouncement' not in r.data - - def test_description_extractor_raw(self): - """ - Submit two sequences to the variant description extractor. - """ - r = self.app.post('/description-extractor', data={ - 'reference_method': 'raw_method', - 'sample_method': 'raw_method', - 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', - 'sample_sequence': 'ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA'}) - assert '[5_6insTT;17del;26A>C;35dup]' in r.data - - def test_description_extractor_raw_fastq(self): - """ - Submit two sequences to the variant description extractor. 
- """ - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', 'extractor_input.fq') - r = self.app.post('/description-extractor', data={ - 'reference_method': 'raw_method', - 'sample_method': 'raw_method', - 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', - 'sample_sequence': open(path).read()}) - assert '[5_6insTT;17del;26A>C;35dup]' in r.data - - @fix(database, cache('NM_004006.1', 'NM_004006.2')) - def test_description_extractor_refseq(self): - """ - Submit two accession numbers to the variant description extractor. - """ - r = self.app.post('/description-extractor', data={ - 'reference_method': 'refseq_method', - 'sample_method': 'refseq_method', - 'reference_accession_number': 'NM_004006.1', - 'sample_accession_number': 'NM_004006.2'}) - assert '[12749G>A;13729G>A]' in r.data - - def test_description_extractor_file_fasta(self): - """ - Submit a sequence and a FASTA file to the variant description - extractor. - """ - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', 'extractor_input.fa') - r = self.app.post('/description-extractor', data={ - 'reference_method': 'raw_method', - 'sample_method': 'file_method', - 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', - 'sample_file': (open(path), 'extractor_input.fa')}) - assert '[5_6insTT;17del;26A>C;35dup]' in r.data - - def test_description_extractor_file_fastq(self): - """ - Submit a sequence and a FASTQ file to the variant description - extractor. 
- """ - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', 'extractor_input.fq') - r = self.app.post('/description-extractor', data={ - 'reference_method': 'raw_method', - 'sample_method': 'file_method', - 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', - 'sample_file': (open(path), 'extractor_input.fq')}) - assert '[5_6insTT;17del;26A>C;35dup]' in r.data - - def test_description_extractor_file_text(self): - """ - Submit a sequence and a text file to the variant description - extractor. - """ - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', 'extractor_input.txt') - r = self.app.post('/description-extractor', data={ - 'reference_method': 'raw_method', - 'sample_method': 'file_method', - 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', - 'sample_file': (open(path), 'extractor_input.txt')}) - assert '[5_6insTT;17del;26A>C;35dup]' in r.data - - def test_description_extractor_ref_too_long(self): - """ - Submit a reference sequence exceeding the maximum length to the variant - description extractor. - """ - r = self.app.post('/description-extractor', data={ - 'reference_method': 'raw_method', - 'sample_method': 'raw_method', - 'reference_sequence': 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH + 1), - 'sample_sequence': 'A'}) - assert '2_{}del'.format(settings.EXTRACTOR_MAX_INPUT_LENGTH + 1) not in r.data - assert 'Input sequences are restricted to ' in r.data - assert '1 Error, 0 Warnings.' in r.data - - def test_description_extractor_sample_too_long(self): - """ - Submit a sample sequence exceeding the maximum length to the variant - description extractor. 
- """ - r = self.app.post('/description-extractor', data={ - 'reference_method': 'raw_method', - 'sample_method': 'raw_method', - 'reference_sequence': 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH), - 'sample_sequence': 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH + 1)}) - assert '{}dup'.format(settings.EXTRACTOR_MAX_INPUT_LENGTH) not in r.data - assert 'Input sequences are restricted to ' in r.data - assert '1 Error, 0 Warnings.' in r.data - - def test_description_extractor_lowercase(self): - """ - Submit a sample sequence with a base in lowercase to the variant - description extractor. - """ - r = self.app.post('/description-extractor', data={ - 'reference_method': 'raw_method', - 'sample_method': 'raw_method', - 'reference_sequence': 'TTT', - 'sample_sequence': 'TaT'}) - assert '<pre class="description">2T>A</pre>' in r.data - - def test_checksyntax_valid(self): - """ - Submit the check syntax form with a valid variant. - """ - r = self.app.get('/syntax-checker', - query_string={'description': 'AB026906.1:c.274G>T'}) - assert 'The syntax of this variant description is OK!' in r.data - - def test_checksyntax_invalid(self): - """ - Submit the check syntax form with an invalid variant. - """ - r = self.app.get('/syntax-checker', - query_string={'description': 'AB026906.1:c.27'}) - assert 'Fatal' in r.data - assert 'The "^" indicates the position where the error occurred' in r.data - - @fix(database, cache('NM_002001.2')) - def test_check_valid(self): - """ - Submit the name checker form with a valid variant. - Should include form and main HTML layout. - """ - r = self.app.get('/name-checker', - query_string={'description': 'NM_002001.2:g.1del'}) - assert '0 Errors' in r.data - assert '0 Warnings' in r.data - assert 'Raw variant 1: deletion of 1' in r.data - assert 'value="NM_002001.2:g.1del"' in r.data - - def test_check_invalid(self): - """ - Submit the name checker form with an invalid variant. 
- """ - r = self.app.get('/name-checker', - query_string={'description': 'NM_002001.2'}) - assert '1 Error' in r.data - assert '0 Warnings' in r.data - assert 'The "^" indicates the position where the error occurred' in r.data - - @fix(database, cache('NP_064445.1')) - def test_check_protein_reference(self): - """ - Submit the name checker form with a protein reference sequence (not - supported). - """ - r = self.app.get('/name-checker', - query_string={'description': 'NP_064445.1:c.274G>T'}) - assert '1 Error' in r.data - assert '0 Warnings' in r.data - assert 'Protein reference sequences are not supported' in r.data - - @fix(database, cache('NM_002001.2')) - def test_check_noninteractive(self): - """ - Submit the name checker form non-interactively. - Should not include form and main layout HTML. - """ - r = self.app.get('/name-checker', - query_string={'description': 'NM_002001.2:g.1del', - 'standalone': '1'}) - assert '<a href="#bottom" class="hornav">go to bottom</a>' not in r.data - assert '<input type="text" name="description" value="NM_002001.2:g.1del" style="width:100%">' not in r.data - assert '0 Errors' in r.data - assert '0 Warnings' in r.data - assert 'Raw variant 1: deletion of 1' in r.data - - @fix(database, cache('NG_012772.1')) - def test_check_interactive_links(self): - """ - Submitting interactively should have links to transcripts also - interactive. - """ - r = self.app.get('/name-checker', - query_string={'description': 'NG_012772.1:g.128del'}) - assert '0 Errors' in r.data - assert 'href="/name-checker?description=NG_012772.1%3Ag.128del"' in r.data - assert 'href="/name-checker?description=NG_012772.1%28BRCA2_v001%29%3Ac.-5100del"' in r.data - - def test_snp_converter_valid(self): - """ - Submit the SNP converter form with a valid SNP. - """ - # Patch Retriever.snpConvert to return rs9919552. 
- def mock_efetch(*args, **kwargs): - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', - 'rs9919552.xml.bz2') - return bz2.BZ2File(path) - - with patch.object(Entrez, 'efetch', mock_efetch): - r = self.app.get('/snp-converter', - query_string={'rs_id': 'rs9919552'}) - assert '0 Errors' in r.data - assert '0 Warnings' in r.data - assert 'NC_000011.9:g.111959625C>T' in r.data - assert 'NG_012337.2:g.7055C>T' in r.data - assert 'NM_003002.3:c.204C>T' in r.data - assert 'NP_002993.1:p.Ser68=' in r.data - - def test_snp_converter_invalid(self): - """ - Submit the SNP converter form with an invalid SNP. - """ - r = self.app.get('/snp-converter', - query_string={'rs_id': 'r9919552'}) - - assert '1 Error' in r.data - assert '0 Warnings' in r.data - assert 'Fatal' in r.data - assert 'This is not a valid dbSNP id' in r.data - - @fix(database, hg19, hg19_transcript_mappings) - def test_position_converter_c2g(self): - """ - Submit the position converter form with a valid variant. - """ - r = self.app.get('/position-converter', - query_string={'assembly_name_or_alias': 'hg19', - 'description': 'NM_003002.2:c.204C>T'}) - assert 'NC_000011.9:g.111959625C>T' in r.data - - @fix(database, hg19, hg19_transcript_mappings) - def test_position_converter_g2c(self): - """ - Submit the position converter form with a valid variant. - """ - r = self.app.get('/position-converter', - query_string={'assembly_name_or_alias': 'hg19', - 'description': 'NC_000011.9:g.111959625C>T'}) - assert 'NM_003002.2:c.204C>T' in r.data - - def _batch(self, job_type='name-checker', assembly_name_or_alias=None, - file="", size=0, header='', lines=None): - """ - Submit a batch form. - - @kwarg batch_type: Type of batch job to test. One of name-checker, - syntax-checker, position-converter. - @kwarg argument: Optional extra argument for the batch job. - @kwarg file: String with variants to use as input for the batch job. - @kwarg size: Number of variants in input. 
- @kwarg header: Message that must be found in the batch job result. - @kwarg lines: Number of result rows expected. - - @return: The batch result document. - @rtype: string - """ - data = {'job_type': job_type, - 'email': 'test@test.test', - 'file': (BytesIO(file.encode('utf-8')), 'test.txt')} - if assembly_name_or_alias is not None: - data['assembly_name_or_alias'] = assembly_name_or_alias - - r = self.app.post('/batch-jobs', - data=data) - progress_url = '/' + r.location.split('/')[-1] - - r = self.app.get(progress_url) - assert '<div id="if_items_left">' in r.data - assert '<div id="ifnot_items_left" style="display:none">' in r.data - assert ('<span id="items_left">%d</span>' % size) in r.data - - scheduler = Scheduler.Scheduler() - scheduler.process() - - r = self.app.get(progress_url) - assert '<div id="if_items_left" style="display:none">' in r.data - assert '<div id="ifnot_items_left">' in r.data - - dom = lxml.html.fromstring(r.data) - result_url = dom.cssselect('#ifnot_items_left a')[0].attrib['href'] - - if not lines: - lines = size - - r = self.app.get(result_url) - assert 'text/plain' in r.headers['Content-Type'] - assert header in r.data - assert len(r.data.strip().split('\n')) - 1 == lines - - return r.data - - @fix(database, cache('AB026906.1', 'NM_003002.2', 'AL449423.14')) - def test_batch_namechecker(self): - """ - Submit the batch name checker form. - """ - variants=['AB026906.1(SDHD):g.7872G>T', - 'NM_003002.2:c.3_4insG', - 'AL449423.14(CDKN2A_v002):c.5_400del'] - self._batch('name-checker', - file='\n'.join(variants), - size=len(variants), - header='Input\tErrors and warnings') - - @fix(database) - def test_batch_namechecker_extra_tab(self): - """ - Submit the batch syntax checker form with lines ending with tab - characters. 
- """ - variants=['AB026906.1(SDHD):g.7872G>T\t', - 'AB026906.1(SDHD):g.7872G>T\t', - 'AB026906.1(SDHD):g.7872G>T\t'] - self._batch('syntax-checker', - file='\n'.join(variants), - size=len(variants) * 2, - lines=len(variants), - header='Input\tStatus') - - @fix(database) - def test_batch_syntaxchecker(self): - """ - Submit the batch syntax checker form. - """ - variants = ['AB026906.1(SDHD):g.7872G>T', - 'NM_003002.1:c.3_4insG', - 'AL449423.14(CDKN2A_v002):c.5_400del'] - self._batch('syntax-checker', - file='\n'.join(variants), - size=len(variants), - header='Input\tStatus') - - @fix(database, hg19, hg19_transcript_mappings) - def test_batch_positionconverter(self): - """ - Submit the batch position converter form. - """ - variants = ['NM_003002.2:c.204C>T', - 'NC_000011.9:g.111959625C>T'] - self._batch('position-converter', - assembly_name_or_alias='hg19', - file='\n'.join(variants), - size=len(variants), - header='Input Variant') - - @fix(database) - def test_batch_syntaxchecker_newlines_unix(self): - """ - Submit batch syntax checker job with Unix line endings. - """ - variants = ['AB026906.1(SDHD):g.7872G>T', - 'NM_003002.1:c.3_4insG', - 'AL449423.14(CDKN2A_v002):c.5_400del'] - self._batch('syntax-checker', - file='\n'.join(variants), - size=len(variants), - header='Input\tStatus') - - @fix(database) - def test_batch_syntaxchecker_newlines_mac(self): - """ - Submit batch syntax checker job with Mac line endings. - """ - variants = ['AB026906.1(SDHD):g.7872G>T', - 'NM_003002.1:c.3_4insG', - 'AL449423.14(CDKN2A_v002):c.5_400del'] - self._batch('syntax-checker', - file='\r'.join(variants), - size=len(variants), - header='Input\tStatus') - - @fix(database) - def test_batch_syntaxchecker_newlines_windows(self): - """ - Submit batch syntax checker job with Windows line endings. 
- """ - variants = ['AB026906.1(SDHD):g.7872G>T', - 'NM_003002.1:c.3_4insG', - 'AL449423.14(CDKN2A_v002):c.5_400del'] - self._batch('syntax-checker', - file='\r\n'.join(variants), - size=len(variants), - header='Input\tStatus') - - @fix(database) - def test_batch_syntaxchecker_newlines_big_unix(self): - """ - Submit big batch syntax checker job with Unix line endings. - """ - samples = ['AB026906.1(SDHD):g.7872G>T', - 'NM_003002.1:c.3_4insG', - 'AL449423.14(CDKN2A_v002):c.5_400del'] - variants = [] - # Create 240 variants out of 3 samples - for i in range(80): - variants.extend(samples) - self._batch('syntax-checker', - file='\n'.join(variants), - size=len(variants), - header='Input\tStatus') - - @fix(database) - def test_batch_syntaxchecker_newlines_big_mac(self): - """ - Submit big batch syntax checker job with Mac line endings. - """ - samples = ['AB026906.1(SDHD):g.7872G>T', - 'NM_003002.1:c.3_4insG', - 'AL449423.14(CDKN2A_v002):c.5_400del'] - variants = [] - # Create 240 variants out of 3 samples - for i in range(80): - variants.extend(samples) - self._batch('syntax-checker', - file='\r'.join(variants), - size=len(variants), - header='Input\tStatus') - - @fix(database) - def test_batch_syntaxchecker_newlines_big_windows(self): - """ - Submit big batch syntax checker job with Windows line endings. - """ - samples = ['AB026906.1(SDHD):g.7872G>T', - 'NM_003002.1:c.3_4insG', - 'AL449423.14(CDKN2A_v002):c.5_400del'] - variants = [] - # Create 240 variants out of 3 samples - for i in range(80): - variants.extend(samples) - self._batch('syntax-checker', - file='\r\n'.join(variants), - size=len(variants), - header='Input\tStatus') - - @fix(database) - def test_batch_syntaxchecker_oldstyle(self): - """ - Submit the batch syntax checker form with old style input file. 
- """ - variants = ['AccNo\tGenesymbol\tMutation', - 'AB026906.1\tSDHD\tg.7872G>T', - 'NM_003002.1\t\tc.3_4insG', - 'AL449423.14\tCDKN2A_v002\tc.5_400del'] - self._batch('syntax-checker', - file='\n'.join(variants), - size=len(variants)-1, - header='Input\tStatus') - - @fix(database, cache('AB026906.1')) - def test_batch_namechecker_restriction_sites(self): - """ - Submit the batch name checker form and see if restriction site effects - are added. - """ - variants=['AB026906.1:c.274G>T', - 'AB026906.1:c.[274G>T;143A>G;15G>T]'] - results = self._batch('name-checker', - file='\n'.join(variants), - size=len(variants), - header='Input\tErrors and warnings').strip().split('\n') - assert 'Restriction Sites Created\tRestriction Sites Deleted' in results[0] - assert 'CviQI,RsaI\tBccI' in results[1] - assert 'CviQI,RsaI;HhaI,HinP1I;SfcI\tBccI;;BpmI,BsaXI (2),LpnPI,MnlI' in results[2] - - @fix(database) - def test_batch_multicolumn(self): - """ - Submit the batch syntax checker with a multiple-colums input file. - - This by the way also tests for the correct order of batch results. - """ - variants = [('AB026906.1(SDHD):g.7872G>T', 'NM_003002.1:c.3_4insG'), - ('NM_003002.1:c.3_4insG', 'AB026906.1(SDHD):g.7872G>T'), - ('AL449423.14(CDKN2A_v002):c.5_400del', 'AL449423.14(CDKN2A_v002):c.5_400del')] - result = self._batch('syntax-checker', - file='\n'.join(['\t'.join(r) for r in variants]), - size=len(variants) * 2, - header='Input\tStatus', - lines=len(variants)) - for line in result.splitlines()[1:]: - assert len(line.split('\t')) == len(variants[0]) * 2 - - def test_download_py(self): - """ - Download a Python example client for the web service. - """ - r = self.app.get('/downloads/client-suds.py') - assert 'text/plain' in r.headers['Content-Type'] - assert '#!/usr/bin/env python' in r.data - - def test_download_rb(self): - """ - Download a Ruby example client for the web service. 
- """ - r = self.app.get('/downloads/client-savon.rb') - assert 'text/plain' in r.headers['Content-Type'] - assert '#!/usr/bin/env ruby' in r.data - - def test_download_cs(self): - """ - Download a C# example client for the web service. - """ - r = self.app.get('/downloads/client-mono.cs') - assert 'text/plain' in r.headers['Content-Type'] - assert 'public static void Main(String [] args) {' in r.data - - def test_download_php(self): - """ - Download a PHP example client for the web service. - """ - r = self.app.get('/downloads/client-php.php') - assert 'text/plain' in r.headers['Content-Type'] - assert '<?php' in r.data - - def test_downloads_batchtest(self): - """ - Download the batch test example file. - """ - r = self.app.get('/downloads/batchtestnew.txt') - assert 'text/plain' in r.headers['Content-Type'] - assert 'NM_003002.1:c.3_4insG' in r.data - - def test_annotated_soap_api(self): - """ - Test the SOAP documentation generated from the WSDL. - """ - r = self.app.get('/soap-api') - assert 'text/html' in r.headers['Content-Type'] - assert 'Web Service: Mutalyzer' in r.data - - @fix(database, cache('NG_012337.1')) - def test_getgs(self): - """ - Test the /getGS interface used by LOVD2. - """ - r = self.app.get('/getGS', - query_string={'variantRecord': 'NM_003002.2', - 'forward': '1', - 'mutationName': 'NG_012337.1:g.7055C>T'}, - follow_redirects=True) - assert '0 Errors' in r.data - assert '0 Warnings' in r.data - assert 'Raw variant 1: substitution at 7055' in r.data - assert 'go to bottom' not in r.data - assert '<input' not in r.data - - @fix(database, cache('NG_012337.1')) - def test_getgs_coding_multiple_transcripts(self): - """ - Test the /getGS interface on a coding description and genomic - reference with multiple transcripts. - """ - r = self.app.get('/getGS', - query_string={'variantRecord': 'NM_003002.2', - 'forward': '1', - 'mutationName': 'NG_012337.1:c.45A>T'}, - follow_redirects=False) - assert '/name-checker?' 
in r.location - assert 'description=NG_012337.1' in r.location - - @fix(database, cache('NG_008939.1')) - def test_getgs_variant_error(self): - """ - Test the /getGS interface on a variant description with an error. - """ - # The error is that position c.45 is a C, not an A. - r = self.app.get('/getGS', - query_string={'variantRecord': 'NM_000532.4', - 'forward': '1', - 'mutationName': 'NG_008939.1:c.45A>T'}, - follow_redirects=False) - assert '/name-checker?' in r.location - assert 'description=NG_008939.1' in r.location - - @fix(database, hg19, hg19_transcript_mappings) - def test_variantinfo_g2c(self): - """ - Test the /Variant_info interface used by LOVD2 (g to c). - """ - r = self.app.get('/Variant_info', - query_string={'LOVD_ver': '2.0-29', - 'build': 'hg19', - 'acc': 'NM_203473.1', - 'var': 'g.48374289_48374389del'}) - assert 'text/plain' in r.headers['Content-Type'] - expected = '\n'.join(['1020', '0', '1072', '48', '48374289', '48374389', 'del']) - assert r.data == expected - - @fix(database, hg19, hg19_transcript_mappings) - def test_variantinfo_c2g(self): - """ - Test the /Variant_info interface used by LOVD2 (c to g). - """ - r = self.app.get('/Variant_info', - query_string={'LOVD_ver': '2.0-29', - 'build': 'hg19', - 'acc': 'NM_203473.1', - 'var': 'c.1020_1072+48del'}) - assert 'text/plain' in r.headers['Content-Type'] - expected = '\n'.join(['1020', '0', '1072', '48', '48374289', '48374389', 'del']) - assert r.data == expected - - @fix(database, hg19, hg19_transcript_mappings) - def test_variantinfo_c2g_downstream(self): - """ - Test the /Variant_info interface used by LOVD2 (c variant downstream - notation to g). 
- """ - r = self.app.get('/Variant_info', - query_string={'LOVD_ver': '2.0-29', - 'build': 'hg19', - 'acc': 'NM_203473.1', - 'var': 'c.1709+d187del'}) - assert 'text/plain' in r.headers['Content-Type'] - expected = '\n'.join(['1709', '187', '1709', '187', '48379389', '48379389', 'del']) - assert r.data == expected - - @fix(database, hg19, hg19_transcript_mappings) - def test_variantinfo_no_variant(self): - """ - Test the /Variant_info interface used by LOVD2 (without variant). - """ - r = self.app.get('/Variant_info', - query_string={'LOVD_ver': '2.0-29', - 'build': 'hg19', - 'acc': 'NM_203473.1'}) - assert 'text/plain' in r.headers['Content-Type'] - assert 'text/plain' in r.content_type - expected = '\n'.join(['-158', '1709', '1371']) - assert r.data == expected - - @fix(database, hg19, hg19_transcript_mappings) - def test_variantinfo_ivs(self): - """ - Test the /Variant_info interface used by LOVD2 (with IVS positioning). - """ - r = self.app.get('/Variant_info', - query_string={'LOVD_ver': '2.0-33', - 'build': 'hg19', - 'acc': 'NM_000249.3', - 'var': 'c.IVS10+3A>G'}) - assert 'text/plain' in r.headers['Content-Type'] - expected = '\n'.join(['884', '3', '884', '3', '37059093', '37059093', 'subst']) - assert r.data == expected - - @fix(database) - def test_upload_local_file(self): - """ - Test the genbank uploader. - """ - path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - 'data', - 'AB026906.1.gb.bz2') - r = self.app.post('/reference-loader', - data={'method': 'upload_method', - 'file': (bz2.BZ2File(path), 'AB026906.1.gb')}) - assert 'Your reference sequence was loaded successfully.' in r.data - - dom = lxml.html.fromstring(r.data) - reference_url = dom.cssselect('#reference_download')[0].attrib['href'] - - r = self.app.get(reference_url) - assert r.data == bz2.BZ2File(path).read() - - @fix(database) - def test_upload_local_file_invalid(self): - """ - Test the genbank uploader with a non-genbank file. 
- """ - r = self.app.post('/reference-loader', - data={'method': 'upload_method', - 'file': (BytesIO('this is not a genbank file'.encode('utf-8')), 'AB026906.1.gb')}) - assert 'Your reference sequence was loaded successfully.' not in r.data - assert 'The file could not be parsed.' in r.data - - @fix(database, cache('NM_002001.2')) - def test_reference(self): - """ - Test if reference files are cached. - """ - r = self.app.get('/name-checker', - query_string={'description': 'NM_002001.2:g.1del'}) - assert '0 Errors' in r.data - - r = self.app.get('/reference/NM_002001.2.gb') + + +def test_announcement(website): + """ + We should always see the current announcement. + """ + announce.set_announcement('Test announcement') + r = website.get('/syntax-checker') + assert r.status == '200 OK' + assert 'Test announcement' in r.data + + announce.set_announcement('New announcement') + r = website.get('/syntax-checker') + assert r.status == '200 OK' + assert 'New announcement' in r.data + + announce.unset_announcement() + r = website.get('/syntax-checker') + assert r.status == '200 OK' + assert 'nnouncement' not in r.data + + +def test_description_extractor_raw(website): + """ + Submit two sequences to the variant description extractor. + """ + r = website.post('/description-extractor', data={ + 'reference_method': 'raw_method', + 'sample_method': 'raw_method', + 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', + 'sample_sequence': 'ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA'}) + assert '[5_6insTT;17del;26A>C;35dup]' in r.data + + +def test_description_extractor_raw_fastq(website): + """ + Submit two sequences to the variant description extractor. 
+ """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', 'extractor_input.fq') + r = website.post('/description-extractor', data={ + 'reference_method': 'raw_method', + 'sample_method': 'raw_method', + 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', + 'sample_sequence': open(path).read()}) + assert '[5_6insTT;17del;26A>C;35dup]' in r.data + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize( + 'references', [['NM_004006.1', 'NM_004006.2']], indirect=True) +def test_description_extractor_refseq(website): + """ + Submit two accession numbers to the variant description extractor. + """ + r = website.post('/description-extractor', data={ + 'reference_method': 'refseq_method', + 'sample_method': 'refseq_method', + 'reference_accession_number': 'NM_004006.1', + 'sample_accession_number': 'NM_004006.2'}) + assert '[12749G>A;13729G>A]' in r.data + + +def test_description_extractor_file_fasta(website): + """ + Submit a sequence and a FASTA file to the variant description + extractor. + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', 'extractor_input.fa') + r = website.post('/description-extractor', data={ + 'reference_method': 'raw_method', + 'sample_method': 'file_method', + 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', + 'sample_file': (open(path), 'extractor_input.fa')}) + assert '[5_6insTT;17del;26A>C;35dup]' in r.data + + +def test_description_extractor_file_fastq(website): + """ + Submit a sequence and a FASTQ file to the variant description + extractor. 
+ """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', 'extractor_input.fq') + r = website.post('/description-extractor', data={ + 'reference_method': 'raw_method', + 'sample_method': 'file_method', + 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', + 'sample_file': (open(path), 'extractor_input.fq')}) + assert '[5_6insTT;17del;26A>C;35dup]' in r.data + + +def test_description_extractor_file_text(website): + """ + Submit a sequence and a text file to the variant description + extractor. + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', 'extractor_input.txt') + r = website.post('/description-extractor', data={ + 'reference_method': 'raw_method', + 'sample_method': 'file_method', + 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', + 'sample_file': (open(path), 'extractor_input.txt')}) + assert '[5_6insTT;17del;26A>C;35dup]' in r.data + + +def test_description_extractor_ref_too_long(settings, website): + """ + Submit a reference sequence exceeding the maximum length to the variant + description extractor. + """ + r = website.post('/description-extractor', data={ + 'reference_method': 'raw_method', + 'sample_method': 'raw_method', + 'reference_sequence': 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH + 1), + 'sample_sequence': 'A'}) + assert '2_{}del'.format(settings.EXTRACTOR_MAX_INPUT_LENGTH + 1) not in r.data + assert 'Input sequences are restricted to ' in r.data + assert '1 Error, 0 Warnings.' in r.data + + +def test_description_extractor_sample_too_long(settings, website): + """ + Submit a sample sequence exceeding the maximum length to the variant + description extractor. 
+ """ + r = website.post('/description-extractor', data={ + 'reference_method': 'raw_method', + 'sample_method': 'raw_method', + 'reference_sequence': 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH), + 'sample_sequence': 'A' * (settings.EXTRACTOR_MAX_INPUT_LENGTH + 1)}) + assert '{}dup'.format(settings.EXTRACTOR_MAX_INPUT_LENGTH) not in r.data + assert 'Input sequences are restricted to ' in r.data + assert '1 Error, 0 Warnings.' in r.data + + +def test_description_extractor_lowercase(website): + """ + Submit a sample sequence with a base in lowercase to the variant + description extractor. + """ + r = website.post('/description-extractor', data={ + 'reference_method': 'raw_method', + 'sample_method': 'raw_method', + 'reference_sequence': 'TTT', + 'sample_sequence': 'TaT'}) + assert '<pre class="description">2T>A</pre>' in r.data + + +def test_checksyntax_valid(website): + """ + Submit the check syntax form with a valid variant. + """ + r = website.get('/syntax-checker', + query_string={'description': 'AB026906.1:c.274G>T'}) + assert 'The syntax of this variant description is OK!' in r.data + + +def test_checksyntax_invalid(website): + """ + Submit the check syntax form with an invalid variant. + """ + r = website.get('/syntax-checker', + query_string={'description': 'AB026906.1:c.27'}) + assert 'Fatal' in r.data + assert 'The "^" indicates the position where the error occurred' in r.data + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NM_002001.2']], indirect=True) +def test_check_valid(website): + """ + Submit the name checker form with a valid variant. + Should include form and main HTML layout. 
+ """ + r = website.get('/name-checker', + query_string={'description': 'NM_002001.2:g.1del'}) + assert '0 Errors' in r.data + assert '0 Warnings' in r.data + assert 'Raw variant 1: deletion of 1' in r.data + assert 'value="NM_002001.2:g.1del"' in r.data + + +def test_check_invalid(website): + """ + Submit the name checker form with an invalid variant. + """ + r = website.get('/name-checker', + query_string={'description': 'NM_002001.2'}) + assert '1 Error' in r.data + assert '0 Warnings' in r.data + assert 'The "^" indicates the position where the error occurred' in r.data + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NP_064445.1']], indirect=True) +def test_check_protein_reference(website): + """ + Submit the name checker form with a protein reference sequence (not + supported). + """ + r = website.get('/name-checker', + query_string={'description': 'NP_064445.1:c.274G>T'}) + assert '1 Error' in r.data + assert '0 Warnings' in r.data + assert 'Protein reference sequences are not supported' in r.data + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NM_002001.2']], indirect=True) +def test_check_noninteractive(website): + """ + Submit the name checker form non-interactively. + Should not include form and main layout HTML. + """ + r = website.get('/name-checker', + query_string={'description': 'NM_002001.2:g.1del', + 'standalone': '1'}) + assert '<a href="#bottom" class="hornav">go to bottom</a>' not in r.data + assert '<input type="text" name="description" value="NM_002001.2:g.1del" style="width:100%">' not in r.data + assert '0 Errors' in r.data + assert '0 Warnings' in r.data + assert 'Raw variant 1: deletion of 1' in r.data + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NG_012772.1']], indirect=True) +def test_check_interactive_links(website): + """ + Submitting interactively should have links to transcripts also + interactive. 
+ """ + r = website.get('/name-checker', + query_string={'description': 'NG_012772.1:g.128del'}) + assert '0 Errors' in r.data + assert 'href="/name-checker?description=NG_012772.1%3Ag.128del"' in r.data + assert 'href="/name-checker?description=NG_012772.1%28BRCA2_v001%29%3Ac.-5100del"' in r.data + + +def test_snp_converter_valid(website): + """ + Submit the SNP converter form with a valid SNP. + """ + # Patch Retriever.snpConvert to return rs9919552. + def mock_efetch(*args, **kwargs): path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data', - 'NM_002001.2.gb.bz2') - assert r.data == bz2.BZ2File(path).read() - - @fix(database, cache('NM_002001.2')) - def test_reference_head(self): - """ - Test if reference files are cached, by issuing a HEAD request. - """ - r = self.app.get('/name-checker', - query_string={'description': 'NM_002001.2:g.1del'}) - assert '0 Errors' in r.data - - r = self.app.head('/reference/NM_002001.2.gb') - assert r.status_code == 200 + 'rs9919552.xml.bz2') + return bz2.BZ2File(path) + + with patch.object(Entrez, 'efetch', mock_efetch): + r = website.get('/snp-converter', + query_string={'rs_id': 'rs9919552'}) + assert '0 Errors' in r.data + assert '0 Warnings' in r.data + assert 'NC_000011.9:g.111959625C>T' in r.data + assert 'NG_012337.2:g.7055C>T' in r.data + assert 'NM_003002.3:c.204C>T' in r.data + assert 'NP_002993.1:p.Ser68=' in r.data + + +def test_snp_converter_invalid(website): + """ + Submit the SNP converter form with an invalid SNP. + """ + r = website.get('/snp-converter', + query_string={'rs_id': 'r9919552'}) + + assert '1 Error' in r.data + assert '0 Warnings' in r.data + assert 'Fatal' in r.data + assert 'This is not a valid dbSNP id' in r.data + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_position_converter_c2g(website): + """ + Submit the position converter form with a valid variant. 
+ """
+ r = website.get('/position-converter',
+ query_string={'assembly_name_or_alias': 'hg19',
+ 'description': 'NM_003002.2:c.204C>T'})
+ assert 'NC_000011.9:g.111959625C>T' in r.data
+
+
+@pytest.mark.usefixtures('hg19_transcript_mappings')
+def test_position_converter_g2c(website):
+ """
+ Submit the position converter form with a valid variant.
+ """
+ r = website.get('/position-converter',
+ query_string={'assembly_name_or_alias': 'hg19',
+ 'description': 'NC_000011.9:g.111959625C>T'})
+ assert 'NM_003002.2:c.204C>T' in r.data
+
+
+def _batch(website, job_type='name-checker', assembly_name_or_alias=None,
+ file='', size=0, header='', lines=None):
+ """
+ Submit a batch form.
+
+ @kwarg job_type: Type of batch job to test. One of name-checker,
+ syntax-checker, position-converter.
+ @kwarg assembly_name_or_alias: Optional assembly for the batch job.
+ @kwarg file: String with variants to use as input for the batch job.
+ @kwarg size: Number of variants in input.
+ @kwarg header: Message that must be found in the batch job result.
+ @kwarg lines: Number of result rows expected.
+
+ @return: The batch result document. 
+ @rtype: string + """ + data = {'job_type': job_type, + 'email': 'test@test.test', + 'file': (BytesIO(file.encode('utf-8')), 'test.txt')} + if assembly_name_or_alias is not None: + data['assembly_name_or_alias'] = assembly_name_or_alias + + r = website.post('/batch-jobs', + data=data) + progress_url = '/' + r.location.split('/')[-1] + + r = website.get(progress_url) + assert '<div id="if_items_left">' in r.data + assert '<div id="ifnot_items_left" style="display:none">' in r.data + assert ('<span id="items_left">%d</span>' % size) in r.data + + scheduler = Scheduler.Scheduler() + scheduler.process() + + r = website.get(progress_url) + assert '<div id="if_items_left" style="display:none">' in r.data + assert '<div id="ifnot_items_left">' in r.data + + dom = lxml.html.fromstring(r.data) + result_url = dom.cssselect('#ifnot_items_left a')[0].attrib['href'] + + if not lines: + lines = size + + r = website.get(result_url) + assert 'text/plain' in r.headers['Content-Type'] + assert header in r.data + assert len(r.data.strip().split('\n')) - 1 == lines + + return r.data + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize( + 'references', [['AB026906.1', 'NM_003002.2', 'AL449423.14']], + indirect=True) +def test_batch_namechecker(website): + """ + Submit the batch name checker form. + """ + variants = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.2:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + _batch(website, + 'name-checker', + file='\n'.join(variants), + size=len(variants), + header='Input\tErrors and warnings') + + +@pytest.mark.usefixtures('db') +def test_batch_namechecker_extra_tab(website): + """ + Submit the batch syntax checker form with lines ending with tab + characters. 
+ """ + variants = ['AB026906.1(SDHD):g.7872G>T\t', + 'AB026906.1(SDHD):g.7872G>T\t', + 'AB026906.1(SDHD):g.7872G>T\t'] + _batch(website, + 'syntax-checker', + file='\n'.join(variants), + size=len(variants) * 2, + lines=len(variants), + header='Input\tStatus') + + +@pytest.mark.usefixtures('db') +def test_batch_syntaxchecker(website): + """ + Submit the batch syntax checker form. + """ + variants = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.1:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + _batch(website, + 'syntax-checker', + file='\n'.join(variants), + size=len(variants), + header='Input\tStatus') + + +@pytest.mark.usefixtures('hg19') +def test_batch_positionconverter(website): + """ + Submit the batch position converter form. + """ + variants = ['NM_003002.2:c.204C>T', + 'NC_000011.9:g.111959625C>T'] + _batch(website, + 'position-converter', + assembly_name_or_alias='hg19', + file='\n'.join(variants), + size=len(variants), + header='Input Variant') + + +@pytest.mark.usefixtures('db') +def test_batch_syntaxchecker_newlines_unix(website): + """ + Submit batch syntax checker job with Unix line endings. + """ + variants = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.1:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + _batch(website, + 'syntax-checker', + file='\n'.join(variants), + size=len(variants), + header='Input\tStatus') + + +@pytest.mark.usefixtures('db') +def test_batch_syntaxchecker_newlines_mac(website): + """ + Submit batch syntax checker job with Mac line endings. + """ + variants = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.1:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + _batch(website, + 'syntax-checker', + file='\r'.join(variants), + size=len(variants), + header='Input\tStatus') + + +@pytest.mark.usefixtures('db') +def test_batch_syntaxchecker_newlines_windows(website): + """ + Submit batch syntax checker job with Windows line endings. 
+ """ + variants = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.1:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + _batch(website, + 'syntax-checker', + file='\r\n'.join(variants), + size=len(variants), + header='Input\tStatus') + + +@pytest.mark.usefixtures('db') +def test_batch_syntaxchecker_newlines_big_unix(website): + """ + Submit big batch syntax checker job with Unix line endings. + """ + samples = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.1:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + variants = [] + # Create 240 variants out of 3 samples + for i in range(80): + variants.extend(samples) + _batch(website, + 'syntax-checker', + file='\n'.join(variants), + size=len(variants), + header='Input\tStatus') + + +@pytest.mark.usefixtures('db') +def test_batch_syntaxchecker_newlines_big_mac(website): + """ + Submit big batch syntax checker job with Mac line endings. + """ + samples = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.1:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + variants = [] + # Create 240 variants out of 3 samples + for i in range(80): + variants.extend(samples) + _batch(website, + 'syntax-checker', + file='\r'.join(variants), + size=len(variants), + header='Input\tStatus') + + +@pytest.mark.usefixtures('db') +def test_batch_syntaxchecker_newlines_big_windows(website): + """ + Submit big batch syntax checker job with Windows line endings. + """ + samples = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.1:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + variants = [] + # Create 240 variants out of 3 samples + for i in range(80): + variants.extend(samples) + _batch(website, + 'syntax-checker', + file='\r\n'.join(variants), + size=len(variants), + header='Input\tStatus') + + +@pytest.mark.usefixtures('db') +def test_batch_syntaxchecker_oldstyle(website): + """ + Submit the batch syntax checker form with old style input file. 
+ """
+ variants = ['AccNo\tGenesymbol\tMutation',
+ 'AB026906.1\tSDHD\tg.7872G>T',
+ 'NM_003002.1\t\tc.3_4insG',
+ 'AL449423.14\tCDKN2A_v002\tc.5_400del']
+ _batch(website,
+ 'syntax-checker',
+ file='\n'.join(variants),
+ size=len(variants)-1,
+ header='Input\tStatus')
+
+
+@pytest.mark.usefixtures('references')
+@pytest.mark.parametrize('references', [['AB026906.1']], indirect=True)
+def test_batch_namechecker_restriction_sites(website):
+ """
+ Submit the batch name checker form and see if restriction site effects
+ are added.
+ """
+ variants = ['AB026906.1:c.274G>T',
+ 'AB026906.1:c.[274G>T;143A>G;15G>T]']
+ results = _batch(website,
+ 'name-checker',
+ file='\n'.join(variants),
+ size=len(variants),
+ header='Input\tErrors and warnings').strip().split('\n')
+ assert 'Restriction Sites Created\tRestriction Sites Deleted' in results[0]
+ assert 'CviQI,RsaI\tBccI' in results[1]
+ assert 'CviQI,RsaI;HhaI,HinP1I;SfcI\tBccI;;BpmI,BsaXI (2),LpnPI,MnlI' in results[2]
+
+
+@pytest.mark.usefixtures('db')
+def test_batch_multicolumn(website):
+ """
+ Submit the batch syntax checker with a multiple-column input file.
+
+ This by the way also tests for the correct order of batch results.
+ """
+ variants = [('AB026906.1(SDHD):g.7872G>T', 'NM_003002.1:c.3_4insG'),
+ ('NM_003002.1:c.3_4insG', 'AB026906.1(SDHD):g.7872G>T'),
+ ('AL449423.14(CDKN2A_v002):c.5_400del', 'AL449423.14(CDKN2A_v002):c.5_400del')]
+ result = _batch(website,
+ 'syntax-checker',
+ file='\n'.join(['\t'.join(r) for r in variants]),
+ size=len(variants) * 2,
+ header='Input\tStatus',
+ lines=len(variants))
+ for line in result.splitlines()[1:]:
+ assert len(line.split('\t')) == len(variants[0]) * 2
+
+
+def test_download_py(website):
+ """
+ Download a Python example client for the web service. 
+ """ + r = website.get('/downloads/client-suds.py') + assert 'text/plain' in r.headers['Content-Type'] + assert '#!/usr/bin/env python' in r.data + + +def test_download_rb(website): + """ + Download a Ruby example client for the web service. + """ + r = website.get('/downloads/client-savon.rb') + assert 'text/plain' in r.headers['Content-Type'] + assert '#!/usr/bin/env ruby' in r.data + + +def test_download_cs(website): + """ + Download a C# example client for the web service. + """ + r = website.get('/downloads/client-mono.cs') + assert 'text/plain' in r.headers['Content-Type'] + assert 'public static void Main(String [] args) {' in r.data + + +def test_download_php(website): + """ + Download a PHP example client for the web service. + """ + r = website.get('/downloads/client-php.php') + assert 'text/plain' in r.headers['Content-Type'] + assert '<?php' in r.data + + +def test_downloads_batchtest(website): + """ + Download the batch test example file. + """ + r = website.get('/downloads/batchtestnew.txt') + assert 'text/plain' in r.headers['Content-Type'] + assert 'NM_003002.1:c.3_4insG' in r.data + + +def test_annotated_soap_api(website): + """ + Test the SOAP documentation generated from the WSDL. + """ + r = website.get('/soap-api') + assert 'text/html' in r.headers['Content-Type'] + assert 'Web Service: Mutalyzer' in r.data + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NG_012337.1']], indirect=True) +def test_getgs(website): + """ + Test the /getGS interface used by LOVD2. 
+ """ + r = website.get('/getGS', + query_string={'variantRecord': 'NM_003002.2', + 'forward': '1', + 'mutationName': 'NG_012337.1:g.7055C>T'}, + follow_redirects=True) + assert '0 Errors' in r.data + assert '0 Warnings' in r.data + assert 'Raw variant 1: substitution at 7055' in r.data + assert 'go to bottom' not in r.data + assert '<input' not in r.data + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NG_012337.1']], indirect=True) +def test_getgs_coding_multiple_transcripts(website): + """ + Test the /getGS interface on a coding description and genomic + reference with multiple transcripts. + """ + r = website.get('/getGS', + query_string={'variantRecord': 'NM_003002.2', + 'forward': '1', + 'mutationName': 'NG_012337.1:c.45A>T'}, + follow_redirects=False) + assert '/name-checker?' in r.location + assert 'description=NG_012337.1' in r.location + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NG_008939.1']], indirect=True) +def test_getgs_variant_error(website): + """ + Test the /getGS interface on a variant description with an error. + """ + # The error is that position c.45 is a C, not an A. + r = website.get('/getGS', + query_string={'variantRecord': 'NM_000532.4', + 'forward': '1', + 'mutationName': 'NG_008939.1:c.45A>T'}, + follow_redirects=False) + assert '/name-checker?' in r.location + assert 'description=NG_008939.1' in r.location + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_variantinfo_g2c(website): + """ + Test the /Variant_info interface used by LOVD2 (g to c). 
+ """ + r = website.get('/Variant_info', + query_string={'LOVD_ver': '2.0-29', + 'build': 'hg19', + 'acc': 'NM_203473.1', + 'var': 'g.48374289_48374389del'}) + assert 'text/plain' in r.headers['Content-Type'] + expected = '\n'.join(['1020', '0', '1072', '48', '48374289', '48374389', 'del']) + assert r.data == expected + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_variantinfo_c2g(website): + """ + Test the /Variant_info interface used by LOVD2 (c to g). + """ + r = website.get('/Variant_info', + query_string={'LOVD_ver': '2.0-29', + 'build': 'hg19', + 'acc': 'NM_203473.1', + 'var': 'c.1020_1072+48del'}) + assert 'text/plain' in r.headers['Content-Type'] + expected = '\n'.join(['1020', '0', '1072', '48', '48374289', '48374389', 'del']) + assert r.data == expected + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_variantinfo_c2g_downstream(website): + """ + Test the /Variant_info interface used by LOVD2 (c variant downstream + notation to g). + """ + r = website.get('/Variant_info', + query_string={'LOVD_ver': '2.0-29', + 'build': 'hg19', + 'acc': 'NM_203473.1', + 'var': 'c.1709+d187del'}) + assert 'text/plain' in r.headers['Content-Type'] + expected = '\n'.join(['1709', '187', '1709', '187', '48379389', '48379389', 'del']) + assert r.data == expected + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_variantinfo_no_variant(website): + """ + Test the /Variant_info interface used by LOVD2 (without variant). + """ + r = website.get('/Variant_info', + query_string={'LOVD_ver': '2.0-29', + 'build': 'hg19', + 'acc': 'NM_203473.1'}) + assert 'text/plain' in r.headers['Content-Type'] + assert 'text/plain' in r.content_type + expected = '\n'.join(['-158', '1709', '1371']) + assert r.data == expected + + +@pytest.mark.usefixtures('hg19_transcript_mappings') +def test_variantinfo_ivs(website): + """ + Test the /Variant_info interface used by LOVD2 (with IVS positioning). 
+ """ + r = website.get('/Variant_info', + query_string={'LOVD_ver': '2.0-33', + 'build': 'hg19', + 'acc': 'NM_000249.3', + 'var': 'c.IVS10+3A>G'}) + assert 'text/plain' in r.headers['Content-Type'] + expected = '\n'.join(['884', '3', '884', '3', '37059093', '37059093', 'subst']) + assert r.data == expected + + +@pytest.mark.usefixtures('db') +def test_upload_local_file(website): + """ + Test the genbank uploader. + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'AB026906.1.gb.bz2') + r = website.post('/reference-loader', + data={'method': 'upload_method', + 'file': (bz2.BZ2File(path), 'AB026906.1.gb')}) + assert 'Your reference sequence was loaded successfully.' in r.data + + dom = lxml.html.fromstring(r.data) + reference_url = dom.cssselect('#reference_download')[0].attrib['href'] + + r = website.get(reference_url) + assert r.data == bz2.BZ2File(path).read() + + +@pytest.mark.usefixtures('db') +def test_upload_local_file_invalid(website): + """ + Test the genbank uploader with a non-genbank file. + """ + r = website.post('/reference-loader', + data={'method': 'upload_method', + 'file': (BytesIO('this is not a genbank file'.encode('utf-8')), 'AB026906.1.gb')}) + assert 'Your reference sequence was loaded successfully.' not in r.data + assert 'The file could not be parsed.' in r.data + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NM_002001.2']], indirect=True) +def test_reference(website): + """ + Test if reference files are cached. 
+ """ + r = website.get('/name-checker', + query_string={'description': 'NM_002001.2:g.1del'}) + assert '0 Errors' in r.data + + r = website.get('/reference/NM_002001.2.gb') + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'NM_002001.2.gb.bz2') + assert r.data == bz2.BZ2File(path).read() + + +@pytest.mark.usefixtures('references') +@pytest.mark.parametrize('references', [['NM_002001.2']], indirect=True) +def test_reference_head(website): + """ + Test if reference files are cached, by issuing a HEAD request. + """ + r = website.get('/name-checker', + query_string={'description': 'NM_002001.2:g.1del'}) + assert '0 Errors' in r.data + + r = website.head('/reference/NM_002001.2.gb') + assert r.status_code == 200 + + +@pytest.mark.usefixtures('db') +def test_reference_head_none(website): + """ + Test if non-existing reference files gives a 404 on a HEAD request. + """ + r = website.head('/reference/NM_002001.2.gb') + assert r.status_code == 404 + + +@pytest.mark.usefixtures('references', 'hg19_transcript_mappings') +@pytest.mark.parametrize('references', [['NM_003002.2']], indirect=True) +def test_bed(website): + """ + BED track for variant. + """ + r = website.get('/bed', + query_string={'description': 'NM_003002.2:c.274G>T'}) + assert 'text/plain' in r.headers['Content-Type'] + assert '\t'.join(['chr11', '111959694', '111959695', '274G>T', '0', '+']) in r.data + + +@pytest.mark.usefixtures('references', 'hg19_transcript_mappings') +@pytest.mark.parametrize('references', [['NM_000132.3']], indirect=True) +def test_bed_reverse(website): + """ + BED track for variant on reverse strand. 
+ """ + r = website.get('/bed', + query_string={'description': 'NM_000132.3:c.[4374A>T;4380_4381del]'}) + assert 'text/plain' in r.headers['Content-Type'] + assert '\t'.join(['chrX', '154157690', '154157691', '4374A>T', '0', '-']) in r.data + assert '\t'.join(['chrX', '154157683', '154157685', '4380_4381del', '0', '-']) in r.data + + +def test_checksyntax_unicode(website): + """ + Run check syntax form with an invalid variant description containing + non-ASCII unicode characters. + """ + r = website.get('/syntax-checker', + query_string={'description': 'La Pe\xf1a'}) + body = r.get_data(as_text=True) + assert 'Fatal' in body + assert 'The "^" indicates the position where the error occurred' in body + assert 'Expected W:(0123...) (at char 2), (line:1, col:3)' in body + + +@pytest.mark.usefixtures('db') +def test_batch_unicode(website): + """ + Submit a batch form with non-ASCII unicode characters in the input + file. + """ + file = '\n'.join(['\u2026AB026906.1:c.274G>T', + '\u2026AL449423.14(CDKN2A_v002):c.5_400del']) + expected = [['\u2026AB026906.1:c.274G>T', + '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], + ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', + '(grammar): Expected W:(0123...) 
(at char 0), (line:1, col:1)']] + + data = {'job_type': 'syntax-checker', + 'email': 'test@test.test', + 'file': (BytesIO(file.encode('utf-8')), 'test.txt')} + + r = website.post('/batch-jobs', + data=data) + progress_url = '/' + r.location.split('/')[-1] + + assert BatchJob.query.first().email == 'test@test.test' + + scheduler = Scheduler.Scheduler() + scheduler.process() + + r = website.get(progress_url) + + dom = lxml.html.fromstring(r.data) + result_url = dom.cssselect('#ifnot_items_left a')[0].attrib['href'] + + r = website.get(result_url) + assert 'text/plain' in r.headers['Content-Type'] + + result = r.get_data(as_text=True).strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] + + +@pytest.mark.usefixtures('db') +def test_batch_unicode_email(website): + """ + Submit a batch form with non-ASCII unicode characters in the email + address. + """ + file = '\n'.join(['AB026906.1:c.274G>T', + 'AL449423.14(CDKN2A_v002):c.5_400del']) + expected = [['AB026906.1:c.274G>T', + 'OK'], + ['AL449423.14(CDKN2A_v002):c.5_400del', + 'OK']] + + data = {'job_type': 'syntax-checker', + 'email': 'pe\xf1a@test.test', + 'file': (BytesIO(file.encode('utf-8')), 'test.txt')} + + r = website.post('/batch-jobs', + data=data) + progress_url = '/' + r.location.split('/')[-1] + + assert BatchJob.query.first().email == 'pe\xf1a@test.test' + + scheduler = Scheduler.Scheduler() + scheduler.process() + + r = website.get(progress_url) + + dom = lxml.html.fromstring(r.data) + result_url = dom.cssselect('#ifnot_items_left a')[0].attrib['href'] + + r = website.get(result_url) + assert 'text/plain' in r.headers['Content-Type'] - @fix(database) - def test_reference_head_none(self): - """ - Test if non-existing reference files gives a 404 on a HEAD request. - """ - r = self.app.head('/reference/NM_002001.2.gb') - assert r.status_code == 404 - - @fix(database, hg19, hg19_transcript_mappings, cache('NM_003002.2')) - def test_bed(self): - """ - BED track for variant. 
- """ - r = self.app.get('/bed', - query_string={'description': 'NM_003002.2:c.274G>T'}) - assert 'text/plain' in r.headers['Content-Type'] - assert '\t'.join(['chr11', '111959694', '111959695', '274G>T', '0', '+']) in r.data - - @fix(database, hg19, hg19_transcript_mappings, cache('NM_000132.3')) - def test_bed_reverse(self): - """ - BED track for variant on reverse strand. - """ - r = self.app.get('/bed', - query_string={'description': 'NM_000132.3:c.[4374A>T;4380_4381del]'}) - assert 'text/plain' in r.headers['Content-Type'] - assert '\t'.join(['chrX', '154157690', '154157691', '4374A>T', '0', '-']) in r.data - assert '\t'.join(['chrX', '154157683', '154157685', '4380_4381del', '0', '-']) in r.data - - def test_checksyntax_unicode(self): - """ - Run check syntax form with an invalid variant description containing - non-ASCII unicode characters. - """ - r = self.app.get('/syntax-checker', - query_string={'description': 'La Pe\xf1a'}) - body = r.get_data(as_text=True) - assert 'Fatal' in body - assert 'The "^" indicates the position where the error occurred' in body - assert 'Expected W:(0123...) (at char 2), (line:1, col:3)' in body - - @fix(database) - def test_batch_unicode(self): - """ - Submit a batch form with non-ASCII unicode characters in the input - file. - """ - file = '\n'.join(['\u2026AB026906.1:c.274G>T', - '\u2026AL449423.14(CDKN2A_v002):c.5_400del']) - expected = [['\u2026AB026906.1:c.274G>T', - '(grammar): Expected W:(0123...) (at char 0), (line:1, col:1)'], - ['\u2026AL449423.14(CDKN2A_v002):c.5_400del', - '(grammar): Expected W:(0123...) 
(at char 0), (line:1, col:1)']] - - data = {'job_type': 'syntax-checker', - 'email': 'test@test.test', - 'file': (BytesIO(file.encode('utf-8')), 'test.txt')} - - r = self.app.post('/batch-jobs', - data=data) - progress_url = '/' + r.location.split('/')[-1] - - assert models.BatchJob.query.first().email == 'test@test.test' - - scheduler = Scheduler.Scheduler() - scheduler.process() - - r = self.app.get(progress_url) - - dom = lxml.html.fromstring(r.data) - result_url = dom.cssselect('#ifnot_items_left a')[0].attrib['href'] - - r = self.app.get(result_url) - assert 'text/plain' in r.headers['Content-Type'] - - result = r.get_data(as_text=True).strip().split('\n')[1:] - assert expected == [line.split('\t') for line in result] - - @fix(database) - def test_batch_unicode_email(self): - """ - Submit a batch form with non-ASCII unicode characters in the email - address. - """ - file = '\n'.join(['AB026906.1:c.274G>T', - 'AL449423.14(CDKN2A_v002):c.5_400del']) - expected = [['AB026906.1:c.274G>T', - 'OK'], - ['AL449423.14(CDKN2A_v002):c.5_400del', - 'OK']] - - data = {'job_type': 'syntax-checker', - 'email': 'pe\xf1a@test.test', - 'file': (BytesIO(file.encode('utf-8')), 'test.txt')} - - r = self.app.post('/batch-jobs', - data=data) - progress_url = '/' + r.location.split('/')[-1] - - assert models.BatchJob.query.first().email == 'pe\xf1a@test.test' - - scheduler = Scheduler.Scheduler() - scheduler.process() - - r = self.app.get(progress_url) - - dom = lxml.html.fromstring(r.data) - result_url = dom.cssselect('#ifnot_items_left a')[0].attrib['href'] - - r = self.app.get(result_url) - assert 'text/plain' in r.headers['Content-Type'] - - result = r.get_data(as_text=True).strip().split('\n')[1:] - assert expected == [line.split('\t') for line in result] + result = r.get_data(as_text=True).strip().split('\n')[1:] + assert expected == [line.split('\t') for line in result] diff --git a/tests/utils.py b/tests/utils.py deleted file mode 100644 index 0e77b291..00000000 --- 
a/tests/utils.py +++ /dev/null @@ -1,86 +0,0 @@ -""" -Utilities for unit tests. -""" - - -from __future__ import unicode_literals - -from functools import wraps -import os -import shutil -import tempfile - -from mutalyzer.config import settings -from mutalyzer.redisclient import client as redis -from mutalyzer import db - - -class TestEnvironment(object): - """ - Configure Mutalyzer for unit tests. All storage is transient and isolated. - """ - def __init__(self, fixtures=None): - fixtures = fixtures or [] - - self.cache_dir = tempfile.mkdtemp() - - log_handle, self.log_file = tempfile.mkstemp() - os.close(log_handle) - - database_uri = os.getenv('MUTALYZER_TEST_DATABASE_URI', 'sqlite://') - redis_uri = os.getenv('MUTALYZER_TEST_REDIS_URI', None) - - settings.configure({'DEBUG': False, - 'TESTING': True, - 'CACHE_DIR': self.cache_dir, - 'REDIS_URI': redis_uri, - 'DATABASE_URI': database_uri, - 'LOG_FILE': self.log_file}) - - # Mutalyzer create tables automatically if we're using an SQLite - # in-memory database. - if database_uri != 'sqlite://': - db.Base.metadata.drop_all(db.session.get_bind()) - db.Base.metadata.create_all(db.session.get_bind()) - - if redis_uri is not None: - redis.flushdb() - - for fixture in fixtures: - fixture() - - def destroy(self): - """ - Destroy all storage defined in the current environment. - """ - db.session.remove() - - shutil.rmtree(self.cache_dir) - os.unlink(self.log_file) - - -class MutalyzerTest(object): - """ - Test class providing an isolated test environment for each test. - """ - fixtures = () - - def setup(self): - self.environment = TestEnvironment(fixtures=self.fixtures) - - def teardown(self): - self.environment.destroy() - - -def fix(*fixtures): - """ - Decorator for a unit test setting up the specified fixtures. - """ - def decorator(f): - @wraps(f) - def fixed_f(*args, **kwargs): - for fixture in fixtures: - fixture() - return f(*args, **kwargs) - return fixed_f - return decorator -- GitLab