diff --git a/migrations/env.py b/migrations/env.py
index 251d615548bd0bc71933d8f180012782a94b6ccb..1625aef4e0acbc6090fdbcdf6cff325299e9f92c 100644
--- a/migrations/env.py
+++ b/migrations/env.py
@@ -53,25 +53,23 @@ def run_migrations_online():
     and associate a connection with the context.
 
     """
-    alembic_config = config.get_section(config.config_ini_section)
-    alembic_config['sqlalchemy.url'] = settings.DATABASE_URI
-
-    engine = engine_from_config(
-        alembic_config,
-        prefix='sqlalchemy.',
-        poolclass=pool.NullPool)
-
-    connection = engine.connect()
-    context.configure(
-        connection=connection,
-        target_metadata=target_metadata
-    )
-
-    try:
+    # Reuse the connection in config.attributes['connection'] when one is set.
+    # http://alembic.readthedocs.org/en/latest/cookbook.html#sharing-a-connection-with-a-series-of-migration-commands-and-environments
+    connectable = config.attributes.get('connection', None)
+
+    if connectable is None:  # no shared connection: build our own engine
+        alembic_config = config.get_section(config.config_ini_section)
+        alembic_config['sqlalchemy.url'] = settings.DATABASE_URI
+        connectable = engine_from_config(alembic_config,
+                                         prefix='sqlalchemy.',
+                                         poolclass=pool.NullPool)
+
+    with connectable.connect() as connection:
+        context.configure(connection=connection,
+                          target_metadata=target_metadata)
         with context.begin_transaction():
             context.run_migrations()
-    finally:
-        connection.close()
+
 
 if context.is_offline_mode():
     run_migrations_offline()
diff --git a/migrations/versions/ea660b66f26_initial_schema.py b/migrations/versions/ea660b66f26_initial_schema.py
index eec6ce6af5ee8767be03e99bda445305002394b1..bd3c148349274bb9b966410d4d12b66cfa54bd95 100644
--- a/migrations/versions/ea660b66f26_initial_schema.py
+++ b/migrations/versions/ea660b66f26_initial_schema.py
@@ -1,4 +1,4 @@
-"""initial schema
+"""Initial schema
 
 Revision ID: ea660b66f26
 Revises: None
@@ -17,8 +17,132 @@ import sqlalchemy as sa
 
 
 def upgrade():
-    pass
+    ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('assemblies',
+    sa.Column('id', sa.Integer(), nullable=False),
+    sa.Column('name', sa.String(length=30), nullable=False),
+    sa.Column('alias', sa.String(length=10), nullable=True),
+    sa.Column('taxonomy_id', sa.Integer(), nullable=False),
+    sa.Column('taxonomy_common_name', sa.String(length=50), nullable=False),
+    sa.PrimaryKeyConstraint('id'),
+    sa.UniqueConstraint('alias'),
+    sa.UniqueConstraint('name'),
+    mysql_charset='utf8',
+    mysql_engine='InnoDB'
+    )
+    op.create_table('batch_jobs',
+    sa.Column('id', sa.Integer(), nullable=False),
+    sa.Column('email', sa.String(length=200), nullable=True),
+    sa.Column('download_url', sa.String(length=200), nullable=True),
+    sa.Column('job_type', sa.Enum('name-checker', 'syntax-checker', 'position-converter', 'snp-converter', name='job_type'), nullable=False),
+    sa.Column('argument', sa.String(length=20), nullable=True),
+    sa.Column('result_id', sa.String(length=50), nullable=False),
+    sa.Column('added', sa.DateTime(), nullable=True),
+    sa.PrimaryKeyConstraint('id'),
+    mysql_charset='utf8',
+    mysql_engine='InnoDB'
+    )
+    op.create_index(op.f('ix_batch_jobs_result_id'), 'batch_jobs', ['result_id'], unique=True)
+    op.create_table('references',
+    sa.Column('id', sa.Integer(), nullable=False),
+    sa.Column('accession', sa.String(length=20), nullable=False),
+    sa.Column('checksum', sa.String(length=32), nullable=False),
+    sa.Column('geninfo_identifier', sa.String(length=13), nullable=True),
+    sa.Column('slice_accession', sa.String(length=20), nullable=True),
+    sa.Column('slice_start', sa.Integer(), nullable=True),
+    sa.Column('slice_stop', sa.Integer(), nullable=True),
+    sa.Column('slice_orientation', sa.Enum('forward', 'reverse', name='slice_orentation'), nullable=True),  # sic: 'orentation'; NOTE(review): confirm it matches the model's enum name before renaming
+    sa.Column('download_url', sa.String(length=255), nullable=True),
+    sa.Column('added', sa.DateTime(), nullable=True),
+    sa.PrimaryKeyConstraint('id'),
+    mysql_charset='utf8',
+    mysql_engine='InnoDB'
+    )
+    op.create_index(op.f('ix_references_accession'), 'references', ['accession'], unique=True)
+    op.create_index(op.f('ix_references_checksum'), 'references', ['checksum'], unique=True)
+    op.create_index(op.f('ix_references_download_url'), 'references', ['download_url'], unique=True)
+    op.create_index(op.f('ix_references_geninfo_identifier'), 'references', ['geninfo_identifier'], unique=True)
+    op.create_index('reference_slice', 'references', ['slice_accession', 'slice_start', 'slice_stop', 'slice_orientation'], unique=True)
+    op.create_table('transcript_protein_links',
+    sa.Column('id', sa.Integer(), nullable=False),
+    sa.Column('transcript_accession', sa.String(length=20), nullable=False),
+    sa.Column('protein_accession', sa.String(length=20), nullable=True),
+    sa.Column('added', sa.DateTime(), nullable=True),
+    sa.PrimaryKeyConstraint('id'),
+    mysql_charset='utf8',
+    mysql_engine='InnoDB'
+    )
+    op.create_index(op.f('ix_transcript_protein_links_protein_accession'), 'transcript_protein_links', ['protein_accession'], unique=False)
+    op.create_index(op.f('ix_transcript_protein_links_transcript_accession'), 'transcript_protein_links', ['transcript_accession'], unique=True)
+    op.create_table('batch_queue_items',
+    sa.Column('id', sa.Integer(), nullable=False),
+    sa.Column('batch_job_id', sa.Integer(), nullable=False),
+    sa.Column('item', sa.String(length=200), nullable=False),
+    sa.Column('flags', sa.String(length=20), nullable=False),
+    sa.ForeignKeyConstraint(['batch_job_id'], ['batch_jobs.id'], ondelete='CASCADE'),
+    sa.PrimaryKeyConstraint('id'),
+    mysql_charset='utf8',
+    mysql_engine='InnoDB'
+    )
+    op.create_index('batch_queue_item_with_batch_job', 'batch_queue_items', ['batch_job_id', 'id'], unique=False)
+    op.create_table('chromosomes',
+    sa.Column('id', sa.Integer(), nullable=False),
+    sa.Column('assembly_id', sa.Integer(), nullable=False),
+    sa.Column('name', sa.String(length=30), nullable=False),
+    sa.Column('accession', sa.String(length=30), nullable=False),
+    sa.Column('organelle', sa.Enum('nucleus', 'mitochondrion', name='organelle'), nullable=True),
+    sa.ForeignKeyConstraint(['assembly_id'], ['assemblies.id'], ondelete='CASCADE'),
+    sa.PrimaryKeyConstraint('id'),
+    mysql_charset='utf8',
+    mysql_engine='InnoDB'
+    )
+    op.create_index('chromosome_accession', 'chromosomes', ['assembly_id', 'accession'], unique=True)
+    op.create_index('chromosome_name', 'chromosomes', ['assembly_id', 'name'], unique=True)
+    op.create_table('transcript_mappings',
+    sa.Column('id', sa.Integer(), nullable=False),
+    sa.Column('chromosome_id', sa.Integer(), nullable=False),
+    sa.Column('reference_type', sa.Enum('refseq', 'lrg', name='reference_type'), nullable=False),
+    sa.Column('accession', sa.String(length=20), nullable=False),
+    sa.Column('version', sa.Integer(), nullable=True),
+    sa.Column('gene', sa.String(length=30), nullable=False),
+    sa.Column('transcript', sa.Integer(), nullable=False),
+    sa.Column('orientation', sa.Enum('forward', 'reverse', name='orentation'), nullable=False),  # sic: 'orentation'; NOTE(review): confirm it matches the model's enum name before renaming
+    sa.Column('start', sa.Integer(), nullable=False),
+    sa.Column('stop', sa.Integer(), nullable=False),
+    sa.Column('cds_start', sa.Integer(), nullable=True),
+    sa.Column('cds_stop', sa.Integer(), nullable=True),
+    sa.Column('exon_starts', sa.Text(), nullable=False),
+    sa.Column('exon_stops', sa.Text(), nullable=False),
+    sa.Column('select_transcript', sa.Boolean(), nullable=False),
+    sa.Column('source', sa.Enum('ucsc', 'ncbi', 'reference', name='source'), nullable=False),
+    sa.ForeignKeyConstraint(['chromosome_id'], ['chromosomes.id'], ondelete='CASCADE'),
+    sa.PrimaryKeyConstraint('id'),
+    mysql_charset='utf8',
+    mysql_engine='InnoDB'
+    )
+    op.create_index('transcript_mapping_transcript', 'transcript_mappings', ['accession', 'version', 'gene', 'transcript', 'chromosome_id'], unique=True)
+    ### end Alembic commands ###
 
 
 def downgrade():
-    pass
+    ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index('transcript_mapping_transcript', table_name='transcript_mappings')
+    op.drop_table('transcript_mappings')
+    op.drop_index('chromosome_name', table_name='chromosomes')
+    op.drop_index('chromosome_accession', table_name='chromosomes')
+    op.drop_table('chromosomes')
+    op.drop_index('batch_queue_item_with_batch_job', table_name='batch_queue_items')
+    op.drop_table('batch_queue_items')
+    op.drop_index(op.f('ix_transcript_protein_links_transcript_accession'), table_name='transcript_protein_links')
+    op.drop_index(op.f('ix_transcript_protein_links_protein_accession'), table_name='transcript_protein_links')
+    op.drop_table('transcript_protein_links')
+    op.drop_index('reference_slice', table_name='references')
+    op.drop_index(op.f('ix_references_geninfo_identifier'), table_name='references')
+    op.drop_index(op.f('ix_references_download_url'), table_name='references')
+    op.drop_index(op.f('ix_references_checksum'), table_name='references')
+    op.drop_index(op.f('ix_references_accession'), table_name='references')
+    op.drop_table('references')
+    op.drop_index(op.f('ix_batch_jobs_result_id'), table_name='batch_jobs')
+    op.drop_table('batch_jobs')
+    op.drop_table('assemblies')
+    ### end Alembic commands ###
diff --git a/tests/test_migrations.py b/tests/test_migrations.py
new file mode 100644
index 0000000000000000000000000000000000000000..c367e43c6159aeabc79a282af4e59441b2ea8cb3
--- /dev/null
+++ b/tests/test_migrations.py
@@ -0,0 +1,184 @@
+"""
+Test database migrations.
+"""
+
+
+from __future__ import unicode_literals
+
+import os
+
+import alembic.autogenerate
+import alembic.command
+import alembic.config
+from alembic.migration import MigrationContext
+import sqlalchemy as sa
+from sqlalchemy import create_engine, sql
+
+from mutalyzer import db
+
+
+def test_migrations():
+    """
+    Run all migrations, with sample rows inserted after the first one, and
+    assert that autogenerate finds no difference with `db.Base.metadata`.
+
+    We don't use `utils.MutalyzerTest` here, or `mutalyzer.db.session` in any
+    way for that matter, since it will bootstrap the database schema.
+    """
+    database_uri = os.getenv('MUTALYZER_TEST_DATABASE_URI', 'sqlite://')  # default: in-memory SQLite
+
+    alembic_config = alembic.config.Config('migrations/alembic.ini')
+    engine = create_engine(database_uri)
+
+    with engine.begin() as connection:
+        # http://alembic.readthedocs.org/en/latest/cookbook.html#sharing-a-connection-with-a-series-of-migration-commands-and-environments
+        alembic_config.attributes['connection'] = connection  # read back by migrations/env.py
+
+        if database_uri != 'sqlite://':  # a fresh in-memory SQLite database is already empty
+            db.Base.metadata.drop_all(connection)  # NOTE(review): Alembic's own version table is presumably not dropped here - confirm reruns on a persistent database work
+
+        # Create initial schema by running the first migration.
+        alembic.command.upgrade(alembic_config, 'ea660b66f26')
+
+        # Add some database content to run the migrations on.
+        add_database_content(connection)
+
+        # Run the remaining migrations.
+        alembic.command.upgrade(alembic_config, 'head')
+
+        context = MigrationContext.configure(connection)
+        assert not alembic.autogenerate.compare_metadata(  # an empty diff means schema and models agree
+            context, db.Base.metadata)
+
+    engine.dispose()
+
+
+def add_database_content(connection):
+    """
+    Insert rows into the initial schema for the later migrations to work on.
+    """
+    # Lightweight table constructs with only the tables and columns used below,
+    # so this is not a complete mapping of the schema.
+
+    assemblies = sql.table(
+        'assemblies',
+        sql.column('id', sa.Integer),
+        sql.column('name', sa.String(30)),
+        sql.column('alias', sa.String(10)),
+        sql.column('taxonomy_id', sa.Integer),
+        sql.column('taxonomy_common_name', sa.String(50)))
+
+    chromosomes = sql.table(
+        'chromosomes',
+        sql.column('id', sa.Integer),
+        sql.column('assembly_id', sa.Integer),
+        sql.column('name', sa.String(30)),
+        sql.column('accession', sa.String(30)),
+        sql.column('organelle', sa.Enum('nucleus', 'mitochondrion',
+                                        name='organelle')))
+
+    transcript_mappings = sql.table(
+        'transcript_mappings',
+        sql.column('chromosome_id', sa.Integer),
+        sql.column('reference_type', sa.Enum('refseq', 'lrg',
+                                             name='reference_type')),
+        sql.column('accession', sa.String(20)),
+        sql.column('gene', sa.String(30)),
+        sql.column('transcript', sa.Integer),
+        sql.column('orientation', sa.Enum('forward', 'reverse',
+                                          name='orentation')),  # sic: same spelling as the initial migration
+        sql.column('start', sa.Integer),
+        sql.column('stop', sa.Integer),
+        sql.column('exon_starts', sa.Text),
+        sql.column('exon_stops', sa.Text),
+        sql.column('select_transcript', sa.Boolean),
+        sql.column('source', sa.Enum('ucsc', 'ncbi', 'reference',
+                                     name='source')))
+
+    transcript_protein_links = sql.table(
+        'transcript_protein_links',
+        sql.column('transcript_accession', sa.String(30)),  # NOTE(review): the initial migration declares String(length=20)
+        sql.column('protein_accession', sa.String(30)))  # NOTE(review): the initial migration declares String(length=20)
+
+    # Add some common data.
+    connection.execute(
+        assemblies.insert(),
+        name='GRCh37',
+        taxonomy_id=9606,
+        taxonomy_common_name='Homo sapiens',
+        alias='hg19')
+    hg19_id = connection.execute(
+        assemblies.select(assemblies.c.alias == 'hg19')
+        .with_only_columns([assemblies.c.id])
+    ).fetchone()[0]  # id of the assembly row inserted above
+
+    connection.execute(
+        chromosomes.insert(),
+        assembly_id=hg19_id,
+        name='chr1',
+        accession='NC_000001.10',
+        organelle='nucleus')
+    chr1_id = connection.execute(
+        chromosomes.select(chromosomes.c.name == 'chr1')
+        .with_only_columns([chromosomes.c.id])
+    ).fetchone()[0]  # id of the chromosome row inserted above
+
+    # Data for migration 402ff01b0d5d:
+    # Fix GRCm38 chromosome accession number versions.
+    connection.execute(
+        chromosomes.insert(),
+        assembly_id=hg19_id,  # NOTE(review): attached to the GRCh37 row although the comment above refers to GRCm38 - confirm intended
+        name='chr11',
+        accession='NC_000077.60',
+        organelle='nucleus')
+
+    # Data for migration 2e062969eb54:
+    # Rename GRCh36 assembly to NCBI36.
+    connection.execute(
+        assemblies.insert(),
+        name='GRCh36',
+        taxonomy_id=9606,
+        taxonomy_common_name='Homo sapiens',
+        alias='hg18')
+
+    # Data for migration 4bafcc5086dd:
+    # Fix zero-exon transcript mappings.
+    connection.execute(
+        transcript_mappings.insert(),
+        chromosome_id=chr1_id,
+        reference_type='refseq',
+        accession='NC_001807',
+        gene='ATP6',
+        transcript=1,
+        orientation='forward',
+        start=8528,
+        stop=9208,
+        exon_starts='8528',
+        exon_stops='9208',
+        select_transcript=True,
+        source='ncbi')
+    connection.execute(
+        transcript_mappings.insert(),
+        chromosome_id=chr1_id,
+        reference_type='refseq',
+        accession='NC_001807',
+        gene='ATP8',
+        transcript=1,
+        orientation='forward',
+        start=8367,
+        stop=8573,
+        exon_starts='',  # the zero-exon case
+        exon_stops='',
+        select_transcript=True,
+        source='ncbi')
+
+    # Data for migration 3492d2ee8884:
+    # Transcript protein links have nullable transcript and unique protein.
+    connection.execute(
+        transcript_protein_links.insert(),
+        transcript_accession='NM_052818',
+        protein_accession='NP_438169')
+    connection.execute(
+        transcript_protein_links.insert(),
+        transcript_accession='NM_001079691',
+        protein_accession=None)  # a transcript without a linked protein
diff --git a/tests/utils.py b/tests/utils.py
index aa90a1fd7b8c21bbc9c9ae97a676004b5a11854d..6743804fdb528b75d0ffb3c1c2a90b821c9ae88f 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -48,6 +48,8 @@ class TestEnvironment(object):
         """
         Destroy all storage defined in the current environment.
         """
+        db.session.remove()
+
         shutil.rmtree(self.cache_dir)
         os.unlink(self.log_file)
 