Skip to content
Snippets Groups Projects
Commit e921f13a authored by Vermaat's avatar Vermaat
Browse files

Merge pull request #63 from mutalyzer/mapping-zero-exons

Fix transcript mappings containing no exons
parents 3dfb33f9 5e0d444a
No related branches found
No related tags found
No related merge requests found
"""Fix zero-exon transcript mappings
Revision ID: 4bafcc5086dd
Revises: 2e062969eb54
Create Date: 2015-07-20 16:16:01.602964
"""
from __future__ import unicode_literals
# revision identifiers, used by Alembic.
revision = '4bafcc5086dd'
down_revision = u'2e062969eb54'
from alembic import op
from sqlalchemy import sql
import sqlalchemy as sa
def upgrade():
transcript_mappings = sql.table('transcript_mappings',
sql.column('start', sa.Integer()),
sql.column('stop', sa.Integer()),
sql.column('exon_starts', sa.Text()),
sql.column('exon_stops', sa.Text()))
# https://alembic.readthedocs.org/en/latest/ops.html#alembic.operations.Operations.execute
op.execute(transcript_mappings
.update()
.where(transcript_mappings.c.exon_starts == op.inline_literal(''))
.values({'exon_starts': transcript_mappings.c.start,
'exon_stops': transcript_mappings.c.stop}))
def downgrade():
# We cannot reliably downgrade this migration.
pass
...@@ -920,6 +920,9 @@ def import_from_mapview_file(assembly, mapview_file, group_label): ...@@ -920,6 +920,9 @@ def import_from_mapview_file(assembly, mapview_file, group_label):
Our strategy is too sort by gene and chromosome and process the file Our strategy is too sort by gene and chromosome and process the file
grouped by these two fields. grouped by these two fields.
For transcripts without any UTR and CDS entries (seems to happen for
predicted genes), we generate one exon spanning the entire transcript.
""" """
columns = ['taxonomy', 'chromosome', 'start', 'stop', 'orientation', columns = ['taxonomy', 'chromosome', 'start', 'stop', 'orientation',
'contig', 'ctg_start', 'ctg_stop', 'ctg_orientation', 'contig', 'ctg_start', 'ctg_stop', 'ctg_orientation',
...@@ -999,6 +1002,12 @@ def import_from_mapview_file(assembly, mapview_file, group_label): ...@@ -999,6 +1002,12 @@ def import_from_mapview_file(assembly, mapview_file, group_label):
else: else:
cds = None cds = None
# If no exons are annotated, we create one spanning the entire
# transcript.
if not exon_starts:
exon_starts = [start]
exon_stops = [stop]
yield TranscriptMapping.create_or_update( yield TranscriptMapping.create_or_update(
chromosome, 'refseq', accession, gene, orientation, start, chromosome, 'refseq', accession, gene, orientation, start,
stop, exon_starts, exon_stops, 'ncbi', cds=cds, stop, exon_starts, exon_stops, 'ncbi', cds=cds,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment