Skip to content
Snippets Groups Projects
Commit 5e0d444a authored by Vermaat's avatar Vermaat
Browse files

Fix transcript mappings containing no exons

For transcripts without any UTR and CDS entries in the NCBI Mapview
file (seems to happen for  predicted genes), we generate one exon
spanning the entire transcript.
parent 3dfb33f9
No related branches found
No related tags found
No related merge requests found
"""Fix zero-exon transcript mappings
Revision ID: 4bafcc5086dd
Revises: 2e062969eb54
Create Date: 2015-07-20 16:16:01.602964
"""
from __future__ import unicode_literals
# revision identifiers, used by Alembic.
revision = '4bafcc5086dd'
down_revision = u'2e062969eb54'
from alembic import op
from sqlalchemy import sql
import sqlalchemy as sa
def upgrade():
transcript_mappings = sql.table('transcript_mappings',
sql.column('start', sa.Integer()),
sql.column('stop', sa.Integer()),
sql.column('exon_starts', sa.Text()),
sql.column('exon_stops', sa.Text()))
# https://alembic.readthedocs.org/en/latest/ops.html#alembic.operations.Operations.execute
op.execute(transcript_mappings
.update()
.where(transcript_mappings.c.exon_starts == op.inline_literal(''))
.values({'exon_starts': transcript_mappings.c.start,
'exon_stops': transcript_mappings.c.stop}))
def downgrade():
# We cannot reliably downgrade this migration.
pass
......@@ -920,6 +920,9 @@ def import_from_mapview_file(assembly, mapview_file, group_label):
Our strategy is too sort by gene and chromosome and process the file
grouped by these two fields.
For transcripts without any UTR and CDS entries (seems to happen for
predicted genes), we generate one exon spanning the entire transcript.
"""
columns = ['taxonomy', 'chromosome', 'start', 'stop', 'orientation',
'contig', 'ctg_start', 'ctg_stop', 'ctg_orientation',
......@@ -999,6 +1002,12 @@ def import_from_mapview_file(assembly, mapview_file, group_label):
else:
cds = None
# If no exons are annotated, we create one spanning the entire
# transcript.
if not exon_starts:
exon_starts = [start]
exon_stops = [stop]
yield TranscriptMapping.create_or_update(
chromosome, 'refseq', accession, gene, orientation, start,
stop, exon_starts, exon_stops, 'ncbi', cds=cds,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment