Commit 5d54d4f5 authored by Mihai
Browse files

Support for LRG 1.9

- Switch to new LRG location (fix for #447).
- Extract only the gene name from the `updatable` section.
- Fix for LRG transcripts with no coding region.
- Adapted LRG examples on the website.
- Extract the annotation set based on attribute type.
- More informative error message (part of #135).
- Makes #338 obsolete.
parent 1d3b1341
......@@ -65,8 +65,7 @@ OUTPUT_LEVEL = 1
LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
# Prefix URL from where LRG files are fetched.
#LRG_PREFIX_URL = 'ftp://ftp.ebi.ac.uk/pub/databases/lrgex/'
LRG_PREFIX_URL = 'ftp://ftp.ebi.ac.uk/pub/databases/lrgex/SCHEMA_1_7_ARCHIVE/'
LRG_PREFIX_URL = 'ftp://ftp.ebi.ac.uk/pub/databases/lrgex/'
# Allow for this fraction of errors in batch jobs.
BATCH_JOBS_ERROR_THRESHOLD = 0.05
......
This diff is collapsed.
......@@ -1551,10 +1551,20 @@ def process_variant(mutator, description, record, output):
# Todo: Incorrect error message, it might also be that
# there are no transcripts at all (e.g. N4BP2L1 on
# NG_012772.1).
output.addMessage(__file__, 4, "ENOTRANSCRIPT",
"Multiple transcripts found for gene %s. Please " \
"choose from: %s" % (gene.name,
", ".join(gene.listLoci())))
transcripts_no = len(gene.listLoci())
if transcripts_no == 1:
message = "Transcript t%s not found. The reference " \
"sequence contains only transcript t%s." \
%(transcript_id, gene.listLoci()[0])
elif transcripts_no > 1:
message = "Transcript t%s not found. The reference " \
"sequence contains for gene %s the " \
"following %s transcripts: %s."\
%(transcript_id, gene.name,
len(gene.listLoci()),
", ".join(("t%s" %c)
for c in gene.listLoci()))
output.addMessage(__file__, 4, "ENOTRANSCRIPT", message)
else:
# No transcript id given.
if len(gene.transcriptList) == 1:
......
......@@ -21,7 +21,7 @@ Variation Society</a>.
<p>Examples:
<code class="example-input" data-for="description">AB026906.1:c.40_42del</code>,
<code class="example-input" data-for="description">NG_012337.1(SDHD_v001):c.274G&gt;T</code>,
<code class="example-input" data-for="description">LRG_9t1:c.159dup</code>
<code class="example-input" data-for="description">LRG_24t1:c.159dup</code>
</p>
<!-- <a href="{{ url_for('.name_checker') }}" class="btn btn-default btn-small btn-primary">Try
......
......@@ -55,7 +55,7 @@
<p>Examples:
<code class="example-input" data-for="description">AB026906.1:c.40_42del</code>,
<code class="example-input" data-for="description">NG_012337.1(SDHD_v001):c.274G&gt;T</code>,
<code class="example-input" data-for="description">LRG_9t1:c.159dup</code>
<code class="example-input" data-for="description">LRG_24t1:c.159dup</code>
</p>
</div>
......
......@@ -33,7 +33,7 @@ normalize it to HGVS. Use the <a href="{{ url_for('.name_checker') }}">Name Chec
<code class="example-input"
data-for="description">NM_003002.3:c.274G&gt;T</code>,
<code class="example-input"
data-for="description">LRG_9t1:c.274G&gt;T</code>,
data-for="description">LRG_11t1:c.274G&gt;T</code>,
<code class="example-input"
data-for="description">chr11:g.111959693G&gt;T</code>,
<code class="example-input"
......
......@@ -20,7 +20,7 @@ Sequence Variant Nomenclature">HGVS</a> format.
<p>Examples:
<code class="example-input" data-for="description">AB026906.1:c.40_42del</code>,
<code class="example-input" data-for="description">NG_012337.1(SDHD_v001):c.274G&gt;T</code>,
<code class="example-input" data-for="description">LRG_9t1:c.159dup</code>
<code class="example-input" data-for="description">LRG_24t1:c.159dup</code>
</p>
</div>
<div class="form-group button-group">
......
No preview for this file type
......@@ -147,8 +147,14 @@ L41870.1:
checksum: 91b1e539a053f731f95d230a06710897
filename: L41870.1.gb.bz2
LRG_1:
checksum: 5b8f5a39fcd9e3005688eddffd482746
checksum: 8dc3c2fcc80b3216319d8b218e5093ab
filename: LRG_1.xml.bz2
LRG_24:
checksum: 551d043bd217004059525999a134ef3b
filename: LRG_24.xml.bz2
LRG_163:
checksum: fd4a3161a23ba1ed23c112195f14617d
filename: LRG_163.xml.bz2
MARK1:
accession: UD_139015213982
checksum: 0d63a8fe5beddeb793940f6ae194b985
......
"""
Tests for the mutalyzer.parsers.lrg module.
"""
from __future__ import unicode_literals
import os
import bz2
from mutalyzer.parsers.lrg import create_record
from fixtures import with_references
@with_references('LRG_1')
def test_lrg_basic(settings, references):
    """
    Parse the cached LRG_1 file and check the basic record content.

    The record is expected to hold a single gene (COL1A1) with exactly one
    transcript, named '1', that has a CDS, and 'Homo sapiens' as organism.
    """
    accession = references[0].accession
    filename = os.path.join(settings.CACHE_DIR, '%s.xml.bz2' % accession)
    # Context manager guarantees the handle is closed even when reading
    # the compressed file fails.
    with bz2.BZ2File(filename, 'r') as file_handle:
        content = file_handle.read()
    record = create_record(content)

    gene = record.geneList[0]
    assert [g.name for g in record.geneList] == ['COL1A1']
    assert gene.transcriptList[0].name == '1'
    assert len(gene.transcriptList) == 1
    assert gene.transcriptList[0].CDS is not None
    assert record.organism == 'Homo sapiens'
@with_references('LRG_24')
def test_lrg_multiple_transcripts(settings, references):
    """
    Parse the cached LRG_24 file and check that its first gene carries
    two transcripts.
    """
    accession = references[0].accession
    filename = os.path.join(settings.CACHE_DIR, '%s.xml.bz2' % accession)
    # Context manager guarantees the handle is closed even when reading
    # the compressed file fails.
    with bz2.BZ2File(filename, 'r') as file_handle:
        content = file_handle.read()
    record = create_record(content)

    assert len(record.geneList[0].transcriptList) == 2
@with_references('LRG_163')
def test_lrg_no_coding_sequence(settings, references):
    """
    Parse the cached LRG_163 file and check that its only transcript has
    no coding sequence (CDS is None).
    """
    accession = references[0].accession
    filename = os.path.join(settings.CACHE_DIR, '%s.xml.bz2' % accession)
    # Context manager guarantees the handle is closed even when reading
    # the compressed file fails.
    with bz2.BZ2File(filename, 'r') as file_handle:
        content = file_handle.read()
    record = create_record(content)

    transcripts = record.geneList[0].transcriptList
    assert len(transcripts) == 1
    assert transcripts[0].CDS is None
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment