Commit 8ffd09b7 authored by Mihai's avatar Mihai
Browse files

Merge branch 'master' into lrg_pending_fix

parents be14b9b5 52227eb7
......@@ -3,11 +3,40 @@ Changelog
This is a record of changes made between each Mutalyzer release.
Version 2.0.27
---------------
Release date to be decided.
Version 2.0.26
---------------
Released on July 19th 2017.
- Description-extractor dependency updated to version 2.35 (`#429
<https://github.com/mutalyzer/mutalyzer/pull/429>`_).
- Fix for negative cCDSStop due to wrong transcript to protein link. (`#430
<https://github.com/mutalyzer/mutalyzer/issues/430>`_)
Version 2.0.25
---------------
Released on May 17th 2017.
- Fix for batch processor crash when trying to alter an "item" column of the
"batch_queue_items" database table. (`#426
<https://github.com/mutalyzer/mutalyzer/pull/426>`_).
Version 2.0.24
---------------
Release date to be decided.
Released on April 12th 2017.
- Fix for SNP converter crash when called with 'rs0' as parameter.
The SNP converter now displays more warning messages. (`#419
<https://github.com/mutalyzer/mutalyzer/issues/419>`_).
Version 2.0.23
......
......@@ -371,7 +371,7 @@ class GenBankRetriever(Retriever):
# EFetch `seq_start` and `seq_stop` are one-based, inclusive, and
# in reference orientation.
handle = Entrez.efetch(
db='nuccore', rettype='gb', retmode='text', id=accno,
db='nuccore', rettype='gbwithparts', retmode='text', id=accno,
seq_start=start, seq_stop=stop, strand=orientation)
raw_data = handle.read()
handle.close()
......
......@@ -195,6 +195,9 @@ Mutalyzer batch scheduler""" % download_url)
if "S0" in flags :
message = "Entry could not be formatted correctly, check "\
"batch input file help for details"
elif "S2" in flags:
message = "Unaccepted input line length, check "\
"batch input file help for details"
elif "S9" in flags :
message = "Empty Line"
else :
......@@ -208,7 +211,7 @@ Mutalyzer batch scheduler""" % download_url)
return False
#__processFlags
def __alterBatchEntries(self, jobID, old, new, flag, nselector) :
def __alterBatchEntries(self, jobID, old, new, flag, nselector, O) :
"""
Replace within one JobID all entries matching old with new, if they do
not match the negative selector.
......@@ -255,13 +258,21 @@ Mutalyzer batch scheduler""" % download_url)
# 'flag': flag,
# 'nselector': nselector}
#session.execute(query, parameters)
BatchQueueItem.query \
.filter_by(batch_job_id=jobID) \
.filter(BatchQueueItem.item.startswith(old),
~BatchQueueItem.item.startswith(nselector)) \
.update({'item': func.replace(BatchQueueItem.item, old, new),
'flags': BatchQueueItem.flags + flag},
synchronize_session=False)
try:
BatchQueueItem.query \
.filter_by(batch_job_id=jobID) \
.filter(BatchQueueItem.item.startswith(old+':'),
~BatchQueueItem.item.startswith(nselector),
~BatchQueueItem.flags.contains('S2')) \
.update({'item': func.replace(BatchQueueItem.item, old, new),
'flags': BatchQueueItem.flags + flag},
synchronize_session=False)
except Exception as ex:
message = ("An exception of type '%s' occurred in __alterBatchEntries() "
"with the following arguments: %s. "
"Other info: old=%s, new=%s, flag=%s, nselector=%s"
% (type(ex).__name__, ex.args, old, new, flag, nselector))
O.addMessage(__file__, 4, "ABATCHE", message)
session.commit()
#__alterBatchEntries
......@@ -340,7 +351,7 @@ Mutalyzer batch scheduler""" % download_url)
O.addMessage(__file__, 2, "WBSUBST",
"All further occurrences of %s will be substituted "
"by %s" % (old, new))
self.__alterBatchEntries(jobID, old, new, flag, nselector)
self.__alterBatchEntries(jobID, old, new, flag, nselector, O)
#if
#for
#_updateDbFlags
......@@ -726,12 +737,7 @@ Mutalyzer batch scheduler""" % download_url)
descriptions = []
if not skip:
try:
descriptions = ncbi.rsid_to_descriptions(cmd)
except ncbi.ServiceError:
O.addMessage(__file__, 4, 'EENTREZ',
'An error occured while communicating with '
'dbSNP.')
descriptions = ncbi.rsid_to_descriptions(cmd, O)
# Todo: Is output ok?
outputline = "%s\t" % cmd
......@@ -801,6 +807,8 @@ Mutalyzer batch scheduler""" % download_url)
else:
flag = "S9" # Flag for empty line
inputl = " " #Database doesn't like an empty inputfield
elif len(inputl) > 190: # Input line length not accepted
flag = "S2" # Flag for unaccepted input line length
else:
flag = None
if (i + 1) % columns:
......
......@@ -21,8 +21,8 @@ from __future__ import unicode_literals
# [1] http://peak.telecommunity.com/DevCenter/setuptools#specifying-your-project-s-version
# [2] http://semver.org/
__version_info__ = ('2', '0', '24', 'dev')
__date__ = '9 Nov 2016'
__version_info__ = ('2', '0', '27', 'dev')
__date__ = '19 July 2017'
__version__ = '.'.join(__version_info__)
......
......@@ -12,6 +12,7 @@ import argparse
import signal
import sys
import time
import socket
from .. import db
from .. import Scheduler
......@@ -41,6 +42,7 @@ def process():
signal.signal(signal.SIGINT, handle_exit)
while True:
# Process batch jobs.
scheduler.process()
db.session.remove()
......@@ -58,6 +60,7 @@ def main():
"""
Command line interface to the batch processor.
"""
socket.setdefaulttimeout(60)
parser = argparse.ArgumentParser(
description='Mutalyzer batch processor.',
epilog='The process can be shutdown gracefully by sending a SIGINT '
......
......@@ -335,22 +335,15 @@ def protein_to_transcript(protein_accession, protein_version=None,
match_version=match_version)
def rsid_to_descriptions(rsid):
def _get_snp_from_ncbi(rsid):
"""
Return all annotated HGVS descriptions for a given dbSNP rs#.
:arg str rsid: The rs# of the dbSNP record (e.g., `rs9919552`).
:raises ServiceError: On error in Entrez communication.
Connects to the Entrez DB to fetch the annotated SNP records.
:returns: List of HGVS descriptions.
:rtype: list(str)
:param rsid: The rs# of the dbSNP record (e.g., `rs9919552`).
:return: response_text(str)
"""
Entrez.email = settings.EMAIL
if not rsid.startswith('rs') or not rsid[2:].isdigit():
return []
try:
response = Entrez.efetch(db='snp', id=rsid[2:], retmode='xml')
except (IOError, httplib.HTTPException):
......@@ -363,8 +356,39 @@ def rsid_to_descriptions(rsid):
# TODO: Log error.
raise ServiceError()
return response_text
def rsid_to_descriptions(rsid, output):
"""
Return all annotated HGVS descriptions for a given dbSNP rs#.
:arg str rsid: The rs# of the dbSNP record (e.g., `rs9919552`).
:raises ServiceError: On error in Entrez communication.
:returns: List of HGVS descriptions.
:rtype: list(str)
"""
# Some first checks. The rsid should be prefixed with 'rs'.
if not rsid.startswith('rs') or not rsid[2:].isdigit():
output.addMessage(__file__, 2, 'RSID',
'Incorrect RSID input format.')
return []
# Get the NCBI Entrez DB response.
try:
response_text = _get_snp_from_ncbi(rsid)
except ServiceError:
output.addMessage(__file__, 4, 'EENTREZ',
'An error occured while communicating with dbSNP.')
return []
if response_text.strip() == b'\n':
# This is apparently what dbSNP returns for non-existing rs#.
output.addMessage(__file__, 2, 'EENTREZ',
'Non existing %s in the DB.' % rsid)
return []
try:
......@@ -377,6 +401,8 @@ def rsid_to_descriptions(rsid):
except IndexError:
# The expected root element is not present, this has also been
# observed as a response for non-existing rs#.
output.addMessage(__file__, 2, 'EENTREZ',
'Non existing %s in the DB or no root element.' % rsid)
return []
return [hgvs.lastChild.data for hgvs in rs.getElementsByTagName('hgvs')]
......@@ -349,20 +349,39 @@ class GBparser():
return 1 # Everything matches, but there is little information.
#__matchByRange
def link_via_attribute(self, t, p, attr, method_display):
    """
    Attempt to link a transcript (t) to a protein (p) through one
    attribute (attr).

    The link is only made when the range match between both loci is
    positive, the protein has not been linked yet, and the attribute of
    the transcript is set and equal to that of the protein.

    :param t: A transcript.
    :param p: A protein.
    :param attr: Name of the attribute on which the link should be
        performed.
    :param method_display: The link method to be displayed at output; it
        is stored in the linkMethod variable of the transcript.
    :return: True if the link was performed, False otherwise.
    """
    # Guard clauses, checked in the same order as the conditions they
    # replace: range match first, then the linked state of the protein.
    if not (self.__matchByRange(t, p) > 0):
        return False
    if p.linked:
        return False

    # An unset (falsy) attribute on the transcript never produces a link.
    transcript_value = getattr(t, attr)
    if not transcript_value:
        return False
    if not (transcript_value == getattr(p, attr)):
        return False

    # All conditions hold: record the link on both loci.
    t.link = p
    t.linkMethod = method_display
    p.linked = True
    return True
def link(self, rnaList, cdsList):
"""
Link mRNA loci to CDS loci (all belonging to one gene).
First of all, the range of the CDS must be a subrange of that of
the mRNA. If this is true, then we try to link both loci. The first
method is by looking at the locus_tag, if this fails, we try to
match the proteinLink tags, if this also fails, we try the
method is by looking at the proteinLink, if this fails, we try to
match the locus tags, if this also fails, we try the
productTag.
If no link could be found, but there is only one possibility left,
the loci are linked too.
The method that was used to link the loci, is put in the linkmethod
The method that was used to link the loci, is put in the linkMethod
variable of the transcript locus. The link variable of the
transcript locus is a pointer to the CDS locus. Furthermore, the
linked variable of the CDS locus is set to indicate that this locus
......@@ -391,32 +410,20 @@ class GBparser():
for i in rnaList :
i.link = None
i.linkMethod = None
# Try first to link via the proteinLink tag.
for j in cdsList :
if self.__matchByRange(i, j) > 0 :
# Try to link via the locus tag first.
if i.locus_tag and i.locus_tag == j.locus_tag :
i.link = j
i.linkMethod = "locus"
j.linked = True
#print "Linked:", j.locus_tag
if self.link_via_attribute(i, j, 'proteinLink', 'protein'):
break
if not i.link:
# Try to link next via the locus tag.
for j in cdsList:
if self.link_via_attribute(i, j, 'locus_tag', 'locus'):
break
#if
# Try the proteinLink tag.
if i.proteinLink and i.proteinLink == j.proteinLink :
i.link = j
i.linkMethod = "protein"
j.linked = True
if not i.link:
# Try finally to link via the productTag.
for j in cdsList:
if self.link_via_attribute(i, j, 'productTag', 'product'):
break
#if
# Try the productTag.
if i.productTag and i.productTag == j.productTag :
i.link = j
i.linkMethod = "product"
j.linked = True
break
#if
#if
#for
# Now look if there is only one possibility left.
# One *could* also do exhaustion per matched range...
......
......@@ -1464,12 +1464,7 @@ class MutalyzerService(ServiceBase):
stats.increment_counter('snp-converter/webservice')
try:
descriptions = ncbi.rsid_to_descriptions(rs_id)
except ncbi.ServiceError:
output.addMessage(__file__, 4, 'EENTREZ',
'An error occured while communicating with '
'dbSNP.')
descriptions = ncbi.rsid_to_descriptions(rs_id, output)
output.addMessage(__file__, -1, 'INFO',
'Finished processing getdbSNPDescription(%s)' % rs_id)
......@@ -1527,6 +1522,8 @@ class MutalyzerService(ServiceBase):
# From all the transcripts for this gene, get the lowest start
# position and highest stop position. For integrity, we group by
# chromosome and orientation.
# Order by chromosome name for disambiguation, as is done in
# Convertor._get_mapping()
mapping = \
session.query(func.min(TranscriptMapping.start),
func.max(TranscriptMapping.stop),
......@@ -1538,6 +1535,7 @@ class MutalyzerService(ServiceBase):
.join(TranscriptMapping.chromosome) \
.group_by(Chromosome.id,
TranscriptMapping.orientation) \
.order_by(Chromosome.name.asc()) \
.first()
if not mapping:
......
......@@ -173,6 +173,28 @@ of Mutalyzer.
title="SUN Microsystems"></a>
</div>
<h2 id="recommended-by">Recommended by</h2>
<div>
<a href="http://www.humanvariomeproject.org/solutions/recommended-systems.html">
<img src="{{ url_for('static',
filename='images/hvp_recommended_system.png')
}}"
width="209"
height="140"
alt="HVP"
title="HVP"></a>
<a href="http://www.irdirc.org/activities/irdirc-recognized-resources/">
<img src="{{ url_for('static',
filename='images/irdirc_recognized_resource.jpg')
}}"
width="300"
height="166"
alt="IRDiRC"
title="IRDiRC"></a>
</div>
<h2>Counters</h2>
<p>
......
......@@ -172,14 +172,30 @@
development version
{% endif %}
</span>
<br>
<a href="https://github.com/mutalyzer/mutalyzer/blob/master/CHANGES.rst">
Changelog
</a>
</p>
</div>
<div class="col-md-4">
<p class="text-muted">HGVS nomenclature version {{ nomenclature_version }}</p>
<p class="text-muted">HGVS nomenclature version {{ nomenclature_version }}
(<a href="https://github.com/mutalyzer/mutalyzer/wiki/HGVS-Mutalyzer-Differences">notes</a>)
<br>
<a href="{{ url_for('website.about') }}#recommended-by" name="recommended-by">
Recommended by
</a>
<br>
</p>
</div>
<div class="col-md-4">
<img src="{{ url_for('static', filename='images/LUMC_24x24.png') }}" align="middle">
<p>&copy; {{ copyright_years[0] }}-{{ copyright_years[1] }} <a href="http://www.lumc.nl">LUMC</a></p>
<p>&copy; {{ copyright_years[0] }}-{{ copyright_years[1] }} <a href="http://www.lumc.nl">LUMC</a>
<br>
<a href="https://www.gnu.org/licenses/agpl-3.0.html">
Disclaimer
</a>
</p>
</div>
</footer>
......
......@@ -67,7 +67,7 @@
</ul>
<p>
The maximum file size is {{ max_file_size }} megabytes, and the maximum
length per entry (variant description) is 200 characters.
length per entry (variant description) is 190 characters.
</p>
<p>We accept two types of input files, you can download examples below.</p>
......
......@@ -93,6 +93,13 @@
{% endfor %}
{% endif %}
{% if parse_error %}
<div class="alert alert-info">
Please note that Mutalyzer does not cover the entire HGVS nomenclature. You can find more information
<a href="https://github.com/mutalyzer/mutalyzer/wiki/HGVS-Mutalyzer-Differences">here</a>.
</div>
{% endif %}
{% if summary == "0 Errors, 0 Warnings." %}
<p class="alert alert-success summary">{{ summary }}</p>
{% else %}
......@@ -306,6 +313,10 @@
{% endfor %}
</tbody>
</table>
<p>
Link method priority order: protein (NCBI), locus tag,
product tag, exhaustion, and construction.
</p>
{% endif %}
</div>
</div>
......
......@@ -51,6 +51,10 @@ normalize it to HGVS. Use the <a href="{{ url_for('.name_checker') }}">Name Chec
{% for m in messages %}
{% if m.class == "error" %}
<p class="alert alert-danger" title="{{ m.level }} (origin: {{ m.origin }})">{{ m.description }}</p>
<p class="alert alert-info">
Please note that Mutalyzer does not cover the entire HGVS nomenclature. You can find more information
<a href="https://github.com/mutalyzer/mutalyzer/wiki/HGVS-Mutalyzer-Differences">here</a>.
</p>
{% elif m.class == "warning" %}
<p class="alert alert-warning" title="{{ m.level }} (origin: {{ m.origin }})">{{ m.description }}</p>
{% elif m.class == "information" %}
......
......@@ -54,6 +54,13 @@ Sequence Variant Nomenclature">HGVS</a> format.
{% endif %}
{% endfor %}
{% endif %}
{% if parse_error %}
<div class="alert alert-info">
Please note that Mutalyzer does not cover the entire HGVS nomenclature. You can find more information
<a href="https://github.com/mutalyzer/mutalyzer/wiki/HGVS-Mutalyzer-Differences">here</a>.
</div>
{% endif %}
{% endif %}{# description #}
{% endblock content %}
......@@ -497,12 +497,7 @@ def snp_converter():
% (rs_id, request.remote_addr))
stats.increment_counter('snp-converter/website')
descriptions = []
try:
descriptions = ncbi.rsid_to_descriptions(rs_id)
except ncbi.ServiceError:
output.addMessage(__file__, 4, 'EENTREZ',
'An error occured while communicating with dbSNP.')
descriptions = ncbi.rsid_to_descriptions(rs_id, output)
messages = map(util.message_info, output.getMessages())
......
......@@ -11,7 +11,8 @@ backtranslate==0.0.5
biopython==1.68
chardet==2.3.0
cssselect==0.9.1
description-extractor==2.3.2
-e git+https://github.com/mutalyzer/crossmapper.git#egg=crossmapper
description-extractor==2.3.5
interval-binning==1.0.0
lxml==3.5.0
mock==1.3.0
......
......@@ -274,3 +274,20 @@ chr9_reverse:
- null
- - NM_001195249
- NP_001182178
UD_144413132067:
accession: UD_144413132067
checksum: 960c8ea95537c7e49735dfc7cb0211a3
filename: UD_144413132067.gb.bz2
links:
- - NM_001220777.1
- NP_001207706.1
- - NM_078467.2
- NP_510867.1
- - NM_001291549.1
- NP_001278478.1
- - NM_000389.4
- NP_000380.1
- - NM_001220778.1
- NP_001207707.1
- - NR_109836
- null
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment