Skip to content
Snippets Groups Projects
Commit e5a17cd8 authored by Vermaat's avatar Vermaat
Browse files

Some refactoring and docstrings added.

git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/branches/gbinfo-sync-branch@317 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
parent dfa57a20
No related branches found
No related tags found
No related merge requests found
"""
Module for synchronizing the database with other Mutalyzer instances.
Todo: add some logging to the output object.
"""
......@@ -19,11 +21,18 @@ DEFAULT_CREATED_SINCE_DAYS = 7
class CacheSync(object):
"""
Todo.
Synchronize the database cache with other Mutalyzer instances.
"""
def __init__(self, config, output, database):
"""
Todo.
Instantiate the object.
@arg config: A configuration object.
@type config: mutalyzer.config.Config
@arg output: An output object.
@type output: mutalyzer.output.Output
@arg database: A database object.
@type database: mutalyzer.Db.Cache
"""
self._config = config
self._output = output
......@@ -31,33 +40,57 @@ class CacheSync(object):
def local_cache(self, created_since=None):
    """
    Get all entries in the local cache with creation date {created_since}
    or later.

    Each database row is translated to a dictionary. The 'cached' key
    holds the name of the GenBank file ('<name>.gb') if a compressed copy
    ('<name>.gb.bz2') exists in the retriever cache directory, or None
    otherwise.

    @kwarg created_since: Only entries with this creation date or later
                          are returned. If not given, defaults to
                          DEFAULT_CREATED_SINCE_DAYS days before today.
    @type created_since: datetime.datetime

    @return: List of cache entries.
    @rtype: list(dictionary)
    """
    if not created_since:
        created_since = datetime.today() - \
                        timedelta(days=DEFAULT_CREATED_SINCE_DAYS)

    entries = self._database.getGBSince(created_since)
    cache = []

    # Translate each entry to a dictionary and check if it is cached on
    # our filesystem.
    for entry in entries:
        # Note that this way we only include Genbank files, not LRG files.
        cached = None
        if os.path.isfile(os.path.join(self._config.Retriever.cache,
                                       '%s.gb.bz2' % entry[0])):
            cached = '%s.gb' % entry[0]
        cache.append({'name': entry[0],
                      'gi': entry[1],
                      'hash': entry[2],
                      'chromosomeName': entry[3],
                      'chromosomeStart': entry[4],
                      'chromosomeStop': entry[5],
                      'chromosomeOrientation': entry[6],
                      'url': entry[7],
                      'created': entry[8],
                      'cached': cached})

    return cache
def remote_cache(self, remote_wsdl, created_since=None):
"""
Todo.
Get all entries in the remote cache with creation date {created_since}
or later.
@arg remote_wsdl: The url of the remote SOAP WSDL description.
@type remote_wsdl: string
@kwarg created_since: Only entries with this creation date or later
are returned.
@type created_since: datetime.datetime
@return: List of cache entries.
@rtype: list(dictionary)
"""
if not created_since:
created_since = datetime.today() - \
......@@ -85,10 +118,30 @@ class CacheSync(object):
def sync_with_remote(self, remote_wsdl, url_template, created_since=None):
"""
Todo.
Synchronize the local cache with the remote cache.
>>> wsdl = 'http://mutalyzer.nl/mutalyzer/services/?wsdl'
>>> template = 'http://mutalyzer.nl/mutalyzer/Reference/{file}'
>>> self.sync_with_remote(wsdl, template)
(14, 3)
@arg remote_wsdl: The url of the remote SOAP WSDL description.
@type remote_wsdl: string
@arg url_template: Formatting string containing a {file} occurrence,
see example usage above.
@type url_template: string
@kwarg created_since: Only remote entries with this creation date or
later are considered.
@type created_since: datetime.datetime
@return: The number of entries added to the local cache and the number
of cache files downloaded from the remote site.
@rtype: tuple(int, int)
"""
remote_cache = self.remote_cache(remote_wsdl, created_since)
inserted = downloaded = 0
for entry in remote_cache:
if self._database.getHash(entry['name']):
continue
......@@ -104,17 +157,23 @@ class CacheSync(object):
entry['chromosomeStop'],
entry['chromosomeOrientation'],
entry['url'])
print 'inserting %s' % entry['name']
if not entry['chromosomeName'] and not entry['url']:
if entry['cached']:
print 'downloading file from remote cache: %s' % (url_template % str(entry['cached']))
self.store_remote_file(entry['name'], url_template % entry['cached'])
else:
print 'cannot download this file from remote cache'
inserted += 1
if not entry['chromosomeName'] and not entry['url'] \
and entry['cached']:
url = url_template.format(file=entry['cached'])
self.store_remote_file(entry['name'], url)
downloaded += 1
return inserted, downloaded
def store_remote_file(self, name, url):
"""
Todo.
Download a remote file located at {url} and store it as {name}.
@arg name: Name to store the file under.
@type name: string
@arg url: Url to the remote file.
@type url: string
"""
if not re.match('^[\da-zA-Z\._-]+$', name):
return
......@@ -125,5 +184,7 @@ class CacheSync(object):
handle.close()
# Store remote data
retriever = Retriever.GenBankRetriever(self._config.Retriever, self._output, self._database)
retriever = Retriever.GenBankRetriever(self._config.Retriever,
self._output,
self._database)
retriever.write(data, name, 0)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment