Skip to content
Snippets Groups Projects
Commit c7e609b9 authored by Vermaat's avatar Vermaat
Browse files

Fully functional remote cache sync.

- Writes to log file.
- Can be run from bin/mutalyzer-cache-sync.
- Post-install script installs (by default disabled) cronjob.



git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/branches/gbinfo-sync-branch@318 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
parent e5a17cd8
No related branches found
No related tags found
No related merge requests found
...@@ -3,35 +3,48 @@ ...@@ -3,35 +3,48 @@
""" """
Synchronize the database cache with other Mutalyzer instances. Synchronize the database cache with other Mutalyzer instances.
Usage:
./mutalyzer-cache-sync remote_wsdl url_template days
remote_wsdl: Location of the remote WSDL description.
url_template: URL to remote downloads, where {file} is to be substituted
by the filename.
days: Number of days to go back in the remote cache.
This program is intended to be run daily from cron. Example: This program is intended to be run daily from cron. Example:
25 5 * * * mutalyzer-cache-sync 25 5 * * * mutalyzer-cache-sync 'http://dom1/?wsdl' 'http://dom1/{file}' 7
55 5 * * * mutalyzer-cache-sync 'http://dom2/?wsdl' 'http://dom2/{file}' 7
""" """
import sys
from mutalyzer.config import Config from mutalyzer.config import Config
from mutalyzer.output import Output from mutalyzer.output import Output
from mutalyzer.sync import CacheSync from mutalyzer.sync import CacheSync
from mutalyzer import Db from mutalyzer import Db
def main(): def cache_sync(remote_wsdl, url_template, days):
""" """
Synchronize the database cache with other Mutalyzer instances. Synchronize the database cache with other Mutalyzer instances.
""" """
config = Config() config = Config()
output = Output(__file__, config.Output) output = Output(__file__, config.Output)
output.addMessage(__file__, -1, 'INFO',
'Starting cache sync')
database = Db.Cache(config.Db) database = Db.Cache(config.Db)
sync = CacheSync(config.Sync, database)
created_since = datetime.today() - timedelta(days=60)
sync.sync_with_remote(created_since)
output.addMessage(__file__, -1, 'INFO', 'Cache sync end') sync = CacheSync(config.Retriever, output, database)
sync.sync_with_remote(remote_wsdl, url_template, days)
if __name__ == '__main__': if __name__ == '__main__':
main() if len(sys.argv) < 4:
print __doc__.strip()
sys.exit(1)
try:
days = int(sys.argv[3])
except ValueError:
print 'Last argument must be an integer.'
sys.exit(1)
cache_sync(sys.argv[1], sys.argv[2], int(sys.argv[3]))
# Synchronize the local cache with the live server every morning at 05:25
#25 5 * * * www-data <MUTALYZER_BIN_CACHE_SYNC> 'http://www.mutalyzer.nl/2.0/services/?wsdl' 'http://www.mutalyzer.nl/2.0/Reference/{file}' 3
...@@ -21,6 +21,7 @@ set -e ...@@ -21,6 +21,7 @@ set -e
# directory to be used. # directory to be used.
PACKAGE_ROOT=$(cd / && python -c 'import mutalyzer; print mutalyzer.package_root()') PACKAGE_ROOT=$(cd / && python -c 'import mutalyzer; print mutalyzer.package_root()')
BIN_BATCHD=$(which mutalyzer-batchd) BIN_BATCHD=$(which mutalyzer-batchd)
BIN_CACHE_SYNC=$(which mutalyzer-cache-sync)
BIN_UCSC_UPDATE=$(which mutalyzer-ucsc-update) BIN_UCSC_UPDATE=$(which mutalyzer-ucsc-update)
BIN_WEBSITE=$(which mutalyzer-website.wsgi) BIN_WEBSITE=$(which mutalyzer-website.wsgi)
BIN_WEBSERVICE=$(which mutalyzer-webservice.wsgi) BIN_WEBSERVICE=$(which mutalyzer-webservice.wsgi)
...@@ -56,6 +57,8 @@ update-rc.d mutalyzer-batchd defaults 98 02 ...@@ -56,6 +57,8 @@ update-rc.d mutalyzer-batchd defaults 98 02
echo "Installing crontab" echo "Installing crontab"
cp extras/cron.d/mutalyzer-ucsc-update /etc/cron.d/mutalyzer-ucsc-update cp extras/cron.d/mutalyzer-ucsc-update /etc/cron.d/mutalyzer-ucsc-update
sed -i -e "s@<MUTALYZER_BIN_UCSC_UPDATE>@${BIN_UCSC_UPDATE}@g" /etc/cron.d/mutalyzer-ucsc-update sed -i -e "s@<MUTALYZER_BIN_UCSC_UPDATE>@${BIN_UCSC_UPDATE}@g" /etc/cron.d/mutalyzer-ucsc-update
cp extras/cron.d/mutalyzer-cache-sync /etc/cron.d/mutalyzer-cache-sync
sed -i -e "s@<MUTALYZER_BIN_CACHE_SYNC>@${BIN_CACHE_SYNC}@g" /etc/cron.d/mutalyzer-cache-sync
echo "Creating /etc/apache2/conf.d/mutalyzer.conf" echo "Creating /etc/apache2/conf.d/mutalyzer.conf"
cp extras/apache/mutalyzer.conf /etc/apache2/conf.d/mutalyzer.conf cp extras/apache/mutalyzer.conf /etc/apache2/conf.d/mutalyzer.conf
......
...@@ -10,6 +10,7 @@ import tempfile ...@@ -10,6 +10,7 @@ import tempfile
from configobj import ConfigObj from configobj import ConfigObj
import mutalyzer import mutalyzer
from mutalyzer import util
class ConfigurationError(Exception): class ConfigurationError(Exception):
...@@ -28,7 +29,6 @@ class Config(): ...@@ -28,7 +29,6 @@ class Config():
class Batch(): pass class Batch(): pass
class File(): pass class File(): pass
class GenRecord(): pass class GenRecord(): pass
class Sync(): pass
def __init__(self, filename=None): def __init__(self, filename=None):
""" """
...@@ -134,8 +134,6 @@ class Config(): ...@@ -134,8 +134,6 @@ class Config():
self.GenRecord.spliceAlarm = int(config["spliceAlarm"]) self.GenRecord.spliceAlarm = int(config["spliceAlarm"])
self.GenRecord.spliceWarn = int(config["spliceWarn"]) self.GenRecord.spliceWarn = int(config["spliceWarn"])
# Set the variables needed by the sync module.
self.Sync.cache = config["cache"]
# If we are in a testing environment, use a temporary file for # If we are in a testing environment, use a temporary file for
# logging and a temporary directory for the cache. # logging and a temporary directory for the cache.
......
""" """
Module for synchronizing the database with other Mutalyzer instances. Module for synchronizing the database with other Mutalyzer instances.
Todo: add some logging to the output object.
""" """
...@@ -28,7 +26,7 @@ class CacheSync(object): ...@@ -28,7 +26,7 @@ class CacheSync(object):
Instantiate the object. Instantiate the object.
@arg config: A configuration object. @arg config: A configuration object.
@type config: mutalyzer.config.Config @type config: mutalyzer.config.Config.Retriever
@arg output: An output object. @arg output: An output object.
@type output: mutalyzer.output.Output @type output: mutalyzer.output.Output
@arg database: A database object. @arg database: A database object.
...@@ -62,7 +60,7 @@ class CacheSync(object): ...@@ -62,7 +60,7 @@ class CacheSync(object):
for entry in entries: for entry in entries:
# Note that this way we only include Genbank files, not LRG files. # Note that this way we only include Genbank files, not LRG files.
cached = None cached = None
if os.path.isfile(os.path.join(self._config.Retriever.cache, if os.path.isfile(os.path.join(self._config.cache,
'%s.gb.bz2' % entry[0])): '%s.gb.bz2' % entry[0])):
cached = '%s.gb' % entry[0] cached = '%s.gb' % entry[0]
cache.append({'name': entry[0], cache.append({'name': entry[0],
...@@ -74,7 +72,7 @@ class CacheSync(object): ...@@ -74,7 +72,7 @@ class CacheSync(object):
'chromosomeOrientation': entry[6], 'chromosomeOrientation': entry[6],
'url': entry[7], 'url': entry[7],
'created': entry[8], 'created': entry[8],
'cached': cached} 'cached': cached})
return cache return cache
...@@ -92,6 +90,9 @@ class CacheSync(object): ...@@ -92,6 +90,9 @@ class CacheSync(object):
@return: List of cache entries. @return: List of cache entries.
@rtype: list(dictionary) @rtype: list(dictionary)
""" """
self._output.addMessage(__file__, -1, 'INFO', 'Getting remote cache'
' from %s' % remote_wsdl)
if not created_since: if not created_since:
created_since = datetime.today() - \ created_since = datetime.today() - \
timedelta(days=DEFAULT_CREATED_SINCE_DAYS) timedelta(days=DEFAULT_CREATED_SINCE_DAYS)
...@@ -116,7 +117,31 @@ class CacheSync(object): ...@@ -116,7 +117,31 @@ class CacheSync(object):
return map(cache_entry_from_soap, cache.CacheEntry) return map(cache_entry_from_soap, cache.CacheEntry)
def sync_with_remote(self, remote_wsdl, url_template, created_since=None): def store_remote_file(self, name, url):
"""
Download a remote file located at {url} and store it as {name}.
@arg name: Name to store the file under.
@type name: string
@arg url: Url to the remote file.
@type url: string
"""
if not re.match('^[\da-zA-Z\._-]+$', name):
return
# Download remote data
handle = urllib2.urlopen(url)
data = handle.read()
handle.close()
# Store remote data
retriever = Retriever.GenBankRetriever(self._config,
self._output,
self._database)
retriever.write(data, name, 0)
def sync_with_remote(self, remote_wsdl, url_template,
days=DEFAULT_CREATED_SINCE_DAYS):
""" """
Synchronize the local cache with the remote cache. Synchronize the local cache with the remote cache.
...@@ -130,14 +155,17 @@ class CacheSync(object): ...@@ -130,14 +155,17 @@ class CacheSync(object):
@arg url_template: Formatting string containing a {file} occurrence, @arg url_template: Formatting string containing a {file} occurrence,
see example usage above. see example usage above.
@type url_template: string @type url_template: string
@kwarg created_since: Only remote entries with this creation date or @kwarg days: Only remote entries added this number of days ago or
later are considered. later are considered.
@type created_since: datatime.datetime @type days: int
@return: The number of entries added to the local cache and the number @return: The number of entries added to the local cache and the number
of cache files downloaded from the remote site. of cache files downloaded from the remote site.
@rtype: tuple(int, int) @rtype: tuple(int, int)
""" """
self._output.addMessage(__file__, -1, 'INFO', 'Starting cache sync')
created_since = datetime.today() - timedelta(days=days)
remote_cache = self.remote_cache(remote_wsdl, created_since) remote_cache = self.remote_cache(remote_wsdl, created_since)
inserted = downloaded = 0 inserted = downloaded = 0
...@@ -164,27 +192,10 @@ class CacheSync(object): ...@@ -164,27 +192,10 @@ class CacheSync(object):
self.store_remote_file(entry['name'], url) self.store_remote_file(entry['name'], url)
downloaded += 1 downloaded += 1
return inserted, downloaded self._output.addMessage(__file__, -1, 'INFO',
'Inserted %d entries in the cache,'
def store_remote_file(self, name, url): ' downloaded %d files.' \
""" % (inserted, downloaded))
Download a remote file located at {url} and store it as {name}. self._output.addMessage(__file__, -1, 'INFO', 'Finished cache sync')
@arg name: Name to store the file under.
@type name: string
@arg url: Url to the remote file.
@type url: string
"""
if not re.match('^[\da-zA-Z\._-]+$', name):
return
# Download remote data
handle = urllib2.urlopen(url)
data = handle.read()
handle.close()
# Store remote data return inserted, downloaded
retriever = Retriever.GenBankRetriever(self._config.Retriever,
self._output,
self._database)
retriever.write(data, name, 0)
...@@ -885,7 +885,10 @@ class MutalyzerService(DefinitionBase): ...@@ -885,7 +885,10 @@ class MutalyzerService(DefinitionBase):
@soap(DateTime, _returns = Array(CacheEntry)) @soap(DateTime, _returns = Array(CacheEntry))
def getCache(self, created_since=None): def getCache(self, created_since=None):
""" """
Todo: documentation. Get a list of entries from the local cache created since given date.
This method is intended to be used by Mutalyzer itself to synchronize
the cache between installations on different servers.
""" """
output = Output(__file__, self._config.Output) output = Output(__file__, self._config.Output)
...@@ -893,22 +896,16 @@ class MutalyzerService(DefinitionBase): ...@@ -893,22 +896,16 @@ class MutalyzerService(DefinitionBase):
'Received request getCache') 'Received request getCache')
database = Db.Cache(self._config.Db) database = Db.Cache(self._config.Db)
sync = CacheSync(self._config, output, database) sync = CacheSync(self._config.Retriever, output, database)
cache = sync.local_cache(created_since) cache = sync.local_cache(created_since)
def cache_entry_to_soap(entry): def cache_entry_to_soap(entry):
e = CacheEntry() e = CacheEntry()
(e.name, for attr in ('name', 'gi', 'hash', 'chromosomeName',
e.gi, 'chromosomeStart', 'chromosomeStop',
e.hash, 'chromosomeOrientation', 'url', 'created', 'cached'):
e.chromosomeName, setattr(e, attr, entry[attr])
e.chromosomeStart,
e.chromosomeStop,
e.chromosomeOrientation,
e.url,
e.created,
e.cached) = entry
return e return e
output.addMessage(__file__, -1, 'INFO', output.addMessage(__file__, -1, 'INFO',
......
...@@ -18,11 +18,11 @@ setup( ...@@ -18,11 +18,11 @@ setup(
packages=find_packages(exclude=['doc', 'extras', 'tests']), packages=find_packages(exclude=['doc', 'extras', 'tests']),
include_package_data=True, include_package_data=True,
scripts=['bin/mutalyzer', scripts=['bin/mutalyzer',
'bin/mutalyzer-cache-sync',
'bin/mutalyzer-batchd', 'bin/mutalyzer-batchd',
'bin/mutalyzer-cache-sync',
'bin/mutalyzer-ucsc-update', 'bin/mutalyzer-ucsc-update',
'bin/mutalyzer-website.wsgi', 'bin/mutalyzer-webservice.wsgi',
'bin/mutalyzer-webservice.wsgi'], 'bin/mutalyzer-website.wsgi'],
zip_safe=False zip_safe=False
) )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment