Skip to content
Snippets Groups Projects
Commit dfa57a20 authored by Vermaat
Browse files

Basic working cache sync.

git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/branches/gbinfo-sync-branch@316 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
parent a61b28b7
No related branches found
No related tags found
No related merge requests found
...@@ -48,4 +48,8 @@ if cache: ...@@ -48,4 +48,8 @@ if cache:
if 'url' in r: if 'url' in r:
print r.url print r.url
print r.created print r.created
if r.cached:
print 'cached'
else:
print 'not cached'
print print
...@@ -1035,10 +1035,10 @@ class Cache(Db) : ...@@ -1035,10 +1035,10 @@ class Cache(Db) :
return None return None
#getGBFromGI #getGBFromGI
def getGB(self, created_since): def getGBSince(self, created_since):
""" """
Get all accession number entries starting with creation date Get all accession number entries with creation date {created_since}
{created_since}. or later.
SQL tables from internalDb: SQL tables from internalDb:
- GBInfo ; Information about cached and uploaded GenBank files. - GBInfo ; Information about cached and uploaded GenBank files.
...@@ -1057,7 +1057,7 @@ class Cache(Db) : ...@@ -1057,7 +1057,7 @@ class Cache(Db) :
""", created_since """, created_since
return self.query(statement) return self.query(statement)
#getGB #getGBSince
def getLoc(self, accNo) : def getLoc(self, accNo) :
""" """
......
...@@ -233,5 +233,5 @@ class CacheEntry(ClassModel): ...@@ -233,5 +233,5 @@ class CacheEntry(ClassModel):
chromosomeOrientation = Integer chromosomeOrientation = Integer
url = String url = String
created = Mandatory.DateTime created = Mandatory.DateTime
cached = Mandatory.Boolean cached = String
#CacheEntry #CacheEntry
...@@ -6,9 +6,13 @@ Module for synchronizing the database with other Mutalyzer instances. ...@@ -6,9 +6,13 @@ Module for synchronizing the database with other Mutalyzer instances.
from mutalyzer.util import monkey_patch_suds; monkey_patch_suds() from mutalyzer.util import monkey_patch_suds; monkey_patch_suds()
import os import os
import re
from datetime import datetime, timedelta from datetime import datetime, timedelta
import urllib2
from suds.client import Client from suds.client import Client
from mutalyzer import Retriever
DEFAULT_CREATED_SINCE_DAYS = 7 DEFAULT_CREATED_SINCE_DAYS = 7
...@@ -17,11 +21,12 @@ class CacheSync(object): ...@@ -17,11 +21,12 @@ class CacheSync(object):
""" """
Todo. Todo.
""" """
def __init__(self, config, database): def __init__(self, config, output, database):
""" """
Todo. Todo.
""" """
self._config = config self._config = config
self._output = output
self._database = database self._database = database
def local_cache(self, created_since=None): def local_cache(self, created_since=None):
...@@ -31,16 +36,21 @@ class CacheSync(object): ...@@ -31,16 +36,21 @@ class CacheSync(object):
if not created_since: if not created_since:
created_since = datetime.today() - \ created_since = datetime.today() - \
timedelta(days=DEFAULT_CREATED_SINCE_DAYS) timedelta(days=DEFAULT_CREATED_SINCE_DAYS)
cache = self._database.getGB(created_since) cache = self._database.getGBSince(created_since)
entries = [] entries = []
# For each entry, check if it is cached on our filesystem. # For each entry, check if it is cached on our filesystem.
# Todo: refactor # Todo: refactor
for entry in cache: for entry in cache:
file_path = os.path.join(self._config.cache, '%s.bz2' % entry[0])
e = list(entry) e = list(entry)
e.append(os.path.isfile(file_path)) # Note that this way we only include Genbank files, not LRG files.
file_name = '%s.gb.bz2' % entry[0]
file_path = os.path.join(self._config.Retriever.cache, file_name)
if os.path.isfile(file_path):
e.append('%s.gb' % entry[0])
else:
e.append(None)
entries.append(e) entries.append(e)
return entries return entries
...@@ -59,20 +69,21 @@ class CacheSync(object): ...@@ -59,20 +69,21 @@ class CacheSync(object):
""" """
Create a nice dictionary out of the CacheEntry object. Create a nice dictionary out of the CacheEntry object.
""" """
entry_dict = {'name': entry.name, entry_dict = {'name': str(entry.name),
'hash': entry.hash, 'hash': str(entry.hash),
'created': entry.created, 'created': entry.created}
'cached': bool(entry.cached)} for attribute in ('gi', 'chromosomeName', 'url', 'cached'):
for attribute in ('gi', 'chromosomeName', 'chromosomeStart' entry_dict[attribute] = str(entry[attribute]) \
'chromosomeStop', 'chromosomeOrientation', if attribute in entry else None
'url'): for attribute in ('chromosomeStart', 'chromosomeStop',
entry_dict[attribute] = entry[attribute] \ 'chromosomeOrientation'):
entry_dict[attribute] = int(entry[attribute]) \
if attribute in entry else None if attribute in entry else None
return entry_dict return entry_dict
return map(cache_entry_from_soap, cache.CacheEntry) return map(cache_entry_from_soap, cache.CacheEntry)
def sync_with_remote(self, remote_wsdl, remote_cache, created_since=None): def sync_with_remote(self, remote_wsdl, url_template, created_since=None):
""" """
Todo. Todo.
""" """
...@@ -81,18 +92,38 @@ class CacheSync(object): ...@@ -81,18 +92,38 @@ class CacheSync(object):
for entry in remote_cache: for entry in remote_cache:
if self._database.getHash(entry['name']): if self._database.getHash(entry['name']):
continue continue
#self._database.insertGB(entry['name'], if self._database.getGBFromHash(entry['hash']):
# entry['gi'], continue
# entry['hash'], if entry['gi'] and self._database.getGBFromGI(entry['gi']):
# entry['chromosomeName'], continue
# entry['chromosomeStart'], self._database.insertGB(entry['name'],
# entry['chromosomeStop'], entry['gi'],
# entry['chromosomeOrientation'], entry['hash'],
# entry['url']) entry['chromosomeName'],
#print 'inserting %s' % entry['name'] entry['chromosomeStart'],
#print entry entry['chromosomeStop'],
entry['chromosomeOrientation'],
entry['url'])
print 'inserting %s' % entry['name']
if not entry['chromosomeName'] and not entry['url']: if not entry['chromosomeName'] and not entry['url']:
if entry['cached']: if entry['cached']:
print 'downloading file from remote cache: %s' % (remote_cache % entry['name']) print 'downloading file from remote cache: %s' % (url_template % str(entry['cached']))
#else: self.store_remote_file(entry['name'], url_template % entry['cached'])
#print 'cannot download this file from remote cache' else:
print 'cannot download this file from remote cache'
def store_remote_file(self, name, url):
    """
    Download the contents of {url} and store them in the local cache
    under {name}.

    The name is checked against a whitelist of characters (digits,
    ASCII letters, '.', '_' and '-') before anything is downloaded. If
    it does not match, nothing is downloaded or stored and the method
    returns silently.

    @arg name: Name to store the downloaded data under.
    @arg url: Location to download the data from.
    """
    # Raw string so the backslash escapes reach the regex engine intact.
    # \Z (instead of $) anchors at the very end of the string, so a name
    # with a trailing newline is rejected as well.
    if not re.match(r'^[\da-zA-Z._-]+\Z', name):
        return

    # Download remote data. The handle is closed even if reading fails.
    handle = urllib2.urlopen(url)
    try:
        data = handle.read()
    finally:
        handle.close()

    # Store remote data.
    # NOTE(review): the meaning of the third argument (0) to write() is
    # not visible here -- confirm against Retriever.GenBankRetriever.
    retriever = Retriever.GenBankRetriever(self._config.Retriever,
                                           self._output,
                                           self._database)
    retriever.write(data, name, 0)
...@@ -893,7 +893,7 @@ class MutalyzerService(DefinitionBase): ...@@ -893,7 +893,7 @@ class MutalyzerService(DefinitionBase):
'Received request getCache') 'Received request getCache')
database = Db.Cache(self._config.Db) database = Db.Cache(self._config.Db)
sync = CacheSync(self._config.Sync, database) sync = CacheSync(self._config, output, database)
cache = sync.local_cache(created_since) cache = sync.local_cache(created_since)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment