diff --git a/mutalyzer/Db.py b/mutalyzer/Db.py index 3c45ac47435eefb6cb8f8f8c2c1bda902357d433..376221d6480c9871648b67c0e04e73c1b832e943 100644 --- a/mutalyzer/Db.py +++ b/mutalyzer/Db.py @@ -128,42 +128,3 @@ class Db(): return result #query #Db - - -class Counter(Db): - """ - Database functions for the service counters. - - Special methods: - - __init__() ; Initialise the class. - - Inherited methods from Db: - - query(statement) ; General query function. - - SQL tables from internalDb: - - Counter ; Service count information. - """ - def __init__(self): - """ - Initialise the Db parent class. Use the internalDb. - """ - Db.__init__(self, settings.MYSQL_DATABASE, - settings.MYSQL_USER, - settings.MYSQL_HOST) - - def increment(self, service, interface): - """ - Increment the counter for given service and interface. - - SQL tables from internalDb: - - Counter ; Service count information. - """ - statement = """ - UPDATE `Counter` SET - `count` = `count` + 1 - WHERE `service` = %s - AND `interface` = %s; - """, (service, interface) - - self.query(statement) -#Counter diff --git a/mutalyzer/Scheduler.py b/mutalyzer/Scheduler.py index b589c0db17411203a2ccf0dcc2a8b78265eee4d1..77e7f311b91ce2121ef2b46f3aff7019f93e4a72 100644 --- a/mutalyzer/Scheduler.py +++ b/mutalyzer/Scheduler.py @@ -24,6 +24,7 @@ import mutalyzer from mutalyzer.config import settings from mutalyzer.db import queries, session from mutalyzer.db.models import Assembly, BatchJob, BatchQueueItem +from mutalyzer import stats from mutalyzer import variantchecker from mutalyzer.grammar import Grammar from mutalyzer.output import Output @@ -298,7 +299,7 @@ Mutalyzer batch scheduler""" % url) #for #_updateDbFlags - def process(self, counter): + def process(self): """ Start the mutalyzer Batch Processing. This method retrieves all jobs jobs from the database and processes them in a roundrobin fashion. 
@@ -358,13 +359,13 @@ Mutalyzer batch scheduler""" % url) item, flags = batch_queue_item if batch_job.job_type == 'NameChecker': - self._processNameBatch(batch_job, item, flags, counter) + self._processNameBatch(batch_job, item, flags) elif batch_job.job_type == 'SyntaxChecker': - self._processSyntaxCheck(batch_job, item, flags, counter) + self._processSyntaxCheck(batch_job, item, flags) elif batch_job.job_type == 'PositionConverter': - self._processConversion(batch_job, item, flags, counter) + self._processConversion(batch_job, item, flags) elif batch_job.job_type == 'SnpConverter': - self._processSNP(batch_job, item, flags, counter) + self._processSNP(batch_job, item, flags) else: # Unknown job type, should never happen. # Todo: Log some screaming message. @@ -380,7 +381,7 @@ Mutalyzer batch scheduler""" % url) session.commit() #process - def _processNameBatch(self, batch_job, cmd, flags, counter) : + def _processNameBatch(self, batch_job, cmd, flags): """ Process an entry from the Name Batch, write the results to the job-file. If an Exception is raised, catch and continue. @@ -399,7 +400,7 @@ Mutalyzer batch scheduler""" % url) O.addMessage(__file__, -1, "INFO", "Received NameChecker batchvariant " + cmd) - counter.increment('namecheck', 'batch') + stats.increment_counter('name-checker/batch') #Read out the flags skip = self.__processFlags(O, flags) @@ -468,7 +469,7 @@ Mutalyzer batch scheduler""" % url) "Finished NameChecker batchvariant " + cmd) #_processNameBatch - def _processSyntaxCheck(self, batch_job, cmd, flags, counter) : + def _processSyntaxCheck(self, batch_job, cmd, flags): """ Process an entry from the Syntax Check, write the results to the job-file. 
@@ -489,7 +490,7 @@ Mutalyzer batch scheduler""" % url) output.addMessage(__file__, -1, "INFO", "Received SyntaxChecker batchvariant " + cmd) - counter.increment('syntaxcheck', 'batch') + stats.increment_counter('syntax-checker/batch') skip = self.__processFlags(output, flags) #Process @@ -527,7 +528,7 @@ Mutalyzer batch scheduler""" % url) "Finished SyntaxChecker batchvariant " + cmd) #_processSyntaxCheck - def _processConversion(self, batch_job, cmd, flags, counter) : + def _processConversion(self, batch_job, cmd, flags): """ Process an entry from the Position Converter, write the results to the job-file. The Position Converter is wrapped in a try except @@ -555,7 +556,7 @@ Mutalyzer batch scheduler""" % url) O.addMessage(__file__, -1, "INFO", "Received PositionConverter batchvariant " + cmd) - counter.increment('positionconvert', 'batch') + stats.increment_counter('position-converter/batch') skip = self.__processFlags(O, flags) if not skip : @@ -634,7 +635,7 @@ Mutalyzer batch scheduler""" % url) #_processConversion - def _processSNP(self, batch_job, cmd, flags, counter) : + def _processSNP(self, batch_job, cmd, flags): """ Process an entry from the SNP converter Batch, write the results to the job-file. If an Exception is raised, catch and continue. @@ -653,7 +654,7 @@ Mutalyzer batch scheduler""" % url) O.addMessage(__file__, -1, "INFO", "Received SNP converter batch rs" + cmd) - counter.increment('snpconvert', 'batch') + stats.increment_counter('snp-converter/batch') #Read out the flags # Todo: Do something with the flags? diff --git a/mutalyzer/config/default_settings.py b/mutalyzer/config/default_settings.py index e2d93c339ef02047e4e9ae2ef0274ec6dd0c0caa..108ad84dacd5cd67dd03dd069f1082691f269623 100644 --- a/mutalyzer/config/default_settings.py +++ b/mutalyzer/config/default_settings.py @@ -26,9 +26,8 @@ MAX_CACHE_SIZE = 50 * 1048576 # 50 MB # Maximum size for uploaded and downloaded files (in bytes). 
MAX_FILE_SIZE = 10 * 1048576 # 10 MB -# Redis connection URI (can be any redis-py connection URI). Redis is used -# for keeping statistics counters. Setting this to `None`, will silently -# yield a mock Redis. +# Redis connection URI (can be any redis-py connection URI). Set to `None` to +# silently use a mock Redis. Redis is only used for non-essential features. REDIS_URI = None # Database connection URI (can be any SQLAlchemy connection URI). diff --git a/mutalyzer/entrypoints/batch_processor.py b/mutalyzer/entrypoints/batch_processor.py index 0a34d813e489a093d47e90aa79151953ff78fe5a..ff2ac02bc8d668b48e56582b6ec4344e06ee9367 100644 --- a/mutalyzer/entrypoints/batch_processor.py +++ b/mutalyzer/entrypoints/batch_processor.py @@ -12,7 +12,6 @@ import sys import time from .. import config -from .. import Db from .. import Scheduler @@ -20,7 +19,6 @@ def process(): """ Run forever in a loop processing scheduled batch jobs. """ - counter = Db.Counter() scheduler = Scheduler.Scheduler() def handle_exit(signum, stack_frame): @@ -37,7 +35,7 @@ def process(): while True: # Process batch jobs. - scheduler.process(counter) + scheduler.process() if scheduler.stopped(): break # Wait a bit and process any possible new jobs. diff --git a/mutalyzer/redisclient.py b/mutalyzer/redisclient.py index 45a4fa2437bd766226c11172d6187654445905a7..8b3138a8018ca3f5152cd6f1abf91b6d85f36415 100644 --- a/mutalyzer/redisclient.py +++ b/mutalyzer/redisclient.py @@ -9,12 +9,18 @@ simple and just use one global connection pool as created by `StrictRedis`. If the `REDIS_URI` configuration setting is `None`, we silently instantiate a mock interface to Redis. + +.. todo:: We currently use Redis for storing stat counters, but there are many + opportunities to use it for caching. 
For example, which version numbers + are available for a certain accession number, which is a costly operation + that we currently optimize with quite a hack in the batch + name checker (the whole alter thing with batchflags). """ import redis -from mutalyzer import settings +from mutalyzer.config import settings from mutalyzer import util @@ -34,4 +40,4 @@ class LazyClient(util.LazyObject): self._wrapped = redis.StrictRedis.from_url(settings.REDIS_URI) -client = LazyCLient() +client = LazyClient() diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py index fd14d3901cba9f002f58f95c0138d14a8837817f..86cc0a161520fd1815d54f8559c19b5f633320db 100644 --- a/mutalyzer/services/rpc.py +++ b/mutalyzer/services/rpc.py @@ -28,6 +28,7 @@ from mutalyzer.db.models import (Assembly, Chromosome, BatchJob, from mutalyzer.output import Output from mutalyzer.grammar import Grammar from mutalyzer.sync import CacheSync +from mutalyzer import stats from mutalyzer import variantchecker from mutalyzer import Db from mutalyzer.mapping import Converter @@ -73,8 +74,7 @@ class MutalyzerService(ServiceBase): """ output = Output(__file__) - counter = Db.Counter() - counter.increment('batchjob', 'webservice') + stats.increment_counter('batch-job/webservice') scheduler = Scheduler.Scheduler() file_instance = File.File(output) @@ -678,8 +678,7 @@ class MutalyzerService(ServiceBase): O.addMessage(__file__, -1, "INFO", "Received request cTogConversion(%s %s)" % (build, variant)) - counter = Db.Counter() - counter.increment('positionconvert', 'webservice') + stats.increment_counter('position-converter/webservice') assembly = Assembly.query.filter(or_(Assembly.name == build, Assembly.alias == build)).first() @@ -722,8 +721,7 @@ class MutalyzerService(ServiceBase): output.addMessage(__file__, -1, "INFO", "Received request checkSyntax(%s)" % (variant)) - counter = Db.Counter() - counter.increment('checksyntax', 'webservice') + 
stats.increment_counter('syntax-checker/webservice') if not variant : output.addMessage(__file__, 4, "EARG", "EARG no variant") @@ -800,8 +798,7 @@ class MutalyzerService(ServiceBase): O.addMessage(__file__, -1, "INFO", "Received request runMutalyzer(%s)" % (variant)) - counter = Db.Counter() - counter.increment('namecheck', 'webservice') + stats.increment_counter('name-checker/webservice') variantchecker.check_variant(variant, O) @@ -1238,8 +1235,7 @@ class MutalyzerService(ServiceBase): output.addMessage(__file__, -1, 'INFO', 'Received request getdbSNPDescription(%s)' % rs_id) - counter = Db.Counter() - counter.increment('snpconvert', 'webservice') + stats.increment_counter('snp-converter/webservice') retriever = Retriever.Retriever(output) descriptions = retriever.snpConvert(rs_id) diff --git a/mutalyzer/stats.py b/mutalyzer/stats.py new file mode 100644 index 0000000000000000000000000000000000000000..a9589ea5802fd7560fb22bf99c4fed8c814c6ff3 --- /dev/null +++ b/mutalyzer/stats.py @@ -0,0 +1,48 @@ +""" +Simple counters that keep track of how often certain Mutalyzer functionality +is used. Backed by Redis. + +For a given counter, we maintain the total count and the count per minute, +hour, and day. The total count is maintained indefinitely; the others +automatically expire after some predefined time. + +.. todo:: Implement querying of counters and building a simple dashboard-like + user interface for live viewing of the counters. Have a look at `this post + <http://stackoverflow.com/questions/10155398/getting-multiple-key-values-from-redis>`_ + for some possible implementation ideas. + +We might want to consider using something like `Kairos +<https://github.com/agoragames/kairos>`_ instead of growing on top of this +module much more. +""" + + +import time + +from mutalyzer.redisclient import client +from mutalyzer.config import settings + + +# Label, bucket definition, expiration time in seconds. 
+INTERVALS = [('minute', '%Y-%m-%d_%H:%M', 60 * 60), + ('hour', '%Y-%m-%d_%H', 60 * 60 * 24), + ('day', '%Y-%m-%d', 60 * 60 * 24 * 30)] + + +def increment_counter(counter): + """ + Increment the specified counter. + """ + pipe = client.pipeline(transaction=False) + pipe.incr('counter:%s:total' % counter) + + for label, bucket, expire in INTERVALS: + key = 'counter:%s:%s:%s' % (counter, label, time.strftime(bucket)) + pipe.incr(key) + + # It's safe to just keep on expiring the counter, even if it already + # had an expiration, since it is bounded by the current day. We don't + # really mind at what time of the day the expiration will be exactly. + pipe.expire(key, expire) + + pipe.execute() diff --git a/mutalyzer/website.py b/mutalyzer/website.py index 13825cd20939ef453ac01151f4d796f81bdd3e36..63dd324782f55eace5a0d6caf70b2dc2e5ec23d0 100644 --- a/mutalyzer/website.py +++ b/mutalyzer/website.py @@ -33,7 +33,7 @@ from spyne.server.http import HttpBase from sqlalchemy import and_, or_ import mutalyzer -from mutalyzer import util +from mutalyzer import stats, util from mutalyzer.config import settings from mutalyzer.db import session from mutalyzer.db.models import Assembly, BatchJob, BatchQueueItem @@ -356,8 +356,7 @@ class SyntaxCheck: output.addMessage(__file__, -1, 'INFO', 'Received request syntaxCheck(%s) from %s' % (i.variant, IP)) - counter = Db.Counter() - counter.increment('syntaxcheck', 'website') + stats.increment_counter('syntax-checker/website') variant = i.variant or '' if variant.find(',') >= 0: @@ -431,8 +430,7 @@ class Snp: output.addMessage(__file__, -1, 'INFO', 'Received request snpConvert(%s) from %s' % (rs_id, IP)) - counter = Db.Counter() - counter.increment('snpconvert', 'website') + stats.increment_counter('snp-converter/website') retriever = Retriever.Retriever(output) descriptions = retriever.snpConvert(rs_id) @@ -508,8 +506,7 @@ class PositionConverter: 'Received request positionConverter(%s, %s) from %s' % ( assembly_name_or_alias, variant, 
IP)) - counter = Db.Counter() - counter.increment('positionconvert', 'website') + stats.increment_counter('position-converter/website') assembly = Assembly.query.filter( or_(Assembly.name == assembly_name_or_alias, @@ -726,8 +723,7 @@ class Check: output.addMessage(__file__, -1, 'INFO', 'Received variant %s from %s' % (name, web.ctx['ip'])) - counter = Db.Counter() - counter.increment('namecheck', 'website') + stats.increment_counter('name-checker/website') # Todo: The following is probably a problem elsewhere too. # We stringify the variant, because a unicode string crashes @@ -1105,9 +1101,7 @@ class BatchChecker: # to the truth value False, so 'if inFile: ...' is not useful. if email and isEMail(email) and not inFile == None and inFile.file: - - counter = Db.Counter() - counter.increment('batchjob', 'website') + stats.increment_counter('batch-job/website') # Todo: These error messages could be delivered trough a template if not 'CONTENT_LENGTH' in web.ctx.environ.keys(): diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 4bc3b0477dda2ad04b4c7e2a4cc3902ac67ef94d..9c7467f2576b2fb458887c38a18273e556675dde 100644 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -11,7 +11,6 @@ from nose.tools import * from mutalyzer.config import settings from mutalyzer.db.models import BatchJob, BatchQueueItem -from mutalyzer import Db from mutalyzer import File from mutalyzer import output from mutalyzer import Scheduler @@ -45,7 +44,7 @@ class TestScheduler(): .count() assert_equal(left, len(variants)) - scheduler.process(Db.Counter()) + scheduler.process() left = BatchQueueItem.query \ .join(BatchJob) \ diff --git a/tests/utils.py b/tests/utils.py index 7e84ce19ad8e8d9152ece6c7e4f29d288cf0b278..93c6aab52c3684d40e9316f0411994aaacd07d04 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -20,6 +20,7 @@ def create_test_environment(database=False): DEBUG = False, TESTING = True, CACHE_DIR = tempfile.mkdtemp(), + REDIS_URI = None, DATABASE_URI = 
'sqlite://', LOG_FILE = log_filename))