diff --git a/mutalyzer/entrypoints/admin.py b/mutalyzer/entrypoints/admin.py
index c235a93f39532434a42b6c17324b2357341cf281..f9eeff882b550e0e22302df34d26ca9133cb7a47 100644
--- a/mutalyzer/entrypoints/admin.py
+++ b/mutalyzer/entrypoints/admin.py
@@ -14,13 +14,14 @@ import os
 import alembic.command
 import alembic.config
 from alembic.migration import MigrationContext
+import sqlalchemy
 from sqlalchemy.orm.exc import NoResultFound
 
 from . import _cli_string
 from .. import announce
 from .. import db
 from ..db import session
-from ..db.models import Assembly, Chromosome
+from ..db.models import Assembly, BatchJob, BatchQueueItem, Chromosome
 from .. import mapping
 from .. import output
 from .. import sync
@@ -151,6 +152,55 @@ def sync_cache(wsdl_url, url_template, history=7):
           % (inserted, downloaded))
 
 
+def list_batch_jobs():
+    """
+    List batch jobs.
+    """
+    # For getting all batch jobs and their item counts, the following query
+    # might be more obvious at first thought. However, our current query below
+    # turns out to be more than twice as fast (and shorter).
+    #
+    # sq = session.query(
+    #     BatchQueueItem.batch_job_id,
+    #     sqlalchemy.func.count(BatchQueueItem.id).label('count')
+    # ).group_by(BatchQueueItem.batch_job_id).subquery()
+    # session.query(
+    #     BatchJob,
+    #     sq.c.count
+    # ).join(sq, BatchJob.id == sq.c.batch_job_id)
+    #
+    batch_jobs_with_counts = session.query(
+        BatchJob,
+        session.query(sqlalchemy.func.count('*')).filter(
+            BatchQueueItem.batch_job_id == BatchJob.id
+        ).label('count')
+    ).order_by(BatchJob.added.asc()).all()
+
+    # max() raises ValueError on an empty sequence, so bail out before
+    # computing column widths when there are no batch jobs at all.
+    if not batch_jobs_with_counts:
+        return
+
+    lengths = {
+        'id_len': max(len(str(j.id)) for j, _ in batch_jobs_with_counts),
+        'type_len': max(len(j.job_type) for j, _ in batch_jobs_with_counts),
+        'count_len': max(len(str(c)) for _, c in batch_jobs_with_counts),
+        'email_len': max(len(j.email) for j, _ in batch_jobs_with_counts)
+    }
+
+    template = ('{id:{id_len}} {type:<{type_len}} {added:%Y-%m-%d %H:%M:%S}'
+                ' {count:<{count_len}} {email:{email_len}}')
+
+    for batch_job, count in batch_jobs_with_counts:
+        print template.format(
+            id=batch_job.id,
+            type=batch_job.job_type,
+            added=batch_job.added,
+            count=count,
+            email=batch_job.email,
+            **lengths)
+
+
 def set_announcement(body, url=None):
     """
     Set announcement to show to the user.
@@ -309,6 +359,12 @@ def main():
         description=unset_announcement.__doc__.split('\n\n')[0])
     p.set_defaults(func=unset_announcement)
 
+    # Subparser 'batch-jobs'.
+    p = subparsers.add_parser(
+        'batch-jobs', help='list batch jobs',
+        description=list_batch_jobs.__doc__.split('\n\n')[0])
+    p.set_defaults(func=list_batch_jobs)
+
     # Subparser 'sync-cache'.
     p = subparsers.add_parser(
         'sync-cache', help='synchronize cache with remote Mutalyzer',
diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py
index a256831f1de687878e26fa1ecad06f23e256f94c..8281da387b4049ffbbdcc7d1410d07cf00d4db66 100644
--- a/mutalyzer/services/rpc.py
+++ b/mutalyzer/services/rpc.py
@@ -71,14 +71,20 @@ class MutalyzerService(ServiceBase):
         super(MutalyzerService, self).__init__(environ)
     #__init__
 
-    @srpc(Mandatory.ByteArray, Unicode, Unicode, _returns=Unicode)
-    def submitBatchJob(data, process='NameChecker', argument=''):
+    @srpc(Mandatory.ByteArray, Unicode, Unicode, Unicode, _returns=Unicode)
+    def submitBatchJob(data, process='NameChecker', argument='', email=None):
         """
         Submit a batch job.
 
         Input and output file formats for batch jobs are explained on the
         website <https://mutalyzer.nl/batch>.
 
+        Batch jobs are processed using round-robin scheduling grouped by email
+        address. Per email address, jobs are processed sequentially in order
+        of submission. Jobs with no email address specified end up in a shared
+        group. This means your job is likely to be processed sooner if you
+        provide an email address.
+
         On error an exception is raised:
           - detail: Human readable description of the error.
           - faultstring: A code to indicate the type of error.
@@ -90,6 +96,8 @@ class MutalyzerService(ServiceBase):
           (default), SyntaxChecker, PositionConverter, SnpConverter.
         @arg argument: Additional argument. Currently only used if batch_type
             is PositionConverter, denoting the human genome build.
+        @arg email: Optional email address. Notification of job completion
+            will be sent to this address.
 
         @return: Batch job identifier.
         """
@@ -139,7 +147,7 @@ class MutalyzerService(ServiceBase):
         if job is None:
             raise Fault('EPARSE',
                         'Could not parse input file, please check your file format.')
-        result_id = scheduler.addJob('job@webservice', job, columns,
+        result_id = scheduler.addJob(email or 'job@webservice', job, columns,
                                      batch_types[process], argument)
 
         return result_id
diff --git a/mutalyzer/website/templates/batch-job-progress.html b/mutalyzer/website/templates/batch-job-progress.html
index 7f64c01fd0bcb95029c71e732da60fec3b4546a0..77511f9dd300f7f862eb95e2752dadb933ce38fb 100644
--- a/mutalyzer/website/templates/batch-job-progress.html
+++ b/mutalyzer/website/templates/batch-job-progress.html
@@ -10,6 +10,9 @@
 <div id="if_items_left"{% if not items_left %} style="display:none"{% endif %}>
   <p>Your job is in progress with <span id="items_left">{{ items_left }}</span> items remaining.</p>
   <p>You will receive an email when the job is finished.</p>
+  <p>Please note that your jobs are processed in order of submission,
+  meaning you will not see any progress on this job until all your earlier
+  jobs have finished.</p>
 </div>
 <div id="ifnot_items_left"{% if items_left %} style="display:none"{% endif %}>
   <p>Your job is finished, please download the results:
diff --git a/mutalyzer/website/templates/batch-jobs.html b/mutalyzer/website/templates/batch-jobs.html
index 4b4db65719e0f514114f19e4d7c55f4e51007991..6955a18a9e9bebf799397a434fe347382fa331bc 100644
--- a/mutalyzer/website/templates/batch-jobs.html
+++ b/mutalyzer/website/templates/batch-jobs.html
@@ -79,6 +79,8 @@
     <p><a href="{{ url_for('.downloads', filename='batchtestnew.txt') }}">Download new style example file</a></p>
 
     <h4>Old Style</h4>
+    <p><strong>Deprecation warning:</strong> Please don't use this format, it
+    is deprecated and will not be supported in the future.</p>
     <p>This file format has a header-row, which consists of three tab
     delimited fields. In each following row the corressponding data is also
     tab delimited.</p>