From 7e0db4976743e8054e09a036d8782b333d1007fd Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Fri, 30 Oct 2015 15:39:17 +0100
Subject: [PATCH] Process batch jobs grouped by email address

We previously processed batch jobs round-robin, i.e., one item
for each job per round. This is fair from a per-job point of view,
but not from a per-user point of view when one user has many jobs.

We now process one item for each user per round, picking the
user's oldest job (the one with the lowest id) if that user has
more than one. Users are identified by their email address.

Batch jobs submitted via the webservices all have the same email
address, so they are effectively throttled as if all from the
same user. Adapting the webservices to also allow setting an
email address is future work.
---
 mutalyzer/Scheduler.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/mutalyzer/Scheduler.py b/mutalyzer/Scheduler.py
index e4816720..f82671a7 100644
--- a/mutalyzer/Scheduler.py
+++ b/mutalyzer/Scheduler.py
@@ -364,9 +364,18 @@ Mutalyzer batch scheduler""" % url)
         refers to the reason of alteration / skip.
         """
         while not self.stopped():
-            batch_jobs = BatchJob.query
-
-            if batch_jobs.count() == 0:
+            # Group batch jobs by email address and retrieve the oldest one
+            # (lowest id) for each address. This improves fairness when
+            # certain users have many jobs.
+            # Note that batch jobs submitted via the webservices all have the
+            # same email address, so they are effectively throttled as if all
+            # from the same user. Adapting the webservices to also allow
+            # setting an email address is future work.
+            batch_jobs = BatchJob.query.filter(BatchJob.id.in_(
+                session.query(func.min(BatchJob.id)).group_by(BatchJob.email))
+            ).all()
+
+            if len(batch_jobs) == 0:
                 break
 
             for batch_job in batch_jobs:
-- 
GitLab