Skip to content
Snippets Groups Projects
Commit 27301229 authored by Vermaat's avatar Vermaat
Browse files

Do not cleanup the cache during request handling

Previously, after writing any file, Mutalyzer would check the cache size
and start removing files while it exceeded the maximum. However, this
caused long delays in case many files had to be removed (it would re-
calculate the total size after each removal).

Following the principle of separating concerns, this is now handled by
a separate script on our production servers, which uses the inotifywait
tool to clean up the cache whenever files are added to it. It also
doesn't suffer from the performance problem.

Note that this removes the `MAX_CACHE_SIZE` configuration setting.

Fixes #18
parent 42607fd4
No related branches found
No related tags found
No related merge requests found
......@@ -47,8 +47,6 @@ class Retriever(object) :
configuration file to initialise the class private variables.
Private methods:
- _foldersize(folder) ; Return the size of a folder.
- _cleancache() ; Keep the cache at a maximum size.
- _nametofile(name) ; Convert a name to a filename.
- _write(raw_data, filename, extract) ; Write a record to a file.
- _calcHash(content) ; Calculate the md5sum of 'content'.
......@@ -81,54 +79,6 @@ class Retriever(object) :
self.fileType = None
#__init__
def _foldersize(self, folder) :
"""
Return the size of a folder in bytes.
@arg folder: Name of a directory
@type folder: string
@return: The size of the directory
@rtype: integer
"""
folder_size = 0
for (path, dirs, files) in os.walk(folder) :
for fileName in files :
folder_size += os.path.getsize(os.path.join(path, fileName))
return folder_size
#_foldersize
def _cleancache(self):
    """
    Keep removing files until the size of the cache is less than the
    maximum size.

    First, the cache is checked for its size; if it exceeds the maximum
    size the ``oldest'' files are deleted. Note that accessing a file
    makes it ``new''.
    """
    # Walk the cache once, recording each file's last access time and
    # size. This gives us both the total size and the removal order in
    # a single pass, instead of re-walking the whole tree (via
    # _foldersize) after every single removal, which made cleanup
    # quadratic in the number of cached files.
    entries = []
    total_size = 0
    for (path, dirs, files) in os.walk(settings.CACHE_DIR):
        for filename in files:
            filepath = os.path.join(path, filename)
            stats = os.stat(filepath)
            entries.append((stats.st_atime, stats.st_size, filepath))
            total_size += stats.st_size
    if total_size < settings.MAX_CACHE_SIZE:
        return
    # Remove least-recently-accessed files first (smallest st_atime),
    # updating the running total instead of re-measuring the folder.
    entries.sort()
    for atime, size, filepath in entries:
        os.remove(filepath)
        total_size -= size
        if total_size < settings.MAX_CACHE_SIZE:
            break
    #for
#_cleancache
def _nametofile(self, name) :
"""
Convert an accession number to a filename.
......@@ -162,9 +112,6 @@ class Retriever(object) :
out_handle.write(data)
out_handle.close()
# Since we put something in the cache, check if it needs cleaning.
self._cleancache()
return out_handle.name # return the full path to the file
#_write
......
......@@ -19,9 +19,6 @@ EMAIL = 'mutalyzer@humgen.nl'
# reference files from NCBI or user) and batch job results.
CACHE_DIR = '/tmp'
# Maximum size of the cache directory (in bytes).
MAX_CACHE_SIZE = 50 * 1048576 # 50 MB
# Maximum size for uploaded and downloaded files (in bytes).
MAX_FILE_SIZE = 10 * 1048576 # 10 MB
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment