From 273012297382e4eb840955fec942036c053d8f87 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Sat, 6 Sep 2014 02:18:39 +0200
Subject: [PATCH] Do not clean up the cache during request handling

Previously, after writing any file Mutalyzer would check the cache size
and keep removing files for as long as it exceeded the maximum. This
caused long delays when many files had to be removed, because the total
size was recalculated after every removal.

Following the principle of separation of concerns, cache cleanup is now
handled by a separate script on our production servers, which uses the
inotifywait tool to clean up the cache whenever files are added to it
(a sketch of such a script is appended after this patch). That approach
also does not suffer from the performance problem.

Note that this removes the `MAX_CACHE_SIZE` configuration setting.

Fixes #18
---
 mutalyzer/Retriever.py               | 53 ----------------------------
 mutalyzer/config/default_settings.py |  3 --
 2 files changed, 56 deletions(-)

diff --git a/mutalyzer/Retriever.py b/mutalyzer/Retriever.py
index 77da54e8..dd52c60a 100644
--- a/mutalyzer/Retriever.py
+++ b/mutalyzer/Retriever.py
@@ -47,8 +47,6 @@ class Retriever(object) :
     configuration file to initialise the class private variables.
 
     Private methods:
-        - _foldersize(folder) ; Return the size of a folder.
-        - _cleancache() ; Keep the cache at a maximum size.
         - _nametofile(name) ; Convert a name to a filename.
         - _write(raw_data, filename, extract) ; Write a record to a file.
         - _calcHash(content) ; Calculate the md5sum of 'content'.
@@ -81,54 +79,6 @@ class Retriever(object) :
         self.fileType = None
     #__init__
 
-    def _foldersize(self, folder) :
-        """
-        Return the size of a folder in bytes.
-
-        @arg folder: Name of a directory
-        @type folder: string
-
-        @return: The size of the directory
-        @rtype: integer
-        """
-
-        folder_size = 0
-        for (path, dirs, files) in os.walk(folder) :
-            for fileName in files :
-                folder_size += os.path.getsize(os.path.join(path, fileName))
-
-        return folder_size
-    #_foldersize
-
-    def _cleancache(self) :
-        """
-        Keep removing files until the size of the cache is less than the
-        maximum size.
-        First, the cache checked for its size, if it exceeds the maximum
-        size the ``oldest'' files are deleted. Note that accessing a file
-        makes it ``new''.
-        """
-        if self._foldersize(settings.CACHE_DIR) < settings.MAX_CACHE_SIZE:
-            return
-
-        # Build a list of files sorted by access time.
-        cachelist = []
-        for (path, dirs, files) in os.walk(settings.CACHE_DIR) :
-            for filename in files :
-                filepath = os.path.join(path, filename)
-                cachelist.append(
-                    (os.stat(filepath).st_atime, filepath))
-        cachelist.sort()
-
-        # Now start removing pairs of files until the size of the folder is
-        # small enough (or until the list is exhausted).
-        for i in range(0, len(cachelist)) :
-            os.remove(cachelist[i][1])
-            if self._foldersize(settings.CACHE_DIR) < settings.MAX_CACHE_SIZE:
-                break;
-        #for
-    #_cleancache
-
     def _nametofile(self, name) :
         """
         Convert an accession number to a filename.
@@ -162,9 +112,6 @@ class Retriever(object) :
         out_handle.write(data)
         out_handle.close()
 
-        # Since we put something in the cache, check if it needs cleaning.
-        self._cleancache()
-
         return out_handle.name # return the full path to the file
     #_write
 
diff --git a/mutalyzer/config/default_settings.py b/mutalyzer/config/default_settings.py
index 8166f03d..43009e09 100644
--- a/mutalyzer/config/default_settings.py
+++ b/mutalyzer/config/default_settings.py
@@ -19,9 +19,6 @@ EMAIL = 'mutalyzer@humgen.nl'
 # reference files from NCBI or user) and batch job results.
 CACHE_DIR = '/tmp'
 
-# Maximum size of the cache directory (in bytes).
-MAX_CACHE_SIZE = 50 * 1048576 # 50 MB
-
 # Maximum size for uploaded and downloaded files (in bytes).
 MAX_FILE_SIZE = 10 * 1048576 # 10 MB
 
--
GitLab
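
The external cleanup script referenced in the commit message is not part of
this patch. The following is a minimal sketch of what an inotifywait-driven
cleanup could look like, written in Python purely for illustration (the real
production script may well be a shell script). The CACHE_DIR and MAX_BYTES
values and the prune() helper are assumptions, not the actual script running
on the Mutalyzer servers; it requires the inotify-tools package.

#!/usr/bin/env python
"""
Sketch of an inotifywait-driven cache cleanup (illustrative only, not the
actual Mutalyzer production script). Requires inotify-tools.
"""
import os
import subprocess

CACHE_DIR = '/tmp'            # assumed to match settings.CACHE_DIR
MAX_BYTES = 50 * 1048576      # assumed limit, the old MAX_CACHE_SIZE default


def prune(directory, max_bytes):
    """Remove least recently accessed files until the directory fits."""
    # Collect (atime, size, path) in a single walk, so the total size is
    # tracked incrementally instead of being recomputed after each removal.
    entries = []
    for path, _dirs, files in os.walk(directory):
        for name in files:
            filepath = os.path.join(path, name)
            stat = os.stat(filepath)
            entries.append((stat.st_atime, stat.st_size, filepath))

    total = sum(size for _atime, size, _path in entries)
    for _atime, size, filepath in sorted(entries):   # oldest access first
        if total <= max_bytes:
            break
        os.remove(filepath)
        total -= size


def main():
    # inotifywait -m prints one line per event; run a cleanup pass every
    # time a file is created in (or moved into) the cache directory.
    watcher = subprocess.Popen(
        ['inotifywait', '-m', '-q', '-e', 'create', '-e', 'moved_to',
         '--format', '%w%f', CACHE_DIR],
        stdout=subprocess.PIPE)
    while True:
        line = watcher.stdout.readline()
        if not line:          # inotifywait exited
            break
        prune(CACHE_DIR, MAX_BYTES)


if __name__ == '__main__':
    main()

Walking the directory once and decrementing a running total avoids the
repeated full-directory size calculations that made the removed _cleancache
slow, and running outside the web application keeps request handling free of
cleanup delays, which is the point of this commit.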