diff --git a/.gitignore b/.gitignore index b59df5c30a75add3fb5155c1e392d91c87ae7149..3c4834e999090b3648e7c46d1d921fd651cd82a9 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,4 @@ /dist /mutalyzer.egg-info /static -/mutalyzer.conf +/settings.py diff --git a/mutalyzer/Db.py b/mutalyzer/Db.py index 9410e3b0b97fe37fba3cb3b1382b8119dc22429e..90b6d4f5ad8196cfc3ced73e3b33e76f483f266e 100644 --- a/mutalyzer/Db.py +++ b/mutalyzer/Db.py @@ -25,7 +25,7 @@ import warnings import MySQLdb from mutalyzer import util -from mutalyzer import config +from mutalyzer.config import settings # @@ -74,14 +74,11 @@ class Db(): is unfortunately not implemented in (most versions of) the Python MySQLdb module. Therefore we manually implement automatic reconnects in the query - method, but also optionally use the reconnect option from MySQLdb - if specified in the Mutalyzer configuration. + method. Also see Trac ticket #91. """ - kwargs = dict(user=self._user, db=self._database, host=self._host) - if config.get('autoReconnect'): - kwargs.update(reconnect=True) - self._connection = MySQLdb.connect(**kwargs) + self._connection = MySQLdb.connect( + user=self._user, db=self._database, host=self._host) #_connect def query(self, statement): @@ -209,7 +206,7 @@ class Mapping(Db) : @arg build: The version of the mapping database @type build: string """ - Db.__init__(self, build, config.get('LocalMySQLuser'), config.get('LocalMySQLhost')) + Db.__init__(self, build, settings.MYSQL_USER, settings.MYSQL_HOST) #__init__ def get_NM_version(self, mrnaAcc) : @@ -873,8 +870,8 @@ class Cache(Db) : """ Initialise the Db parent class. Use the internalDb. 
""" - Db.__init__(self, config.get('internalDb'), - config.get('LocalMySQLuser'), config.get('LocalMySQLhost')) + Db.__init__(self, settings.MYSQL_DATABASE, + settings.MYSQL_USER, settings.MYSQL_HOST) #__init__ def insertGB(self, accNo, GI, fileHash, ChrAccVer, ChrStart, @@ -1228,8 +1225,8 @@ class Cache(Db) : created >= DATE_SUB(CURDATE(), INTERVAL %s DAY)) ); """, (mrnaAcc, - config.get('proteinLinkNoneLifetime'), - config.get('proteinLinkLifetime')) + settings.PROTEIN_LINK_NONE_LIFETIME, + settings.PROTEIN_LINK_LIFETIME) ret = self.query(statement) return ret[0][0] @@ -1263,8 +1260,8 @@ class Cache(Db) : created >= DATE_SUB(CURDATE(), INTERVAL %s DAY)) ); """, (protAcc, - config.get('proteinLinkNoneLifetime'), - config.get('proteinLinkLifetime')) + settings.PROTEIN_LINK_NONE_LIFETIME, + settings.PROTEIN_LINK_LIFETIME) ret = self.query(statement) return ret[0][0] @@ -1325,9 +1322,9 @@ class Batch(Db) : """ Initialise the Db parent class. Use the internalDb. """ - Db.__init__(self, config.get('internalDb'), - config.get('LocalMySQLuser'), - config.get('LocalMySQLhost')) + Db.__init__(self, settings.MYSQL_DATABASE, + settings.MYSQL_USER, + settings.MYSQL_HOST) #__init__ def isJobListEmpty(self) : @@ -1612,9 +1609,9 @@ class Counter(Db): """ Initialise the Db parent class. Use the internalDb. """ - Db.__init__(self, config.get('internalDb'), - config.get('LocalMySQLuser'), - config.get('LocalMySQLhost')) + Db.__init__(self, settings.MYSQL_DATABASE, + settings.MYSQL_USER, + settings.MYSQL_HOST) def increment(self, service, interface): """ diff --git a/mutalyzer/File.py b/mutalyzer/File.py index 0261999eff3d7ef734b11b24466c0fb502632bb0..cce718c34fb8dab59317df7c3888c7ed533e9c3d 100644 --- a/mutalyzer/File.py +++ b/mutalyzer/File.py @@ -27,7 +27,11 @@ import types # UnicodeType from cStringIO import StringIO from mutalyzer import util -from mutalyzer import config +from mutalyzer.config import settings + + +# Amount of bytes to be read for determining the file type. 
+BUFFER_SIZE = 32768 class File() : @@ -160,7 +164,7 @@ class File() : # I don't think the .seek(0) is needed now we created a new handle new_handle.seek(0) - buf = new_handle.read(config.get('bufSize')) + buf = new_handle.read(BUFFER_SIZE) # Default dialect dialect = 'excel' @@ -280,7 +284,7 @@ class File() : jobl = [(l+1, row) for l, row in enumerate(job)] #TODO: Add more new style old style logic - if jobl[0][1] == config.get('header') : #Old style NameCheckBatch job + if jobl[0][1] == ['AccNo', 'Genesymbol', 'Mutation']: #Old style NameCheckBatch job ret = [] notthree = [] emptyfield = [] @@ -365,7 +369,7 @@ class File() : err = float(len(errlist))/len(ret) if err == 0: return (ret, columns) - elif err < config.get('threshold'): + elif err < settings.BATCH_JOBS_ERROR_THRESHOLD: #allow a 5 (default) percent threshold for errors in batchfiles self.__output.addMessage(__file__, 3, "EBPARSE", "There were errors in your batch entry file, they are " @@ -389,7 +393,7 @@ class File() : @rtype: string """ handle.seek(0) - buf = handle.read(config.get('bufSize')) #: The bufSize configuration variables. + buf = handle.read(BUFFER_SIZE) MagicInstance = magic.open(magic.MAGIC_MIME) MagicInstance.load() diff --git a/mutalyzer/GenRecord.py b/mutalyzer/GenRecord.py index 05f40be534875e36fdb3dc6f3fe9dee975996d02..917d056c5aa71d652217008b4c4042cf061d4ff8 100644 --- a/mutalyzer/GenRecord.py +++ b/mutalyzer/GenRecord.py @@ -19,11 +19,14 @@ search for them each time. 
import Bio from mutalyzer import util -from mutalyzer import config from mutalyzer import Crossmap from mutalyzer import Db +SPLICE_ALARM = 2 +SPLICE_WARN = 5 + + class PList(object) : """ A position list object, to store a general location and a list of @@ -841,11 +844,11 @@ class GenRecord() : warning = 'WSPLICE_OTHER' str_transcript = 'transcript %s' % transcript.name - if intronPos <= config.get('spliceAlarm'): + if intronPos <= SPLICE_ALARM: self.__output.addMessage(__file__, 2, warning, "Mutation on splice site in gene %s %s." % ( gene.name, str_transcript)) - elif intronPos <= config.get('spliceWarn'): + elif intronPos <= SPLICE_WARN: self.__output.addMessage(__file__, 2, warning, "Mutation near splice site in gene %s %s." % ( gene.name, str_transcript)) diff --git a/mutalyzer/Retriever.py b/mutalyzer/Retriever.py index 38e7140fb0c8dd75a623171ff2d6a0f1379cd6f7..7a80c1d916fcd4a9d9f70a722475c699c847d656 100644 --- a/mutalyzer/Retriever.py +++ b/mutalyzer/Retriever.py @@ -26,7 +26,7 @@ from xml.parsers import expat from httplib import HTTPException, IncompleteRead from mutalyzer import util -from mutalyzer import config +from mutalyzer.config import settings from mutalyzer.parsers import lrg from mutalyzer.parsers import genbank @@ -79,9 +79,9 @@ class Retriever(object) : """ self._output = output self._database = database - if not os.path.isdir(config.get('cache')) : - os.mkdir(config.get('cache')) - Entrez.email = config.get('email') + if not os.path.isdir(settings.CACHE_DIR) : + os.mkdir(settings.CACHE_DIR) + Entrez.email = settings.EMAIL self.fileType = None #__init__ @@ -112,12 +112,12 @@ class Retriever(object) : size the ``oldest'' files are deleted. Note that accessing a file makes it ``new''. """ - if self._foldersize(config.get('cache')) < config.get('cachesize'): + if self._foldersize(settings.CACHE_DIR) < settings.MAX_CACHE_SIZE: return # Build a list of files sorted by access time. 
cachelist = [] - for (path, dirs, files) in os.walk(config.get('cache')) : + for (path, dirs, files) in os.walk(settings.CACHE_DIR) : for filename in files : filepath = os.path.join(path, filename) cachelist.append( @@ -128,7 +128,7 @@ class Retriever(object) : # small enough (or until the list is exhausted). for i in range(0, len(cachelist)) : os.remove(cachelist[i][1]) - if self._foldersize(config.get('cache')) < config.get('cachesize'): + if self._foldersize(settings.CACHE_DIR) < settings.MAX_CACHE_SIZE: break; #for #_cleancache @@ -143,7 +143,7 @@ class Retriever(object) : @return: A filename @rtype: string """ - return config.get('cache') + '/' + name + "." + self.fileType + ".bz2" + return os.path.join(settings.CACHE_DIR, name + "." + self.fileType + ".bz2") #_nametofile def _write(self, raw_data, filename) : @@ -449,7 +449,7 @@ class GenBankRetriever(Retriever): self._output.addMessage(__file__, 4, 'ERETR', 'Could not retrieve %s.' % name) return None - if length > config.get('maxDldSize'): + if length > settings.MAX_FILE_SIZE: self._output.addMessage(__file__, 4, 'ERETR', 'Could not retrieve %s.' % name) return None @@ -508,7 +508,7 @@ class GenBankRetriever(Retriever): return None # The slice can not be too big. - if stop - start > config.get('maxDldSize'): + if stop - start > settings.MAX_FILE_SIZE: return None # Check whether we have seen this slice before. 
@@ -660,8 +660,7 @@ class GenBankRetriever(Retriever): info = handle.info() if info["Content-Type"] == "text/plain" : length = int(info["Content-Length"]) - if length > config.get('minDldSize') and \ - length < config.get('maxDldSize'): + if 512 < length < settings.MAX_FILE_SIZE: raw_data = handle.read() md5sum = self._calcHash(raw_data) UD = self._database.getGBFromHash(md5sum) @@ -864,7 +863,7 @@ class LRGRetriever(Retriever): @rtype: string """ - prefix = config.get('lrgurl') + prefix = settings.LRG_PREFIX_URL url = prefix + "%s.xml" % name pendingurl = prefix + "pending/%s.xml" % name @@ -907,7 +906,7 @@ class LRGRetriever(Retriever): if info["Content-Type"] == "application/xml" and info.has_key("Content-length"): length = int(info["Content-Length"]) - if config.get('minDldSize') < length < config.get('maxDldSize'): + if 512 < length < settings.MAX_FILE_SIZE: raw_data = handle.read() handle.close() diff --git a/mutalyzer/Scheduler.py b/mutalyzer/Scheduler.py index 39494f5b416355307f5347492455fb7933f504a8..62d50a8a5d0724fec6a6c27ad56ce8f769121605 100644 --- a/mutalyzer/Scheduler.py +++ b/mutalyzer/Scheduler.py @@ -20,7 +20,7 @@ import smtplib # smtplib.STMP from email.mime.text import MIMEText # MIMEText import mutalyzer -from mutalyzer import config +from mutalyzer.config import settings from mutalyzer import variantchecker from mutalyzer.grammar import Grammar from mutalyzer.output import Output @@ -115,16 +115,15 @@ Thanks for using Mutalyzer. 
With kind regards, -Mutalyzer batch checker.""" % url) +Mutalyzer batch scheduler""" % url) - message["Subject"] = config.get('mailSubject') - message["From"] = config.get('mailFrom') + message["Subject"] = "Result of your Mutalyzer batch job" + message["From"] = settings.EMAIL message["To"] = mailTo smtpInstance = smtplib.SMTP() smtpInstance.connect() - smtpInstance.sendmail(config.get('mailFrom'), mailTo, - message.as_string()) + smtpInstance.sendmail(settings.EMAIL, mailTo, message.as_string()) smtpInstance.quit() #__sendMail @@ -388,12 +387,28 @@ Mutalyzer batch checker.""" % url) outputline += batchOutput[0] #Output - filename = "%s/Results_%s.txt" % (config.get('resultsDir'), i) + filename = "%s/Results_%s.txt" % (settings.CACHE_DIR, i) if not os.path.exists(filename) : # If the file does not yet exist, create it with the correct # header above it. The header is read from the config file as # a list. We need a tab delimited string. - header = config.get('nameCheckOutHeader') + header = ['Input', + 'Errors | Messages', + 'AccNo', + 'Genesymbol', + 'Variant', + 'Reference Sequence Start Descr.', + 'Coding DNA Descr.', + 'Protein Descr.', + 'GeneSymbol Coding DNA Descr.', + 'GeneSymbol Protein Descr.', + 'Genomic Reference', + 'Coding Reference', + 'Protein Reference', + 'Affected Transcripts', + 'Affected Proteins', + 'Restriction Sites Created', + 'Restriction Sites Deleted'] handle = open(filename, 'a') handle.write("%s\n" % "\t".join(header)) #if @@ -447,12 +462,12 @@ Mutalyzer batch checker.""" % url) result = "|".join(output.getBatchMessages(3)) #Output - filename = "%s/Results_%s.txt" % (config.get('resultsDir'), i) + filename = "%s/Results_%s.txt" % (settings.CACHE_DIR, i) if not os.path.exists(filename) : # If the file does not yet exist, create it with the correct # header above it. The header is read from the config file as # a list. We need a tab delimited string. 
- header = config.get('syntaxCheckOutHeader') + header = ['Input', 'Status'] handle = open(filename, 'a') handle.write("%s\n" % "\t".join(header)) #if @@ -549,12 +564,15 @@ Mutalyzer batch checker.""" % url) error = "%s" % "|".join(O.getBatchMessages(3)) #Output - filename = "%s/Results_%s.txt" % (config.get('resultsDir'), i) + filename = "%s/Results_%s.txt" % (settings.CACHE_DIR, i) if not os.path.exists(filename) : # If the file does not yet exist, create it with the correct # header above it. The header is read from the config file as # a list. We need a tab delimited string. - header = config.get('positionConverterOutHeader') + header = ['Input Variant', + 'Errors', + 'Chromosomal Variant', + 'Coding Variant(s)'] handle = open(filename, 'a') handle.write("%s\n" % "\t".join(header)) #if @@ -609,12 +627,14 @@ Mutalyzer batch checker.""" % url) outputline += "%s\t" % "|".join(O.getBatchMessages(3)) #Output - filename = "%s/Results_%s.txt" % (config.get('resultsDir'), i) + filename = "%s/Results_%s.txt" % (settings.CACHE_DIR, i) if not os.path.exists(filename) : # If the file does not yet exist, create it with the correct # header above it. The header is read from the config file as # a list. We need a tab delimited string. - header = config.get('snpConverterOutHeader') + header = ['Input Variant', + 'HGVS description(s)', + 'Errors | Messages'] handle = open(filename, 'a') handle.write("%s\n" % "\t".join(header)) #if diff --git a/mutalyzer/config.py b/mutalyzer/config.py deleted file mode 100644 index dc8b5a83ebf14e62110050b9cad3af6806657eef..0000000000000000000000000000000000000000 --- a/mutalyzer/config.py +++ /dev/null @@ -1,176 +0,0 @@ -""" -Module for reading the configuration values from a configuration file. - -All communication with this module should be done by using the get function -which returns a configuration value, given a name. - -Reading the configuration file is implemented lazily and as such done upon the -first call to the get function. 
- -Configuration is read from the file specified by the `MUTALYZER_SETTINGS` -environment variable, or `mutalyzer.conf` in the current directory if it is -not set. -""" - - -import os - -from configobj import ConfigObj - -from mutalyzer.util import singleton - - -class ConfigurationError(Exception): - """ - Raised when a configuration file cannot be read. - """ - pass - - -def get(name): - """ - Get a configuration value by name. - - :arg name: Name for the configuration value. - :type name: string - - :raise ConfigurationError: If configuration value could not be read. - """ - return _Config().get(name) - - -@singleton -class _Config(): - """ - Read the configuration file and provide access to its values. - - Please note the limitations from the use of the @singleton decorator as - described in its docstring. - """ - def __init__(self): - """ - Initialise the class with variables read from the configuration - file. - - Configuration values are read from the file specified by the - `MUTALYZER_SETTINGS` environment variable, or `mutalyzer.conf` in the - current directory if it is not set. - - :raises ConfigurationError: If configuration could not be read. - """ - filename = os.environ.get('MUTALYZER_SETTINGS', 'mutalyzer.conf') - config = self._load_config(filename) - - # We define default values for many configuration settings (except for - # some that are mandatory for the user to define, i.e. those in the - # extras/config.user.example file). - # Todo: Do not duplicate default values here and in the example config - # file template. 
- config.setdefault('cachesize', 50) - config.setdefault('maxDldSize', 10) - config.setdefault('minDldSize', 512) - config.setdefault('lrgurl', 'ftp://ftp.ebi.ac.uk/pub/databases/lrgex/') - config.setdefault('internalDb', 'mutalyzer') - config.setdefault('dbNames', ['hg18', 'hg19', 'mm10']) - config.setdefault('defaultDb', 'hg19') - config.setdefault('LocalMySQLuser', 'mutalyzer') - config.setdefault('LocalMySQLhost', 'localhost') - config.setdefault('autoReconnect', False) - config.setdefault('proteinLinkLifetime', 30) - config.setdefault('proteinLinkNoneLifetime', 5) - config.setdefault('datestring', '%Y-%m-%d %H:%M:%S') - config.setdefault('loglevel', 3) - config.setdefault('outputlevel', 1) - config.setdefault('debug', True) - config.setdefault('flanksize', 25) - config.setdefault('maxvissize', 25) - config.setdefault('flankclipsize', 6) - config.setdefault('mailFrom', 'noreply@humgen.nl') - config.setdefault('mailSubject', 'Result of Mutalyzer batch check.') - config.setdefault('resultsDir', config['cache']) - config.setdefault('PIDfile', '/var/run/mutalyzer/mutalyzer-batchd.pid') - config.setdefault('batchInputMaxSize', 5) - config.setdefault('nameCheckOutHeader', - ['Input', 'Errors | Messages', 'AccNo', 'Genesymbol', 'Variant', - 'Reference Sequence Start Descr.', 'Coding DNA Descr.', - 'Protein Descr.', 'GeneSymbol Coding DNA Descr.', - 'GeneSymbol Protein Descr.', 'Genomic Reference', - 'Coding Reference', 'Protein Reference', 'Affected Transcripts', - 'Affected Proteins', 'Restriction Sites Created', - 'Restriction Sites Deleted']) - config.setdefault('syntaxCheckOutHeader', ['Input', 'Status']) - config.setdefault('positionConverterOutHeader', - ['Input Variant', 'Errors', 'Chromosomal Variant', 'Coding Variant(s)']) - config.setdefault('snpConverterOutHeader', - ['Input Variant', 'HGVS description(s)', 'Errors | Messages']) - config.setdefault('bufSize', 32768) - config.setdefault('header', ['AccNo', 'Genesymbol', 'Mutation']) - 
config.setdefault('threshold', 0.05) - config.setdefault('spliceAlarm', 2) - config.setdefault('spliceWarn', 5) - config.setdefault('piwik', False) - config.setdefault('piwikBase', 'https://piwik.example.com') - config.setdefault('piwikSite', 1) - - try: - # We explicitely read all configuration values ad store them in - # our own dictionary. This makes sure we notice missing or - # incorrect values upon instantiation. - - # A few 'special' values. - self._values = {'autoReconnect': config.as_bool('autoReconnect'), - 'debug': config.as_bool('debug'), - 'piwik': config.as_bool('piwik'), - 'threshold': config.as_float('threshold')} - - # Simple string values. - for name in ('email', 'cache', 'lrgurl', 'internalDb', 'dbNames', - 'LocalMySQLuser', 'LocalMySQLhost', 'log', - 'datestring', 'mailFrom', 'mailSubject', - 'resultsDir', 'nameCheckOutHeader', 'defaultDb', - 'syntaxCheckOutHeader', 'positionConverterOutHeader', - 'snpConverterOutHeader', 'PIDfile', 'header', - 'piwikBase'): - self._values[name] = config[name] - - # Simple integer values. - for name in ('minDldSize', 'loglevel', 'outputlevel', 'flanksize', - 'maxvissize', 'flankclipsize', 'bufSize', - 'spliceAlarm', 'spliceWarn', 'piwikSite', - 'proteinLinkLifetime', 'proteinLinkNoneLifetime'): - self._values[name] = config.as_int(name) - - # File sizes (given in megabytes, stored in bytes). - for name in ('cachesize', 'maxDldSize', 'batchInputMaxSize'): - self._values[name] = config.as_int(name) * 1048576 - - except KeyError as e: - raise ConfigurationError('Missing configuration value: %s' % e) - - def get(self, name): - """ - Get a configuration value by name. - - :arg name: Name for the configuration value. - :type name: string - - :raises ConfigurationError: If given configuration value name does not - exist. 
- """ - try: - return self._values[name] - except KeyError: - raise ConfigurationError('No such configuration value: %s' % name) - - def _load_config(self, filename): - """ - Create a `ConfigObj` from the configuration in `filename`. - """ - try: - return ConfigObj(filename) - except IOError: - raise ConfigurationError('Could not open configuration file: %s' - % filename) - except SyntaxError: - raise ConfigurationError('Could not parse configuration file: %s' - % filename) diff --git a/mutalyzer/config/__init__.py b/mutalyzer/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6956473134c8ee44bcbcecf1dd8522d95e9ab0ee --- /dev/null +++ b/mutalyzer/config/__init__.py @@ -0,0 +1,74 @@ +""" +Module for reading the configuration values from a configuration file. + +Default values will be read from the :mod:`mutalyzer.config.default_settings` +module and overridden by any values from the module specified by the +`MUTALYZER_SETTINGS` environment variable. + +Alternatively, the default values can be overridden manually using the +:meth:`settings.configure` method, in which case the `MUTALYZER_SETTINGS` +environment variable will not be used. +""" + + +import flask.config +import os + +from mutalyzer import util + + +ENVIRONMENT_VARIABLE = 'MUTALYZER_SETTINGS' + + +class Settings(flask.config.Config, util.AttributeDictMixin): + """ + Dictionary with some extra ways to fill it from files or special + dictionaries (see `flask.config.Config`) and attribute access. + """ + def __init__(self): + # We fix the root_path argument to the current working directory. + super(Settings, self).__init__(os.getcwd()) + + +class LazySettings(util.LazyObject): + """ + A lazy proxy for a settings object. + + Taken from `Django <https://www.djangoproject.com/>`_ + (`django.conf.LazySettings`). + + .. note:: Django also does some logging config magic here, we did not copy + that. 
+ """ + def _setup(self, settings=None): + """ + Load the settings module pointed to by the environment variable. This + is used the first time we need any settings at all, if the user has not + previously configured the settings manually. + """ + self._wrapped = Settings() + self._wrapped.from_object('mutalyzer.config.default_settings') + if settings is None: + self._wrapped.from_envvar(ENVIRONMENT_VARIABLE) + else: + self._wrapped.update(settings) + + def configure(self, settings): + """ + Called to manually configure the settings. The `settings` argument is + a dictionary whose values override the defaults; it is applied with + the `update` method of the wrapped settings object. + """ + if self._wrapped is not None: + raise RuntimeError('settings already configured') + self._setup(settings) + + @property + def configured(self): + """ + Returns True if the settings have already been configured. + """ + return self._wrapped is not None + + +settings = LazySettings() diff --git a/mutalyzer/config/default_settings.py b/mutalyzer/config/default_settings.py new file mode 100644 index 0000000000000000000000000000000000000000..bc4b12cf9ab2d0d9298b888bb58f848422e3045a --- /dev/null +++ b/mutalyzer/config/default_settings.py @@ -0,0 +1,80 @@ +""" +Default Mutalyzer settings. Override these with settings in the module +pointed-to by the `MUTALYZER_SETTINGS` environment variable. +""" + + +# Use Mutalyzer in debug mode. +DEBUG = True + +# This address is used in contact information on the website, as sender in +# batch job notifications, and with retrieval of records at the NCBI using +# Entrez. +EMAIL = 'mutalyzer@humgen.nl' + +# The cache directory. Used to store uploaded and downloaded files (e.g., +# reference files from NCBI or user) and batch job results. +import tempfile +CACHE_DIR = tempfile.mkdtemp() + +# Maximum size of the cache directory (in bytes). +MAX_CACHE_SIZE = 50 * 1048576 # 50 MB + +# Maximum size for uploaded and downloaded files (in bytes). 
+MAX_FILE_SIZE = 10 * 1048576 # 10 MB + +# Host name for local MySQL databases. +MYSQL_HOST = 'localhost' + +# User for local MySQL databases. +MYSQL_USER = 'mutalyzer' + +# Local MySQL database name. +MYSQL_DATABASE = 'mutalyzer' + +# Available databases with mapping information. +DB_NAMES = ['hg18', 'hg19', 'mm10'] + +# Default database for mapping information. +DEFAULT_DB = 'hg19' + +# Name and location of the log file. +import os +import tempfile +log_handle, log_filename = tempfile.mkstemp() +os.close(log_handle) +LOG_FILE = log_filename + +# Level of logged messages. +LOG_LEVEL = 3 + +# Level of output messages. +OUTPUT_LEVEL = 1 + +# Format of time prefix for log messages. Can be anything that is accepted as +# the format argument of time.strftime. +# http://docs.python.org/2/library/time.html#time.strftime +LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S" + +# Prefix URL from where LRG files are fetched. +LRG_PREFIX_URL = 'ftp://ftp.ebi.ac.uk/pub/databases/lrgex/' + +# Allow for this fraction of errors in batch jobs. +BATCH_JOBS_ERROR_THRESHOLD = 0.05 + +# Number of days a cached transcript->protein link from the NCBI is considered +# valid. +PROTEIN_LINK_LIFETIME = 30 + +# Number of days a cached nonexisting transcript->protein link from the NCBI +# is considered valid. +PROTEIN_LINK_NONE_LIFETIME = 5 + +# Is Piwik enabled? +PIWIK = False + +# Base URL for the Piwik server. +PIWIK_BASE_URL = 'https://piwik.example.com' + +# Piwik site ID for Mutalyzer. +PIWIK_SITE_ID = 1 diff --git a/mutalyzer/mutator.py b/mutalyzer/mutator.py index 55750bd92605e05f6cf6b7d7eda50763a24c7d20..8047d932d4bab1ca4fa66b2020e2d69428d97853 100644 --- a/mutalyzer/mutator.py +++ b/mutalyzer/mutator.py @@ -20,7 +20,17 @@ from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA from Bio.Seq import reverse_complement from mutalyzer import util -from mutalyzer import config + + +# Length of the flanking sequences used in the visualisation of mutations. 
+VIS_FLANK_LENGTH = 25 + +# Maximum length of visualised mutations. +VIS_MAX_LENGTH = 25 + +# Length of the flanking sequences used if the visualised mutation is clipped +# (because it exceeds VIS_MAX_LENGTH). +VIS_CLIP_FLANK_LENGTH = 6 class Mutator(): @@ -104,19 +114,19 @@ class Mutator(): @return: Visualisation. @rtype: str """ - loflank = self.orig[max(pos1 - config.get('flanksize'), 0):pos1] - roflank = self.orig[pos2:pos2 + config.get('flanksize')] + loflank = self.orig[max(pos1 - VIS_FLANK_LENGTH, 0):pos1] + roflank = self.orig[pos2:pos2 + VIS_FLANK_LENGTH] delPart = self.orig[pos1:pos2] - odel = util.visualise_sequence(delPart, config.get('maxvissize'), - config.get('flankclipsize')) + odel = util.visualise_sequence(delPart, VIS_MAX_LENGTH, + VIS_CLIP_FLANK_LENGTH) bp1 = self.shift(pos1) bp2 = self.shift(pos2) - lmflank = self.mutated[max(bp1 - config.get('flanksize'), 0):bp1] - rmflank = self.mutated[bp2:bp2 + config.get('flanksize')] + lmflank = self.mutated[max(bp1 - VIS_FLANK_LENGTH, 0):bp1] + rmflank = self.mutated[bp2:bp2 + VIS_FLANK_LENGTH] - insvis = util.visualise_sequence(ins, config.get('maxvissize'), - config.get('flankclipsize')) + insvis = util.visualise_sequence(ins, VIS_MAX_LENGTH, + VIS_CLIP_FLANK_LENGTH) fill = abs(len(odel) - len(insvis)) if len(odel) > len(ins): visualisation = ['%s %s %s' % (loflank, odel, roflank), diff --git a/mutalyzer/output.py b/mutalyzer/output.py index 97c96ea08cd0a41a8d97b12c9efa3281d514a713..3f720ea03c88791417917a465cc0f7941ed69e8b 100644 --- a/mutalyzer/output.py +++ b/mutalyzer/output.py @@ -26,7 +26,7 @@ Public classes: import time from mutalyzer import util -from mutalyzer import config +from mutalyzer.config import settings from mutalyzer.models import SoapMessage @@ -77,7 +77,7 @@ class Output() : self._outputData = {} self._messages = [] self._instance = util.nice_filename(instance) - self._loghandle = open(config.get('log'), "a+") + self._loghandle = open(settings.LOG_FILE, "a+") self._errors = 0 
self._warnings = 0 #__init__ @@ -117,9 +117,9 @@ class Output() : # Log the message if the message is important enough, or if it is only # meant to be logged (level -1). - if level >= config.get('loglevel') or level == -1 : + if level >= settings.LOG_LEVEL or level == -1 : self._loghandle.write(time.strftime( - config.get('datestring') + ' ') + "%s (%s) %s: %s: %s\n" % ( + settings.LOG_TIME_FORMAT + ' ') + "%s (%s) %s: %s: %s\n" % ( self._instance, nice_name, code, message.named_level(), description)) self._loghandle.flush() @@ -136,7 +136,7 @@ class Output() : @return: A list of messages @rtype: list """ - return filter(lambda m: m.level >= config.get('outputlevel'), + return filter(lambda m: m.level >= settings.OUTPUT_LEVEL, self._messages) #getMessages diff --git a/mutalyzer/parsers/genbank.py b/mutalyzer/parsers/genbank.py index 5564d8f01ba78a8488816835185c55ca3b969d03..2e6c1338e3dbfea10699e9aa9072613d4fcbfa3a 100644 --- a/mutalyzer/parsers/genbank.py +++ b/mutalyzer/parsers/genbank.py @@ -11,7 +11,7 @@ from itertools import izip_longest from Bio import SeqIO, Entrez from Bio.Alphabet import ProteinAlphabet -from mutalyzer import config +from mutalyzer.config import settings from mutalyzer import Db from mutalyzer.GenRecord import PList, Locus, Gene, Record, GenRecord @@ -62,7 +62,7 @@ class GBparser(): Private variables: - __database ; Db.Cache object """ - Entrez.email = config.get('email') + Entrez.email = settings.EMAIL self.__database = Db.Cache() #__init__ diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py index 3a8b701adfa939cb78b9968447af41c0ac79a956..231d4862974323ea79acaeafd4359cdd597fa128 100644 --- a/mutalyzer/services/rpc.py +++ b/mutalyzer/services/rpc.py @@ -21,7 +21,7 @@ import tempfile from operator import itemgetter, attrgetter import mutalyzer -from mutalyzer import config +from mutalyzer.config import settings from mutalyzer.output import Output from mutalyzer.grammar import Grammar from mutalyzer.sync import CacheSync @@ 
-49,7 +49,7 @@ def _checkBuild(L, build) : @type build: string """ - if not build in config.get('dbNames'): + if not build in settings.DB_NAMES: L.addMessage(__file__, 4, "EARG", "EARG %s" % build) raise Fault("EARG", "The build argument (%s) was not a valid " \ @@ -171,7 +171,7 @@ class MutalyzerService(ServiceBase): # argument for spyne.server.wsgi.WsgiApplication in all webservice # instantiations. - max_size = config.get('batchInputMaxSize') + max_size = settings.MAX_FILE_SIZE batch_file = tempfile.TemporaryFile() size = 0 @@ -236,7 +236,7 @@ class MutalyzerService(ServiceBase): raise Fault('EBATCHNOTREADY', 'Batch job result is not yet ready.') filename = 'Results_%s.txt' % job_id - handle = open(os.path.join(config.get('resultsDir'), filename)) + handle = open(os.path.join(settings.CACHE_DIR, filename)) return handle @srpc(Mandatory.String, Mandatory.String, Mandatory.Integer, Boolean, diff --git a/mutalyzer/sync.py b/mutalyzer/sync.py index 48710f7e3af1f7d9fd1af6aa931c556dc470e85f..2438096f70c5e725c3a4facb32ec2091ff622639 100644 --- a/mutalyzer/sync.py +++ b/mutalyzer/sync.py @@ -11,7 +11,7 @@ from datetime import datetime, timedelta import urllib2 from suds.client import Client -from mutalyzer import config +from mutalyzer.config import settings from mutalyzer import Retriever @@ -58,7 +58,7 @@ class CacheSync(object): for entry in entries: # Note that this way we only include Genbank files, not LRG files. cached = None - if os.path.isfile(os.path.join(config.get('cache'), + if os.path.isfile(os.path.join(settings.CACHE_DIR, '%s.gb.bz2' % entry[0])): cached = '%s.gb' % entry[0] cache.append({'name': entry[0], diff --git a/mutalyzer/util.py b/mutalyzer/util.py index 446ec9c8e261218a2fe37b362ddc8c2352d92401..a367f31b6e8798af747a24bc2df56ee1ae018391 100644 --- a/mutalyzer/util.py +++ b/mutalyzer/util.py @@ -19,16 +19,17 @@ General utility functions. 
""" -import sys -import os -import math -import time +from functools import wraps import inspect from itertools import izip_longest -from functools import wraps +import math +import operator +import os +import sys +import time -import Bio.Seq from Bio.Alphabet import IUPAC +import Bio.Seq from Bio.SeqUtils import seq3 @@ -895,3 +896,81 @@ def monkey_patch_suds(): Import.open = _import_open_patched Import.MUTALYZER_MONKEY_PATCHED = True #monkey_patch_suds + + +class AttributeDictMixin(object): + """ + Augment classes with a Mapping interface by adding attribute access. + + Taken from `Celery <http://www.celeryproject.org/>`_ + (`celery.datastructures.AttributeDictMixin`). + """ + def __getattr__(self, k): + try: + return self[k] + except KeyError: + raise AttributeError( + '{0!r} object has no attribute {1!r}'.format( + type(self).__name__, k)) + + def __setattr__(self, key, value): + self[key] = value + + +# Helper for LazyObject. +def _new_method_proxy(func): + def inner(self, *args): + if self._wrapped is None: + self._setup() + return func(self._wrapped, *args) + return inner + + +class LazyObject(object): + """ + A wrapper for another class that can be used to delay instantiation of the + wrapped class. + + Taken from `Django <https://www.djangoproject.com/>`_ + (`django.utils.functional.LazyObject`). + """ + _wrapped = None + + def __init__(self): + self._wrapped = None + + __getattr__ = _new_method_proxy(getattr) + + def __setattr__(self, name, value): + if name == '_wrapped': + # Assign to __dict__ to avoid infinite __setattr__ loops. + self.__dict__['_wrapped'] = value + else: + if self._wrapped is None: + self._setup() + setattr(self._wrapped, name, value) + + def __delattr__(self, name): + if name == '_wrapped': + raise TypeError('can\'t delete _wrapped.') + if self._wrapped is None: + self._setup() + delattr(self._wrapped, name) + + def _setup(self): + """ + Must be implemented by subclasses to initialize the wrapped object. 
+ """ + raise NotImplementedError('subclasses of LazyObject must provide a ' + '_setup() method') + + # Introspection support + __dir__ = _new_method_proxy(dir) + + # Dictionary methods support + __getitem__ = _new_method_proxy(operator.getitem) + __setitem__ = _new_method_proxy(operator.setitem) + __delitem__ = _new_method_proxy(operator.delitem) + + __len__ = _new_method_proxy(len) + __contains__ = _new_method_proxy(operator.contains) diff --git a/mutalyzer/variantchecker.py b/mutalyzer/variantchecker.py index ce41a4aaebe0f7d0cba83288180523d03a6f131a..67ef0a63c6d9a0e4e11e4aaaaa113de8af881937 100644 --- a/mutalyzer/variantchecker.py +++ b/mutalyzer/variantchecker.py @@ -21,7 +21,6 @@ from Bio.Alphabet import IUPAC from Bio.Alphabet import DNAAlphabet from Bio.Alphabet import ProteinAlphabet -from mutalyzer import config from mutalyzer import util from mutalyzer.grammar import Grammar from mutalyzer.mutator import Mutator @@ -377,13 +376,9 @@ def apply_deletion_duplication(first, last, type, mutator, record, O, 'Sequence "%s" at position %s was given, however, ' \ 'the HGVS notation prescribes that on the forward strand ' \ 'it should be "%s" at position %s.' % ( - util.visualise_sequence(str(mutator.orig[first - 1:last]), - config.get('maxvissize'), - config.get('flankclipsize')), + util.visualise_sequence(str(mutator.orig[first - 1:last])), util.format_range(first, last), - util.visualise_sequence(str(mutator.orig[new_first - 1:new_stop]), - config.get('maxvissize'), - config.get('flankclipsize')), + util.visualise_sequence(str(mutator.orig[new_first - 1:new_stop])), util.format_range(new_first, new_stop))) if forward_roll != original_forward_roll and not reverse_strand: @@ -393,13 +388,9 @@ def apply_deletion_duplication(first, last, type, mutator, record, O, O.addMessage(__file__, 1, 'IROLLBACK', 'Sequence "%s" at position %s was not corrected to "%s" at ' \ 'position %s, since they reside in different exons.' 
% ( - util.visualise_sequence(str(mutator.orig[first - 1:last]), - config.get('maxvissize'), - config.get('flankclipsize')), + util.visualise_sequence(str(mutator.orig[first - 1:last])), util.format_range(first, last), - util.visualise_sequence(str(mutator.orig[incorrect_first - 1:incorrect_stop]), - config.get('maxvissize'), - config.get('flankclipsize')), + util.visualise_sequence(str(mutator.orig[incorrect_first - 1:incorrect_stop])), util.format_range(incorrect_first, incorrect_stop))) if reverse_roll and reverse_strand: @@ -409,13 +400,9 @@ def apply_deletion_duplication(first, last, type, mutator, record, O, 'Sequence "%s" at position %s was given, however, ' \ 'the HGVS notation prescribes that on the reverse strand ' \ 'it should be "%s" at position %s.' % ( - util.visualise_sequence(str(mutator.orig[first - 1:last]), - config.get('maxvissize'), - config.get('flankclipsize')), + util.visualise_sequence(str(mutator.orig[first - 1:last])), util.format_range(first, last), - util.visualise_sequence(str(mutator.orig[new_first - 1:new_stop]), - config.get('maxvissize'), - config.get('flankclipsize')), + util.visualise_sequence(str(mutator.orig[new_first - 1:new_stop])), util.format_range(new_first, new_stop))) # We don't go through the trouble of visualising the *corrected* variant @@ -457,9 +444,7 @@ def apply_inversion(first, last, mutator, record, O): O.addMessage(__file__, 2, 'WNOCHANGE', 'Sequence "%s" at position %i_%i is a palindrome ' \ '(its own reverse complement).' % ( - util.visualise_sequence(str(mutator.orig[first - 1:last]), - config.get('maxvissize'), - config.get('flankclipsize')), + util.visualise_sequence(str(mutator.orig[first - 1:last])), first, last)) return else: @@ -468,13 +453,10 @@ def apply_inversion(first, last, mutator, record, O): 'palindrome (the first %i nucleotide(s) are the reverse ' \ 'complement of the last one(s)), the HGVS notation ' \ 'prescribes that it should be "%s" at position %i_%i.' 
% ( - util.visualise_sequence(str(mutator.orig[first - 1:last]), - config.get('maxvissize'), - config.get('flankclipsize')), + util.visualise_sequence(str(mutator.orig[first - 1:last])), first, last, snoop, util.visualise_sequence( - str(mutator.orig[first + snoop - 1: last - snoop]), - config.get('maxvissize'), config.get('flankclipsize')), + str(mutator.orig[first + snoop - 1: last - snoop])), first + snoop, last - snoop)) first += snoop last -= snoop @@ -637,9 +619,7 @@ def apply_delins(first, last, insert, mutator, record, output): output.addMessage(__file__, 2, 'WNOCHANGE', 'Sequence "%s" at position %i_%i is identical to ' \ 'the variant.' % ( - util.visualise_sequence(str(mutator.orig[first - 1:last]), - config.get('maxvissize'), - config.get('flankclipsize')), + util.visualise_sequence(str(mutator.orig[first - 1:last])), first, last)) return @@ -678,9 +658,7 @@ def apply_delins(first, last, insert, mutator, record, output): 'Sequence "%s" at position %i_%i has the same prefix or ' \ 'suffix as the inserted sequence "%s". The HGVS notation ' \ 'prescribes that it should be "%s" at position %i_%i.' 
% ( - util.visualise_sequence(str(mutator.orig[first - 1:last]), - config.get('maxvissize'), - config.get('flankclipsize')), + util.visualise_sequence(str(mutator.orig[first - 1:last])), first, last, insert, insert_trimmed, first + lcp, last - lcs)) mutator.delins(first + lcp, last - lcs, insert_trimmed) diff --git a/mutalyzer/website.py b/mutalyzer/website.py index 651642f20fd4c024889bb691461697962e2579e5..af9b3ac453e7b426f70d0dccc975dafb6ac452fd 100644 --- a/mutalyzer/website.py +++ b/mutalyzer/website.py @@ -33,7 +33,7 @@ from spyne.server.http import HttpBase import mutalyzer from mutalyzer import util -from mutalyzer import config +from mutalyzer.config import settings from mutalyzer.grammar import Grammar from mutalyzer.services import soap from mutalyzer import variantchecker @@ -47,7 +47,7 @@ from mutalyzer import describe # Show web.py debugging information. -web.config.debug = config.get('debug') +web.config.debug = settings.DEBUG # URL dispatch table @@ -88,12 +88,12 @@ render = render_jinja(pkg_resources.resource_filename('mutalyzer', 'templates'), 'releaseDate' : mutalyzer.__date__, 'release' : mutalyzer.RELEASE, 'copyrightYears' : mutalyzer.COPYRIGHT_YEARS, - 'contactEmail' : config.get('email'), + 'contactEmail' : settings.EMAIL, 'serviceSoapLocation' : SERVICE_SOAP_LOCATION, 'serviceJsonLocation' : SERVICE_JSON_LOCATION, - 'piwik' : config.get('piwik'), - 'piwikBase' : config.get('piwikBase'), - 'piwikSite' : config.get('piwikSite')}) + 'piwik' : settings.PIWIK, + 'piwikBase' : settings.PIWIK_BASE_URL, + 'piwikSite' : settings.PIWIK_SITE_ID}) # web.py application app = web.application(urls, globals(), autoreload=False) @@ -202,7 +202,7 @@ class Reference: The url routing currently makes sure to only call this with filenames of the form [a-zA-Z\._-]+. 
""" - file_path = os.path.join(config.get('cache'), '%s.bz2' % file) + file_path = os.path.join(settings.CACHE_DIR, '%s.bz2' % file) if not os.path.isfile(file_path): raise web.notfound() @@ -229,7 +229,7 @@ class Reference: can be reconstructed from the information in the database. Because if the latter is the case, Mutalyzer will add it to the cache on the fly. """ - file_path = os.path.join(config.get('cache'), '%s.bz2' % file) + file_path = os.path.join(settings.CACHE_DIR, '%s.bz2' % file) if not os.path.isfile(file_path): # The following is a hack to return a 404 not found status with @@ -476,7 +476,7 @@ class PositionConverter: output = Output(__file__) IP = web.ctx["ip"] - avail_builds = config.get('dbNames') + avail_builds = settings.DB_NAMES # We have to put up with this crap just to get a certain <option> # selected in our TAL template. @@ -484,7 +484,7 @@ class PositionConverter: if build in avail_builds: selected_build = build else: - selected_build = config.get('defaultDb') + selected_build = settings.DEFAULT_DB unselected_builds = sorted(b for b in avail_builds if b != selected_build) @@ -1050,9 +1050,9 @@ class BatchChecker: """ O = Output(__file__) - maxUploadSize = config.get('batchInputMaxSize') + maxUploadSize = settings.MAX_FILE_SIZE - avail_builds = config.get('dbNames') + avail_builds = settings.DB_NAMES # We have to put up with this crap just to get a certain <option> # selected in our TAL template. @@ -1060,7 +1060,7 @@ class BatchChecker: if arg1 in avail_builds: selected_build = arg1 else: - selected_build = config.get('defaultDb') + selected_build = settings.DEFAULT_DB unselected_builds = sorted(b for b in avail_builds if b != selected_build) @@ -1163,7 +1163,7 @@ class BatchResult: """ # Todo: Check if batch job is ready (we have the job id). 
filename = 'Results_%s.txt' % result - handle = open(os.path.join(config.get('resultsDir'), filename)) + handle = open(os.path.join(settings.CACHE_DIR, filename)) web.header('Content-Type', 'text/plain') web.header('Content-Disposition', 'attachment; filename="%s"' % filename) @@ -1216,13 +1216,13 @@ class Uploader: """ Render reference sequence uploader form. """ - maxUploadSize = config.get('maxDldSize') - available_assemblies = config.get('dbNames') + maxUploadSize = settings.MAX_FILE_SIZE + available_assemblies = settings.DB_NAMES # We have to put up with this crap just to get a certain <option> # selected in our TAL template. # Todo: Now we switched to Jinja2, we can make this sane. - selected_assembly = config.get('defaultDb') + selected_assembly = settings.DEFAULT_DB unselected_assemblies = sorted(b for b in available_assemblies if b != selected_assembly) @@ -1281,9 +1281,9 @@ class Uploader: - chrnamestop: Stop position. - chrnameorientation: Orientation. """ - maxUploadSize = config.get('maxDldSize') + maxUploadSize = settings.MAX_FILE_SIZE - available_assemblies = config.get('dbNames') + available_assemblies = settings.DB_NAMES O = Output(__file__) IP = web.ctx["ip"] @@ -1304,7 +1304,7 @@ class Uploader: if i.chrnameassembly in available_assemblies: selected_assembly = i.chrnameassembly else: - selected_assembly = config.get('defaultDb') + selected_assembly = settings.DEFAULT_DB unselected_assemblies = sorted(b for b in available_assemblies if b != selected_assembly) diff --git a/requirements.txt b/requirements.txt index 34b0cc02ba4f9031f0d5bd9820bf8f1a606251c1..e8b0fd5876c92abed8477e6676eb80074b5ca1eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ MySQL-python==1.2.4 biopython==1.63b -configobj==4.7.2 lxml==3.2.4 nose==1.3.0 pyparsing==2.0.1 @@ -17,3 +16,4 @@ WebTest==1.4.2 cssselect==0.9.1 Jinja2==2.7.1 -e git+https://github.com/mammadori/magic-python.git#egg=Magic_file_extensions +Flask==0.10.1 diff --git a/setup.py b/setup.py 
index d7f16f16f8b3854a4dfa1704ffbaa49dd1245dd3..5fc25f3547340e761448398ab4b19f1b3bd3488c 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ setup( platforms=['any'], install_requires=install_requires, packages=['mutalyzer', + 'mutalyzer.config', 'mutalyzer.entrypoints', 'mutalyzer.parsers', 'mutalyzer.services'],