diff --git a/doc/API/api.conf b/doc/API/api.conf index f44efb9485e76827741769d89a7ebba626218363..3b307dd88a1c45879a1d6ef37f93f7f804c193b1 100644 --- a/doc/API/api.conf +++ b/doc/API/api.conf @@ -3,8 +3,10 @@ # modules # The list of objects to document. Objects can be named using # dotted names, module filenames, or package directory names. -# Alases for this option include "objects" and "values". -modules: ../../src/Modules, ../../src/*.py +# Aliases for this option include "objects" and "values". +#modules: ../../src/Modules, ../../src/*.py +modules: ../../src/Modules, ../../src/*.py +#modules: src/Modules/Db.py, src/Modules/File.py, src/Modules/Config.py # output # The type of output that should be generated. Should be one @@ -27,7 +29,7 @@ docformat: epytext # name # The documented project's name. -name: Mutalyzer 2.0 +name: "Mutalyzer 2.0" # url # The documented project's URL. @@ -67,7 +69,7 @@ imports: no # An integer indicating how verbose epydoc should be. The default # value is 0; negative values will supress warnings and errors; # positive values will give more verbose output. -verbosity: 0 +verbosity: 1 # parse # Whether or not parsing should be used to examine objects. @@ -75,7 +77,7 @@ parse: yes # introspect # Whether or not introspection should be used to examine objects. -introspect: no +introspect: yes # graph # The list of graph types that should be automatically included @@ -87,7 +89,7 @@ graph: all # dotpath # The path to the Graphviz "dot" executable, used to generate # graphs. -dotpath: /usr/local/bin/dot +dotpath: /usr/bin/dot # sourcecode # Whether or not to include syntax highlighted source code in @@ -97,9 +99,9 @@ dotpath: /usr/local/bin/dot # pstat # The name of one or more pstat files (generated by the profile # or hotshot module). These are used to generate call graphs. -pstat: profile.out +#pstat: profile.out # separate-classes # Whether each class should be listed in its own section when # generating LaTeX or PDF output. 
-separate-classes: no +separate-classes: yes diff --git a/src/BatchChecker.py b/src/BatchChecker.py index 626db21faa40f6e2d90bea950d08932fbf3e3264..6241b88c9506e8856522e5b1537cc48e02887ad7 100644 --- a/src/BatchChecker.py +++ b/src/BatchChecker.py @@ -1,4 +1,16 @@ #!/usr/bin/python + +""" +@requires: os +@requires: sys +@requires: daemon +@requires: signal +@requires: fcntl +@requires: ftplib +@requires: Modules.Config +@requires: Modules.Db.Batch +@requires: Modules.Scheduler +""" import os import sys import daemon @@ -10,7 +22,9 @@ from Modules.Db import Batch from Modules import Scheduler def sigusr1_daemon_notified(*args): - """Stop the Daemon with SIGUSR1 signal: kill -10 PID""" + """ + Stop the Daemon with SIGUSR1 signal: kill -10 PID + """ sys.exit() # Change dir diff --git a/src/Modules/Config.py b/src/Modules/Config.py index 9eb1401e3a81bad03b867a88ed5e7d440a8b3e91..94b9c66f2c702e2a7a58e73707965569547257fa 100644 --- a/src/Modules/Config.py +++ b/src/Modules/Config.py @@ -1,43 +1,40 @@ #!/usr/bin/python """ - Module for reading the config file and splitting up the variables into - subclasses. Each of these subclasses are used to configure a specific - module. - - Public classes: - Config ; Read the configuration file and store the data in subclasses. +Module for reading the config file and splitting up the variables into +subclasses. Each of these subclasses are used to configure a specific +module. """ class Config() : """ - Read the configuration file and store the data in subclasses. - - Public subclasses: - Retriever ; Container for the Retriever configuration variables. - Db ; Container for the Db configuration variables. - Output ; Container for the Output configuration variables. - Mutator ; Container for the Mutator configuration variables. - Scheduler ; Container for the Scheduler configuration variables. - File ; Container for the File configuration variables. - GBparser ; Container for the File configuration variables. 
- - Special Methods: - __init__ ; Read the configuration file and initialise the - subclasses. + Read the configuration file and store the data in subclasses. + + Special Methods: + - __init__ ; Read the configuration file and initialise the + subclasses. """ + # Public subclasses: + # - Retriever ; Container for the Retriever configuration variables. + # - Db ; Container for the Db configuration variables. + # - Output ; Container for the Output configuration variables. + # - Mutator ; Container for the Mutator configuration variables. + # - Scheduler ; Container for the Scheduler configuration variables. + # - File ; Container for the File configuration variables. + # - GBparser ; Container for the GBparser configuration variables. + class Retriever() : """ - Container class for the Retriever configuration variables. + Container class for the Retriever configuration variables. - Public variables: - email ; Email address used for Entrez. - cache ; Location of the cache directory. - cachesize ; Maximum size of the cache directory in bytes. - maxDldSize ; Maximum size of a GenBank record in bytes. - minDldSize ; Minimum size of a GenBank record in bytes. - lrgURL ; base URL of LRG files + Public variables: + - email ; Email address used for Entrez. + - cache ; Location of the cache directory. + - cachesize ; Maximum size of the cache directory in bytes. + - maxDldSize ; Maximum size of a GenBank record in bytes. + - minDldSize ; Minimum size of a GenBank record in bytes. + - lrgURL ; base URL of LRG files. """ pass @@ -45,31 +42,31 @@ class Config() : class Db() : """ - Container class for the Db configuration variables. + Container class for the Db configuration variables. - Public variables: - internalDb ; Name of the internal database. - dbNames ; Name of the mapping databases - LocalMySQLuser ; Username for the local databases. - LocalMySQLhost ; Hostname of the local databases. + Public variables: + - internalDb ; Name of the internal database. 
+ - dbNames ; Name of the mapping databases + - LocalMySQLuser ; Username for the local databases. + - LocalMySQLhost ; Hostname of the local databases. - RemoteMySQLuser ; Username for the remote UCSC database. - RemoteMySQLhost ; Hostname of the UCSC database server. - UpdateInterval ; Time window (in days) to search for - updates. - TempFile ; Location for downloaded updates. + - RemoteMySQLuser ; Username for the remote UCSC database. + - RemoteMySQLhost ; Hostname of the UCSC database server. + - UpdateInterval ; Time window (in days) to search for + updates. + - TempFile ; Location for downloaded updates. """ #Db class Output() : """ - Container class for the Output configuration variables. + Container class for the Output configuration variables. - Public variables: - log ; Name and location of the logfile. - datestring ; Prefix for log messages. - loglevel ; Default level for logging. - outputlevel ; Default level for output. + Public variables: + - log ; Name and location of the logfile. + - datestring ; Prefix for log messages. + - loglevel ; Default level for logging. + - outputlevel ; Default level for output. """ pass @@ -77,14 +74,14 @@ class Config() : class Mutator() : """ - Container class for the Mutator configuration variables. + Container class for the Mutator configuration variables. - Public variables: - flanksize ; Length of the flanking sequences in the - visualisation. - maxvissize ; Maximum length of the variation in the - visualisation. - flankclipsize ; Length of the inserted/deleted flanks. + Public variables: + - flanksize ; Length of the flanking sequences in the + visualisation. + - maxvissize ; Maximum length of the variation in the + visualisation. + - flankclipsize ; Length of the inserted/deleted flanks. """ pass @@ -92,14 +89,14 @@ class Config() : class Scheduler() : """ - Container class for the Scheduler configuration variables. + Container class for the Scheduler configuration variables. 
- Public variables: - processName ; Name of the scheduler in the process list. - mailFrom ; Return e-mail address. - mailMessage ; Template e-mail. - mailSubject ; Subject of the e-mail. - resultsDir ; Location of the results. + Public variables: + - processName ; Name of the scheduler in the process list. + - mailFrom ; Return e-mail address. + - mailMessage ; Template e-mail. + - mailSubject ; Subject of the e-mail. + - resultsDir ; Location of the results. """ pass @@ -107,10 +104,10 @@ class Config() : class Batch() : """ - Container class for the Scheduler configuration variables. + Container class for the Batch configuration variables. - Public variables: - PIDfile ; Location of the PID file + Public variables: + - PIDfile ; Location of the PID file. """ pass @@ -118,15 +115,15 @@ class Config() : class File() : """ - Container class for the File configuration variables. + Container class for the File configuration variables. - Public variables: - bufSize ; Amount of bytes to be read for determining the file + Public variables: + - bufSize ; Amount of bytes to be read for determining the file type. - header ; The obligatory header in batch request files. - tempDir ; Directory for temporary files. - threshold ; The threshold under which the percentage of errors - is allowed in a batchfile + - header ; The obligatory header in batch request files. + - tempDir ; Directory for temporary files. + - threshold ; The threshold under which the percentage of errors + is allowed in a batchfile. """ pass @@ -134,34 +131,36 @@ class Config() : class GBparser() : """ - Container class for the GBparser configuration variables. + Container class for the GBparser configuration variables. - Public variables: - upstream ; Number of upstream nucleotides when searching for a - transcript. - downstream ; Number of downstream nucleotides when searching for a - transcript. + Public variables: + - upstream ; Number of upstream nucleotides when searching for a + transcript. 
+ - downstream ; Number of downstream nucleotides when searching for a + transcript. """ pass - #File + #GBparser class GenRecord() : pass def __init__(self) : """ - Initialise the class with variables read from the configuration - file. In principle, this is the only place in the code where a - hard coded constant is used (the name and path to the configuration - file). - - Public subclasses (altered): - Retriever ; Initialised with Retriever configuration variables. - Db ; Initialised with Db configuration variables. - Output ; Initialised with Output configuration variables. - Mutator ; Initialised with Mutator configuration variables. - Scheduler ; Initialised with Scheduler configuration variables. + Initialise the class with variables read from the configuration + file. In principle, this is the only place in the code where a + hard coded constant is used (the name and path to the configuration + file). + + Public subclasses (altered): + - Retriever ; Initialised with Retriever configuration variables. + - Db ; Initialised with Db configuration variables. + - Output ; Initialised with Output configuration variables. + - Mutator ; Initialised with Mutator configuration variables. + - Scheduler ; Initialised with Scheduler configuration variables. + + @requires: ConfigObj """ from configobj import ConfigObj # ConfigObj() diff --git a/src/Modules/Crossmap.py b/src/Modules/Crossmap.py index 24a0a89362e773e1674491a6da334f59343a8ec4..08c5747a43c41af32bc6662f814e7523f1c51b92 100644 --- a/src/Modules/Crossmap.py +++ b/src/Modules/Crossmap.py @@ -1,74 +1,86 @@ #!/usr/bin/python """ - Module for conversion from genomic coordinates to coding sequence - orientated coordinates and vice versa. - The conversions are done based upon a list of splice sites, the CDS start - and stop and the orientation of a transcript. +Module for conversion from genomic coordinates to coding sequence +orientated coordinates and vice versa. 
+The conversions are done based upon a list of splice sites, the CDS start +and stop and the orientation of a transcript. - Public classes: - Crossmap ; Convert from g. to c. or n. notation or vice versa. """ +#Public classes: +# - Crossmap ; Convert from g. to c. or n. notation or vice versa. class Crossmap() : """ - Convert from g. to c. or n. notation or vice versa. - - Private variables: - __STOP ; CDS stop in c. notation. - __crossmapping ; A list that contains either c. or n. positions - corresponding to the g. positions in the RNA list. - - Public variables: - RNA ; The list of RNA splice sites. - CDS ; CDS start and stop (if present). - orientation ; The orientation of the transcript: 1 = forward - -1 = reverse. - - Special methods: - __init__(RNA, CDS, orientation) ; Initialise the class and do the - cross mapping of the splice - sites. - - Private methods: - __plus(a, b) ; A protected '+' that skips 0 if - a <= 0 and a + b >= 0. - __minus(a, b) ; A protected '-' that skips 0 if - a >= 0 and a - b <= 0. - __minusr(a, b) ; A protected '-' that skips 0 if - a > 0 and b < 0. - __crossmap_splice_sites() ; Calculate the __crossmapping list. - - Public methods: - int2main(a) ; Translate from __STOP to '*' notation. - main2int(s) ; Translate from '*' to __STOP notation. - g2x(a) ; Translate from g. notation to c. or n. notation. - x2g(a, b) ; Translate c. or n. notation to g. notation. + Convert from I{g.} to I{c.} or I{n.} notation or vice versa. + + Private variables: + - __STOP ; CDS stop in I{c.} notation. + - __crossmapping ; A list that contains either I{c.} or I{n.} positions + corresponding to the I{g.} positions in the RNA list. + + Public variables: + - RNA ; The list of RNA splice sites. + - CDS ; CDS start and stop (if present). + - orientation ; The orientation of the transcript: 1 = forward + -1 = reverse. + + Special methods: + - __init__(RNA, CDS, orientation) ; Initialise the class and do the + cross mapping of the splice sites. 
+ + Private methods: + - __plus(a, b) ; A protected '+' that skips 0 if + a <= 0 and a + b >= 0. + - __minus(a, b) ; A protected '-' that skips 0 if + a >= 0 and a - b <= 0. + - __minusr(a, b) ; A protected '-' that skips 0 if + a > 0 and b < 0. + - __crossmap_splice_sites() ; Calculate the __crossmapping list. + + Public methods: + - int2main(a) ; Translate from __STOP to '*' notation. + - main2int(s) ; Translate from '*' to __STOP notation. + - g2x(a) ; Translate from I{g.} notation to I{c.} or I{n.} notation. + - x2g(a, b) ; Translate I{c.} or I{n.} notation to I{g.} notation. + - int2offset(t) ; Convert a tuple of integers to offset-notation. + - offset2int(s) ; Convert an offset in HGVS notation to an integer. + - tuple2string(t) ; Convert a tuple (main, offset) in __STOP notation + to I{c.} notation. + - g2c(a) ; Uses both g2x() and tuple2string() to translate a genomic + position to __STOP notation to I{c.} notation. + - info() ; Return transcription start, transcription end and CDS stop. + - getSpliceSite(number) ; Return the coordinate of a splice site. + - numberOfIntrons() ; Returns the number of introns. + - numberOfExons() ; Returns the number of exons. """ def __init__(self, RNA, CDS, orientation) : """ - Initialise the class and do the cross mapping of the splice sites. - - Arguments: - RNA ; The list of RNA splice sites. - CDS ; CDS start and stop (may be empty). - orientation ; The orientation of the transcript. - - Private variables (altered): - __STOP ; CDS stop in c. notation. - __crossmapping ; A list that contains either c. or n. positions - corresponding to the g. positions in the RNA - list. - __trans_start ; Transcription start site in c. notation. - __trans_end ; Transcription end side in c. notation. - - Public variables (altered): - RNA ; The list of RNA splice sites. - CDS ; CDS start and stop (if present). - orientation ; The orientation of the transcript: 1 = forward - -1 = reverse. 
- """ + Initialise the class and do the cross mapping of the splice sites. + + Private variables (altered): + - __STOP ; CDS stop in I{c.} notation. + - __crossmapping ; A list that contains either I{c.} or I{n.} + positions corresponding to the I{g.} positions in + the RNA list. + - __trans_start ; Transcription start site in I{c.} notation. + - __trans_end ; Transcription end side in I{c.} notation. + + @arg RNA: The list of RNA splice sites + @type RNA: list + @arg CDS: CDS start and stop (if present, may be empty) + @type CDS: list + @arg orientation: The orientation of the transcript + - 1 = forward + - E{-}1 = reverse + @type orientation: integer + """ +# Public variables (altered): +# - RNA ; The list of RNA splice sites. +# - CDS ; CDS start and stop (if present). +# - orientation ; The orientation of the transcript: 1 = forward +# -1 = reverse. self.__STOP = None self.__crossmapping = len(RNA) * [None] @@ -88,16 +100,17 @@ class Crossmap() : def __plus(self, a, b) : """ - This method returns a + b unless a is smaller than zero and the - result is larger than zero, in that case it returns a + b + 1. - In effect the number 0 is skipped while adding. + This method returns a + b unless a is smaller than zero and the + result is larger than zero, in that case it returns a + b + 1. + In effect the number 0 is skipped while adding. - Arguments: - a ; First argument of the addition. - b ; Second argument of the addition. + @arg a: First argument of the addition + @type a: integer + @arg b: Second argument of the addition + @type b: integer - Returns: - integer ; a + b or a + b + 1. + @return: a + b or a + b + 1 + @rtype: integer """ r = a + b @@ -110,16 +123,17 @@ class Crossmap() : def __minus(self, a, b) : """ - This method returns a - b unless a is larger than zero and the - result is smaller than zero, in that case it returns (a - b) - 1. - In effect the number 0 is skipped while subtracting. 
+ This method returns a - b unless a is larger than zero and the + result is smaller than zero, in that case it returns (a - b) - 1. + In effect the number 0 is skipped while subtracting. - Arguments: - a ; First argument of the subtraction. - b ; Second argument of the subtraction. + @arg a: First argument of the subtraction + @type a: integer + @arg b: Second argument of the subtraction + @type b: integer - Returns: - integer ; a - b or (a - b) - 1. + @return: a - b or (a - b) - 1 + @rtype: integer """ r = a - b @@ -132,16 +146,17 @@ class Crossmap() : def __minusr(self, a, b) : """ - This method returns a - b unless a is larger than zero and b is - smaller than zero, in that case it returns (a - b) - 1. - In effect the number 0 is skipped while subtracting. + This method returns a - b unless a is larger than zero and b is + smaller than zero, in that case it returns (a - b) - 1. + In effect the number 0 is skipped while subtracting. - Arguments: - a ; First argument of the subtraction. - b ; Second argument of the subtraction. + @arg a: First argument of the subtraction + @type a: integer + @arg b: Second argument of the subtraction + @type b: integer - Returns: - integer ; a - b or (a - b) - 1. + @return: a - b or (a - b) - 1 + @rtype: integer """ r = a - b @@ -154,46 +169,50 @@ class Crossmap() : def __crossmap_splice_sites(self) : """ - This method calculates either: - 1: The c. notation of the CDS start and stop, including splice - sites. - 2: The c. notation of the RNA splice sites. - 3: The n. notation of the RNA splice sites. - - For option 1 only provide an list with CDS splice sites. - For option 2 provide an list with RNA splice sites and one with - the CDS start and stop. - For option 3 only provide an list with RNA splice sites. - - Examples: - - Get the n. notation of the RNA splice sites. The input is in + This method calculates either: + 1. The I{c.} notation of the CDS start and stop, including splice + sites. + 2. 
The I{c.} notation of the RNA splice sites. + 3. The I{n.} notation of the RNA splice sites. + + For option 1 only provide a list with CDS splice sites. + For option 2 provide a list with RNA splice sites and one with + the CDS start and stop. + For option 3 only provide a list with RNA splice sites. + + Examples: + + Crossmap(RNA, [], 1) + - Get the I{n.} notation of the RNA splice sites. The input is in forward notation. - Crossmap(RNA, [], 1) - - Get the c. notation of the CDS start and stop, and the internal + + Crossmap(CDS, [], 1) + - Get the I{c.} notation of the CDS start and stop, and the internal splice sites. The input is in forward notation. - Crossmap(CDS, [], 1) - - Get the c. notation of the RNA splice sites. The input is in + + Crossmap(RNA, CDS, -1) + - Get the I{c.} notation of the RNA splice sites. The input is in reverse complement. - Crossmap(RNA, CDS, -1) + - The output is straightforward, except for the c. notation of the - downstream RNA splice sites. This is denoted by __STOP + the - distance to the stop codon, as an alternative to the *-notation. + The output is straightforward, except for the I{c.} notation of the + downstream RNA splice sites. This is denoted by __STOP + the + distance to the stop codon, as an alternative to the *-notation. - Private variables (altered): - __crossmapping ; A list that contains either c. or n. positions - corresponding to the g. positions in the RNA - list. + Private variables (altered): + - __crossmapping ; A list that contains either I{c.} or I{n.} positions + corresponding to the I{g.} positions in the RNA + list. - Private variables: - __STOP ; A large number to indicate positions after CDS - stop. + Private variables: + - __STOP ; A large number to indicate positions after CDS + stop. - Public variables: - RNA ; The list of RNA splice sites. - CDS ; CDS start and stop (if present). - orientation ; The orientation of the transcript: 1 = forward - -1 = reverse. 
+ Public variables: + - RNA ; The list of RNA splice sites. + - CDS ; CDS start and stop (if present). + - orientation ; The orientation of the transcript: 1 = forward, + -1 = reverse. """ RNAlen = len(self.RNA) @@ -237,47 +256,49 @@ class Crossmap() : def g2x(self, a) : """ - This function calculates either: - 1: The n. notation from a g. notation. - 2: The c. notation from a g. notation. - - For option 1 only provide an array with mRNA splice sites and one - with the c. notation of the splice sites. - For option 2 provide an array with mRNA splice sites, one with the - c. notation of the splice sites and an array with the CDS start and - stop. - - Examples: - - Get the n. notation of a g. position i. The input is in forward + This function calculates either: + 1. The I{n.} notation from a I{g.} notation. + 2. The I{c.} notation from a I{g.} notation. + + For option 1 only provide an array with mRNA splice sites and one + with the I{c.} notation of the splice sites. + For option 2 provide an array with mRNA splice sites, one with the + I{c.} notation of the splice sites and an array with the CDS start and + stop. + + Examples: + + Crossmap(RNA, [], 1) + g2x(i) + - Get the I{n.} notation of a I{g.} position i. The input is in forward + notation. + + Crossmap(mRNA, CDS, -1); + g2x(i); + - Get the I{c.} notation of a I{g.} position i. The input is in reverse notation. - Crossmap(RNA, [], 1) - g2x(i) - - Get the c. notation of a g. position i. The input is in reverse - notation. - Crossmap(mRNA, CDS, -1); - g2x(i); - The output is fully compatible with the HVGS nomenclature as - defined on 01-07-2009. + The output is fully compatible with the HGVS nomenclature as defined + on 01-07-2009. - Arguments: - a ; The genomic position that must be translated. + Private variables: + - __crossmapping ; A list that contains either I{c.} or I{n.} positions + corresponding to the I{g.} positions in the RNA + list. + - __STOP ; A large number to indicate positions after CDS + stop. 
- Private variables: - __crossmapping ; A list that contains either c. or n. positions - corresponding to the g. positions in the RNA - list. - __STOP ; A large number to indicate positions after CDS - stop. + Public variables: + - RNA ; The list of RNA splice sites. + - CDS ; CDS start and stop (if present). + - orientation ; The orientation of the transcript: 1 = forward, + -1 = reverse. - Public variables: - RNA ; The list of RNA splice sites. - CDS ; CDS start and stop (if present). - orientation ; The orientation of the transcript: 1 = forward - -1 = reverse. + @arg a: The genomic position that must be translated + @type a: integer - Returns: - string ; The c. or n. notation of position a. + @return: The I{c.} or I{n.} notation of position a + @rtype: string """ # TODO update documentation. @@ -313,39 +334,42 @@ class Crossmap() : def x2g(self, a, b) : """ - This function calculates either: - 1: The g. notation from a n. notation. - 2: The g. notation from a c. notation. + This function calculates either: + 1. The I{g.} notation from a I{n.} notation. + 2. The I{g.} notation from a I{c.} notation. - Whether option 1 or 2 applies depends on the content of mRNAm. + Whether option 1 or 2 applies depends on the content of mRNAm. - Examples: - - Get the g. notation of a n. position i. The input is in forward - notation. - Crossmap(RNA, [], 1) - x2g(i) - - Get the g. notation of a c. position i with offset j. The input - is in reverse notation. - Crossmap(mRNA, CDS, -1); - x2g(i, j); + Examples: - Arguments: - a ; The n. or c. position to be translated. - b ; The offset of position a. + Crossmap(RNA, [], 1) + x2g(i) + - Get the I{g.} notation of a I{n.} position i. The input is in forward + notation. - Private variables: - __crossmapping ; A list that contains either c. or n. positions - corresponding to the g. positions in the RNA - list. + Crossmap(mRNA, CDS, -1); + x2g(i, j); + - Get the I{g.} notation of a I{c.} position i with offset j. 
The input + is in reverse notation. - Public variables: - RNA ; The list of RNA splice sites. - orientation ; The orientation of the transcript: 1 = forward - -1 = reverse. + Private variables: + - __crossmapping ; A list that contains either I{c.} or I{n.} positions + corresponding to the I{g.} positions in the RNA + list. + + Public variables: + - RNA ; The list of RNA splice sites. + - orientation ; The orientation of the transcript: 1 = forward + -1 = reverse. - Returns: - integer ; A g. position. + @arg a: The I{n.} or I{c.} position to be translated + @type a: integer + @arg b: The offset of position a + @type b: integer + + @return: A I{g.} position + @rtype: integer """ d = self.orientation @@ -373,16 +397,16 @@ class Crossmap() : def int2main(self, a) : """ - This method converts the __STOP notation to the '*' notation. + This method converts the __STOP notation to the '*' notation. - Arguments: - a ; An integer in __STOP notation. + Private variables: + - __STOP ; CDS stop in I{c.} notation. - Private variables: - __STOP ; CDS stop in c. notation. + @arg a: An integer in __STOP notation + @type a: integer - Returns: - string ; The converted notation (may be unaltered). + @return: The converted notation (may be unaltered) + @rtype: string """ if a > self.__STOP : @@ -393,16 +417,16 @@ class Crossmap() : def main2int(self, s) : """ - This method converts the '*' notation to the __STOP notation. + This method converts the '*' notation to the __STOP notation. - Arguments: - s ; A string in '*' notation. + Private variables: + - __STOP ; CDS stop in I{c.} notation. - Private variables: - __STOP ; CDS stop in c. notation. + @arg s: A string in '*' notation + @type s: string - Returns: - integer ; The converted notation (may be unaltered). + @return: The converted notation (may be unaltered) + @rtype: integer """ if s[0] == '*' : @@ -413,15 +437,15 @@ class Crossmap() : def int2offset(self, t) : """ - Convert a tuple of integers to offset-notation. 
This adds a `+', - and `u' or `d' to the offset when appropriate. The main value is - not returned. + Convert a tuple of integers to offset-notation. This adds a `+', + and `u' or `d' to the offset when appropriate. The main value is + not returned. - Arguments: - t ; A tuple of integers: (main, offset) in __STOP notation. + @arg t: A tuple of integers: (main, offset) in __STOP notation + @type t: tuple - Returns: - string ; The offset in HGVS notation. + @return: The offset in HGVS notation + @rtype: string """ if t[1] > 0 : # The exon boundary is downstream. @@ -439,15 +463,15 @@ class Crossmap() : def offset2int(self, s) : """ - Convert an offset in HGVS notation to an integer. This removes - `+', `u' and `d' when present. It also converts a `?' to something - sensible. + Convert an offset in HGVS notation to an integer. This removes + `+', `u' and `d' when present. It also converts a `?' to something + sensible. - Arguments: - s ; An offset in HGVS notation. + @arg s: An offset in HGVS notation + @type s: string - Returns: - int ; The offset as an integer. + @return: The offset as an integer + @rtype: integer """ if not s : # No offset given. @@ -468,36 +492,52 @@ class Crossmap() : def tuple2string(self, t) : """ - Convert a tuple (main, offset) in __STOP notation to c. notation. + Convert a tuple (main, offset) in __STOP notation to I{c.} notation. - Arguments: - t ; A tuple (main, offset) in __STOP notation. + @arg t: A tuple (main, offset) in __STOP notation + @type t: tuple - Returns: - string ; The position in HGVS notation. + @return: The position in HGVS notation + @rtype: string """ return str(self.int2main(t[0])) + str(self.int2offset(t)) #tuple2string def g2c(self, a) : + """ + Uses both g2x() and tuple2string() to translate a genomic position + to __STOP notation to I{c.} notation. 
+ + @arg a: The genomic position that must be translated + @type a: integer + + @return: The position in HGVS notation + @rtype: string + """ return self.tuple2string(self.g2x(a)) #g2c def info(self) : """ - Return transcription start, transcription end and CDS stop. + Return transcription start, transcription end and CDS stop. - Returns: - triple ; (trans_start, trans_stop, CDS_stop) + @return: (trans_start, trans_stop, CDS_stop) + @rtype: triple """ return (self.__trans_start, self.__trans_end, self.__STOP) #info def getSpliceSite(self, number) : - #TODO documentation """ + Return the coordinate of a splice site. + + @arg number: the number of the RNA splice site counting from + transcription start. + @type number: integer + @return: coordinate of the RNA splice site. + @rtype: integer """ if self.orientation == 1 : @@ -506,15 +546,21 @@ class Crossmap() : #getSpliceSite def numberOfIntrons(self) : - #TODO documentation """ + Returns the number of introns. + + @return: number of introns + @rtype: integer """ return len(self.RNA) / 2 - 1 def numberOfExons(self) : - #TODO documentation """ + Returns the number of exons. + + @return: number of exons + @rtype: integer """ return len(self.RNA) / 2 diff --git a/src/Modules/Db.py b/src/Modules/Db.py index 4e40c521b43c4bf5e29160dfbcd1ce50357d3c40..375de4e8a945b8944f7a9442a792673284e75a5f 100644 --- a/src/Modules/Db.py +++ b/src/Modules/Db.py @@ -1,21 +1,27 @@ #!/usr/bin/python """ - Module for database access. - The Db class is a superclass of the rest of the classes and should not be - used as such. The superclass mainly consists of a wrapper for SQL - statements. - - - Public classes: - Db ; Log in to a database and keep it open for queries. - Mapping ; Mapping of transcripts and genes. - Remote ; Retrieving updates for the mapping databases. - Update ; Updating the mapping databases. - Cache ; Cache administration. - Batch ; Batch checker. +Module for database access. 
+The Db class is a superclass of the rest of the classes and should not be +used as such. The superclass mainly consists of a wrapper for SQL +statements. + +@requires: MySQLdb +@requires: types +@requires: time +@requires: os +@requires: Modules.Misc """ +#Public classes: +# - Db ; Log in to a database and keep it open for queries. +# - Mapping ; Mapping of transcripts and genes. +# - Remote ; Retrieving updates for the mapping databases. +# - Update ; Updating the mapping databases. +# - Cache ; Cache administration. +# - Batch ; Batch checker. + + import MySQLdb # connect(), escape_string() import types # TupleType import time # strftime() @@ -32,29 +38,31 @@ from Modules import Misc # ID() class Db() : """ - Log in to a database and keep it open for queries. + Log in to a database and keep it open for queries. - Private variables: - __db ; Interface to the database. + Private variables: + - __db ; Interface to the database. - Special methods: - __init__(dbName, mySqlUser, mySqlHost) ; Do the login. + Special methods: + - __init__(dbName, mySqlUser, mySqlHost) ; Do the login. - Public methods: - query(statement) ; General query function. + Public methods: + - query(statement) ; General query function. """ def __init__(self, dbName, mySqlUser, mySqlHost) : """ - Log in to the database. + Log in to the database. - Arguments: - dbName ; The name of the database to use. - mySqlUser ; User name for the database. - mySqlHost ; Host name for the database. + Private variables (altered): + - __db ; The interface to the database. - Private variables (altered): - __db ; The interface to the database. + @arg dbName: The name of the database to use + @type dbName: string + @arg mySqlUser: User name for the database + @type mySqlUser: string + @arg mySqlHost: Host name for the database + @type mySqlHost: string """ self.__db = MySQLdb.connect(user = mySqlUser, db = dbName, @@ -63,17 +71,16 @@ class Db() : def query(self, statement) : """ - Query the database. 
+ Query the database. - Arguments: - statement ; The statement that is to be queried, consists of - a tuple: (string, (args)). + Private variables: + - __db ; Interface to the database. - Returns: - list ; The result of the query. + @arg statement: The statement that is to be queried + @type statement: tuple (string, (args)) - Private variables: - __db ; Interface to the database. + @return: The result of the query + @rtype: list """ # Convert the arguments to a tuple. @@ -107,38 +114,38 @@ class Db() : class Mapping(Db) : """ - Database functions for mapping of transcripts and genes. - - Special methods: - __init__(build, config) ; Initialise the class. - - Public methods: - get_protAcc(mrnaAcc) ; Query the database for a protein ID. - get_NM_info(mrnaAcc) ; Retrieve various data for an NM number. - get_NM_version(mrnaAcc) ; Get the version number of an accession - number. - get_Transcripts(chrom, ; Get a list of transcripts, given a - position, chromosome and a range. - overlap) - get_GeneName(mrnaAcc) ; Get the gene name, given an NM number. - isChrom(name) ; Check whether we know this name to be - a chromosome name. - - Inherited methods from Db: - query(statement) ; General query function. - - SQL tables from dbNames: - map ; Accumulated mapping info. + Database functions for mapping of transcripts and genes. + + Special methods: + - __init__(build, config) ; Initialise the class. + + Public methods: + - get_protAcc(mrnaAcc) ; Query the database for a protein ID. + - get_NM_info(mrnaAcc) ; Retrieve various data for an NM number. + - get_NM_version(mrnaAcc) ; Get the version number of an accession + number. + - get_Transcripts(chrom, p1, p2, overlap) ; Get a list of transcripts, + given a chromosome and a range. + - get_GeneName(mrnaAcc) ; Get the gene name, given an NM number. + - isChrom(name) ; Check whether we know this name to be + a chromosome name. + + Inherited methods from Db: + - query(statement) ; General query function. 
+ + SQL tables from dbNames: + - map ; Accumulated mapping info. """ def __init__(self, build, config) : """ - Initialise the Db parent class. Use the local database for a - certain build. + Initialise the Db parent class. Use the local database for a certain + build. - Arguments: - build ; The version of the mapping database. - config ; Configuration variables. + @arg build: The version of the mapping database + @type build: string + @arg config: Configuration variables + @type config: class instance """ Db.__init__(self, build, config.LocalMySQLuser, config.LocalMySQLhost) @@ -146,16 +153,16 @@ class Mapping(Db) : def get_protAcc(self, mrnaAcc) : """ - Query the database for a protein ID given an mRNA ID. + Query the database for a protein ID given an mRNA ID. - Arguments: - mrnaAcc ; The ID of an mRNA. + SQL tables from dbNames: + - map ; Accumulated mapping info. - SQL tables from dbNames: - map ; Accumulated mapping info. + @arg mrnaAcc: The ID of an mRNA + @type mrnaAcc: string - Returns: - string ; The protein ID . + @return: The protein ID + @rtype: string """ statement = """ @@ -167,25 +174,28 @@ class Mapping(Db) : return self.query(statement)[0][0] #get_protAcc - def get_NM_info(self, mrnaAcc, version=None) : + def get_NM_info(self, mrnaAcc, version = None) : """ - Retrieve various data for an NM number. + Retrieve various data for an NM number. - Arguments: - mrnaAcc ; The ID of an mRNA. + SQL tables from dbNames: + - map ; Accumulated mapping info. - SQL tables from dbNames: - map ; Accumulated mapping info. + @arg mrnaAcc: The ID of an mRNA + @type mrnaAcc: string + @arg version: version number of the accession number (not used) + @type version: integer - Returns: - list: - exonStarts ; List of exon start sites. - exonEnds ; List of exon end sites. - cdsStart ; Position of the start codon. - cdsEnd ; Position of the end codon. - strand ; The orientation of the gene (+ = forward, - - = reverse). + @return: + - exonStarts ; List of exon start sites. 
+ - exonEnds ; List of exon end sites. + - cdsStart ; Position of the start codon. + - cdsEnd ; Position of the end codon. + - strand ; Orientation of the gene (+ = forward, + - = reverse) + @rtype: list """ + statement = """ SELECT exonStarts, exonEnds, cdsStart, cdsEnd, strand FROM map @@ -197,16 +207,16 @@ class Mapping(Db) : def get_NM_version(self, mrnaAcc) : """ - Get the version number of an accession number. + Get the version number of an accession number. - Arguments: - mrnaAcc ; The ID of an mRNA. + SQL tables from dbNames: + - map ; Accumulated mapping info. - SQL tables from dbNames: - map ; Accumulated mapping info. + @arg mrnaAcc: The ID of an mRNA + @type mrnaAcc: string - Returns: - integer ; The version number. + @return: The version number + @rtype: integer """ statement = """ @@ -220,18 +230,19 @@ class Mapping(Db) : def getAllFields(self, mrnaAcc, version): """ - Get all Fields of an accession number and version number. - If the version number is None, use the "newest" version number + Get all Fields of an accession number and version number. + If the version number is None, use the "newest" version number. - Arguments: - mrnaAcc ; The ID of an mRNA. - version ; The version number + SQL tables from dbNames: + - map ; Accumulated mapping info. - SQL tables from dbNames: - map ; Accumulated mapping info. + @arg mrnaAcc: The ID of an mRNA + @type mrnaAcc: string + @arg version: The version number + @type version: integer - Returns: - integer ; The version number. + @return: The version number + @rtype: integer """ q = """ @@ -260,26 +271,29 @@ class Mapping(Db) : def get_Transcripts(self, chrom, p1, p2, overlap) : """ - Get a list of transcripts, given a chromosome and a range. If - all transcripts that are hit should be returned, set overlap to 1, - if only the transcripts that completely reside within a range - should be returned, set overlap to 0. - - Arguments: - chrom ; The chromosome (coded as "chr1", ..., "chrY"). 
- p1 ; The position relative to the start of the chromosome. - p2 ; The position relative to the start of the chromosome. - overlap ; Specify the behaviour of the selection: - 0 ; Return only the transcripts that completely fall - in the range [p1, p2]. - 1 ; Return all hit transcripts. - - SQL tables from dbNames: - map ; Accumulated mapping info. + Get a list of transcripts, given a chromosome and a range. If + all transcripts that are hit should be returned, set overlap to 1, + if only the transcripts that completely reside within a range + should be returned, set overlap to 0. - Returns: - list ; All accession numbers that are hit according to the - overlap criterium. + SQL tables from dbNames: + - map ; Accumulated mapping info. + + @arg chrom: The chromosome (coded as "chr1", ..., "chrY") + @type chrom: string + @arg p1: The position relative to the start of the chromosome + @type p1: integer + @arg p2: The position relative to the start of the chromosome + @type p2: integer + @arg overlap: Specify the behaviour of the selection: + - 0 ; Return only the transcripts that completely fall in the + range [p1, p2] + - 1 ; Return all hit transcripts + @type overlap: boolean + + @return: All accession numbers that are hit according to the overlap + criterion + @rtype: list """ q = """ select acc, @@ -342,16 +356,16 @@ class Mapping(Db) : def get_GeneName(self, mrnaAcc) : """ - Get the name of a gene, given a transcript identifier (NM number). + Get the name of a gene, given a transcript identifier (NM number). - Arguments: - mrnaAcc ; The ID of an mRNA. + SQL tables from dbNames: + - map ; Accumulated mapping info. - SQL tables from dbNames: - map ; Accumulated mapping info. + @arg mrnaAcc: The ID of an mRNA + @type mrnaAcc: string - Returns: - string ; The gene name. + @return: The gene name + @rtype: string """ statement = """ @@ -368,17 +382,17 @@ class Mapping(Db) : def isChrom(self, name) : """ - Check if the given name is a valid chromosome name.
+ Check if the given name is a valid chromosome name. - Arguments: - name ; The name to be tested. + SQL tables from dbNames: + - map ; Accumulated mapping info. - SQL tables from dbNames: - map ; Accumulated mapping info. + @arg name: The name to be tested + @type name: string - Returns: - boolean ; True if the name is found to be a chromosome name, - False otherwise. + @return: True if the name is found to be a chromosome name, False + otherwise + @rtype: boolean """ statement = """ @@ -394,16 +408,16 @@ class Mapping(Db) : def chromName(self, accNo) : """ - Get the name of a chromosome, given an accession number. + Get the name of a chromosome, given an accession number. - Arguments: - accNo ; The accession number of a chromosome. + SQL tables from dbNames: + - ChrName ; Assembly release notes. - SQL tables from dbNames: - ChrName ; Assembly release notes. + @arg accNo: The accession number of a chromosome + @type accNo: string - Returns: - string ; The name of a chromosome. + @return: The name of a chromosome + @rtype: string """ statement = """ @@ -420,16 +434,16 @@ class Mapping(Db) : def chromAcc(self, name) : """ - Get the accession number of a chromosome, given a name. + Get the accession number of a chromosome, given a name. - Arguments: - name ; The name of a chromosome. + SQL tables from dbNames: + - ChrName ; Assembly release notes. - SQL tables from dbNames: - ChrName ; Assembly release notes. + @arg name: The name of a chromosome + @type name: string - Returns: - string ; The accession number of a chromosome. + @return: The accession number of a chromosome + @rtype: string """ statement = """ @@ -446,17 +460,16 @@ class Mapping(Db) : def get_chromName(self, acc) : """ - Get the chromosome name, given a transcript identifier (NM number). + Get the chromosome name, given a transcript identifier (NM number). - Arguments: - acc ; The NM accession number (version NOT included) + SQL tables from dbNames: + - map ; Accumulated mapping info. 
- SQL tables from dbNames: - map ; . - - Returns: - string ; The chromosome name (e.g. chr1) + @arg acc: The NM accession number (version NOT included) + @type acc: string + @return: The chromosome name (e.g. chr1) + @rtype: string """ statement = """ @@ -474,37 +487,38 @@ class Mapping(Db) : class Remote(Db) : """ - Database functions for retrieving updates for the mapping databases. + Database functions for retrieving updates for the mapping databases. - Special methods: - __init__(config) ; Initialise the class. + Special methods: + - __init__(config) ; Initialise the class. - Public methods: - get_Update() ; Retrieve new mapping info from the UCSC. + Public methods: + - get_Update() ; Retrieve new mapping info from the UCSC. - Inherited methods from Db: - query(statement) ; General query function. + Inherited methods from Db: + - query(statement) ; General query function. - SQL tables from dbNames: - gbStatus ; acc -> version mapping (NM to NM + version), - type, modDate - refGene ; name -> geneName mapping (NM to gene name), - txStart, txEnd, cdsStart, cdsEnd, exonStarts, - exonEnds, chrom, strand. - refLink ; mrnaAcc -> protAcc mapping (NM to NP). + SQL tables from dbNames: + - gbStatus ; acc -> version mapping (NM to NM + version), + type, modDate + - refGene ; name -> geneName mapping (NM to gene name), + txStart, txEnd, cdsStart, cdsEnd, exonStarts, + exonEnds, chrom, strand. + - refLink ; mrnaAcc -> protAcc mapping (NM to NP). """ def __init__(self, build, config) : """ - Initialise the Db parent class. Use the remote database for a - certain build. + Initialise the Db parent class. Use the remote database for a + certain build. - Arguments: - build ; The version of the mapping database. - config ; Configuration variables. + Private variables (altered): + - __config ; Configuration variables. - Private variables (altered): - __config ; Configuration variables. 
+ @arg build: The version of the mapping database + @type build: string + @arg config: Configuration variables + @type config: class instance """ self.__config = config @@ -513,22 +527,22 @@ class Remote(Db) : def get_Update(self) : """ - Retrieve all mapping updates from the UCSC within a certain time - window (defined in the configuration file) and gather the results - into one mapping table. + Retrieve all mapping updates from the UCSC within a certain time + window (defined in the configuration file) and gather the results + into one mapping table. - The results will be written to a temporary file (also defined in - the configuration file) to be imported in the local database with - the load_Update() function. + The results will be written to a temporary file (also defined in + the configuration file) to be imported in the local database with + the load_Update() function. - SQL tables from dbNames: - gbStatus ; acc -> version mapping (NM to NM + version), - type, modDate - refGene ; name -> geneName mapping (NM to gene name), - txStart, txEnd, cdsStart, cdsEnd, exonStarts, - exonEnds, chrom, strand. - refLink ; mrnaAcc -> protAcc mapping (NM to NP). + SQL tables from dbNames: + - gbStatus ; acc -> version mapping (NM to NM + version), + type, modDate + - refGene ; name -> geneName mapping (NM to gene name), + txStart, txEnd, cdsStart, cdsEnd, exonStarts, + exonEnds, chrom, strand. + - refLink ; mrnaAcc -> protAcc mapping (NM to NP). """ statement = """ @@ -557,44 +571,45 @@ class Remote(Db) : class Update(Db) : """ - Database functions for updating the mapping databases. - - Public methods: - load_Update() ; Load new mapping info into the local database. - count_Updates() ; Count the number of entries in the new - mapping info table. - backup_cdsUpdates() ; Make a backup of updates that overwrite the - old mapping info. - count_cdsUpdates() ; Count the number of updates that overwrite - the old mapping info. 
- merge_cdsUpdates() ; Merge the backup of old mapping info with the - other old info. - merge_Update() ; Merge the new mapping info from the UCSC with - what we already have. - - Inherited methods from Db: - query(statement) ; General query function. - - SQL tables from dbNames: - map ; Accumulated mapping info. - map_temp ; Newly found data. - map_new ; Merge of map_temp and map. - map_cdsBackup_temp ; Entries that were updated without an increment - of the version number. - map_cdsBackup ; Merge of map_cdsBackup_temp and itself. + Database functions for updating the mapping databases. + + Public methods: + - load_Update() ; Load new mapping info into the local database. + - count_Updates() ; Count the number of entries in the new + mapping info table. + - backup_cdsUpdates() ; Make a backup of updates that overwrite the + old mapping info. + - count_cdsUpdates() ; Count the number of updates that overwrite + the old mapping info. + - merge_cdsUpdates() ; Merge the backup of old mapping info with the + other old info. + - merge_Update() ; Merge the new mapping info from the UCSC with + what we already have. + + Inherited methods from Db: + - query(statement) ; General query function. + + SQL tables from dbNames: + - map ; Accumulated mapping info. + - map_temp ; Newly found data. + - map_new ; Merge of map_temp and map. + - map_cdsBackup_temp ; Entries that were updated without an increment + of the version number. + - map_cdsBackup ; Merge of map_cdsBackup_temp and itself. """ def __init__(self, build, config) : """ - Initialise the Db parent class. Use the remote database for a - certain build. + Initialise the Db parent class. Use the remote database for a + certain build. - Arguments: - build ; The version of the mapping database. - config ; Configuration variables. + Private variables (altered): + - __config ; Configuration variables. - Private variables (altered): - __config ; Configuration variables. 
+ @arg build: The version of the mapping database + @type build: string + @arg config: Configuration variables + @type config: class instance """ self.__config = config @@ -603,15 +618,15 @@ class Update(Db) : def load_Update(self) : """ - Load the updates from the temporary file (defined in the - configuration file) created by the get_Update() function and import - it in the local database. + Load the updates from the temporary file (defined in the + configuration file) created by the get_Update() function and import + it in the local database. - SQL tables from dbNames (altered): - map_temp ; Created and loaded with data from TempFile. + SQL tables from dbNames (altered): + - map_temp ; Created and loaded with data from TempFile. - SQL tables from dbNames: - map ; Accumulated mapping info. + SQL tables from dbNames: + - map ; Accumulated mapping info. """ # The statements in this function may be combined when MYSQL_BUG is @@ -633,15 +648,14 @@ class Update(Db) : def count_Updates(self) : """ - Count the number of updates. This function will only work if it - is preceeded by the load_Update() function. Otherwise the map_temp - table may not exist. This function can not be used after the - merge_Update() function has been executed, since it drops the - map_temp table. + Count the number of updates. This function will only work if it + is preceded by the load_Update() function. Otherwise the map_temp + table may not exist. This function can not be used after the + merge_Update() function has been executed, since it drops the + map_temp table. - Returns: - int ; The number of entries in the table of updated mapping - info. + @return: The number of entries in the table of updated mapping info + @rtype: integer """ statement = """ @@ -654,19 +668,19 @@ class Update(Db) : def backup_cdsUpdates(self) : """ - Copy all mapping entries where there was an update, but no - increment in the version number, to a backup table.
Note that - we use acc, version, txStart as the primary key because members - of a gene family are mapped multiple times. + Copy all mapping entries where there was an update, but no + increment in the version number, to a backup table. Note that + we use acc, version, txStart as the primary key because members + of a gene family are mapped multiple times. - SQL tables from dbNames (altered): - map_cdsBackup_temp ; Created and filled with entries that - were updated without an increment of the - version number. + SQL tables from dbNames (altered): + - map_cdsBackup_temp ; Created and filled with entries that + were updated without an increment of the + version number. - SQL tables from dbNames: - map ; Accumulated mapping info. - map_temp ; Freshly downloaded mapping info. + SQL tables from dbNames: + - map ; Accumulated mapping info. + - map_temp ; Freshly downloaded mapping info. """ statement = """ @@ -687,18 +701,18 @@ class Update(Db) : def count_cdsUpdates(self) : """ - Count the number of mapping entries that have changed without an - increment in the version number. This function can only be called - after backup_cdsUpdates() has been executed and before - merge_cdsUpdates has been executed. + Count the number of mapping entries that have changed without an + increment in the version number. This function can only be called + after backup_cdsUpdates() has been executed and before + merge_cdsUpdates has been executed. - SQL tables from dbNames: - map_cdsBackup_temp ; Entries that wre updated without an - increment of the version number. + SQL tables from dbNames: + - map_cdsBackup_temp ; Entries that were updated without an + increment of the version number. - Returns: - int ; The number of mapping entries that have changed without - an increment in the version number.
+ @return: The number of mapping entries that have changed without an + increment in the version number + @rtype: integer """ statement = """ @@ -711,14 +725,14 @@ class Update(Db) : def merge_cdsUpdates(self) : """ - Merge the mapping entries that have changed without an increment in - the version number with a table that contains backups of these - entries. + Merge the mapping entries that have changed without an increment in + the version number with a table that contains backups of these + entries. - SQL tables from dbNames (altered): - map_cdsBackup ; Extended with the entries in - map_cdsBackup_temp. - map_cdsBackup_temp ; Dropped. + SQL tables from dbNames (altered): + - map_cdsBackup ; Extended with the entries in + map_cdsBackup_temp. + - map_cdsBackup_temp ; Dropped. """ # The statements in this function may be combined when MYSQL_BUG is @@ -739,13 +753,13 @@ class Update(Db) : def merge_Update(self) : """ - Merge the new mapping data with the old ones. + Merge the new mapping data with the old ones. - SQL tables from dbNames (altered): - map_new ; Created and filled with the merge of map_temp and - map. Dropped after use. - map_temp ; Merged with map to form map_new. Dropped after use. - map ; Overwritten with the merged info in map_new. + SQL tables from dbNames (altered): + - map_new ; Created and filled with the merge of map_temp and map. + Dropped after use. + - map_temp ; Merged with map to form map_new. Dropped after use. + - map ; Overwritten with the merged info in map_new. """ # The statements in this function may be combined when MYSQL_BUG is @@ -791,46 +805,38 @@ class Update(Db) : class Cache(Db) : """ - Database functions for cache administration. - - Special methods: - __init__(config) ; Initialise the class. - - Public methods: - insertGB(accNo, GI, ; Insert info about a GenBank record. - fileHash, - ChrAccVer, - ChrStart, - ChrStop, - orientation, - url) - updateHash(accNo, ; Update the hash of an accession number. 
- fileHash) - getGBFromLoc(ChrAccVer, ; Get the accession number from slicing - ChrStart, information. - ChrStop, - orientation) - getGBFromHash(fileHash) ; Get the accession number from its hash. - getGBFromGI(GI) ; Get the accession number from its GI - number. - getLoc(accNo) ; Get the slicing information of an - accession number. - getHash(accNo) ; Get the hash of a GenBank record. - getUrl(accNo) ; Get the URL of an accession number. - - Inherited methods from Db: - query(statement) ; General query function. - - SQL tables from internalDb: - GBInfo ; Information about cached and uploaded GenBank files. + Database functions for cache administration. + + Special methods: + - __init__(config) ; Initialise the class. + + Public methods: + - insertGB(accNo, GI, fileHash, ChrAccVer, ChrStart, ChrStop, + orientation, url) ; Insert info about a GenBank record. + - updateHash(accNo, fileHash) ; Update the hash of an accession number. + - getGBFromLoc(ChrAccVer, ChrStart, ChrStop, orientation) ; Get the + accession number from slicing information. + - getGBFromHash(fileHash) ; Get the accession number from its hash. + - getGBFromGI(GI) ; Get the accession number from its GI + number. + - getLoc(accNo) ; Get the slicing information of an + accession number. + - getHash(accNo) ; Get the hash of a GenBank record. + - getUrl(accNo) ; Get the URL of an accession number. + + Inherited methods from Db: + - query(statement) ; General query function. + + SQL tables from internalDb: + - GBInfo ; Information about cached and uploaded GenBank files. """ def __init__(self, config) : """ - Initialise the Db parent class. Use the internalDb. + Initialise the Db parent class. Use the internalDb. - Arguments: - config ; Configuration variables. 
+ @arg config: Configuration variables + @type config: class instance """ Db.__init__(self, config.internalDb, config.LocalMySQLuser, @@ -840,35 +846,41 @@ class Cache(Db) : def insertGB(self, accNo, GI, fileHash, ChrAccVer, ChrStart, ChrStop, orientation, url) : """ - Insert information about a GenBank record in the internal database. + Insert information about a GenBank record in the internal database. - The accNo and fileHash arguments are mandatory. + The accNo and fileHash arguments are mandatory. - If the record is a normal RefSeq, then the GI number should be - provided. - - If the record is a chromosome slice, then the ChrAccVer, - ChrStart, ChrStop and orientation variables should be specified. - - If the record is downloaded from the internet, the url should - be provided. - - If all fields except the mandatory ones are empty, the record - is assumed to be uploaded. - - Arguments: - accNo ; The name associated with this record. - GI ; The GI number (if available). - fileHash ; The hash of the content of the record. - ChrAccVer ; The accession number of the chromosome (if - available). - ChrStart ; The start of the record in chromosomal - coordinates (if available). - ChrStop ; The end of the record in chromosomal coordinates - (if available). - orientation ; The orientation of the record relative to the - chromosome (if available) (1 = forward, - 2 = reverse complement). - url ; The originating URL (if available). - - SQL tables from internalDb (altered): - GBInfo ; Information about cached and uploaded GenBank files. + provided. + - If the record is a chromosome slice, then the ChrAccVer, ChrStart, + ChrStop and orientation variables should be specified. + - If the record is downloaded from the internet, the url should be + provided. + - If all fields except the mandatory ones are empty, the record is + assumed to be uploaded. + + SQL tables from internalDb (altered): + - GBInfo ; Information about cached and uploaded GenBank files. 
+ + @arg accNo: The name associated with this record + @type accNo: string + @arg GI: The GI number (if available) + @type GI: string + @arg fileHash: The hash of the content of the record + @type fileHash: string + @arg ChrAccVer: The accession number of the chromosome (if available) + @type ChrAccVer: string + @arg ChrStart: The start of the record in chromosomal + coordinates (if available) + @type ChrStart: integer + @arg ChrStop: The end of the record in chromosomal coordinates + (if available) + @type ChrStop: integer + @arg orientation: The orientation of the record relative to the + chromosome (if available) (1 = forward, + 2 = reverse complement) + @type orientation: integer + @arg url: The originating URL (if available) + @type url: string """ statement = """ @@ -882,8 +894,18 @@ class Cache(Db) : def insertLRG(self, accNo, fileHash, url): """ - Insert information about a LRG record in the internal database + Insert information about a LRG record in the internal database. + + See insertGB() for more information. + + @arg accNo: The name associated with this record + @type accNo: string + @arg fileHash: The hash of the content of the record + @type fileHash: string + @arg url: The originating URL (if available) + @type url: string """ + statement = """ INSERT INTO GBInfo VALUES (%s, %s, %s, %s, %s, %s, %s, %s); @@ -895,14 +917,15 @@ class Cache(Db) : def updateHash(self, accNo, fileHash) : """ - Update the hash of an accession number. + Update the hash of an accession number. - Arguments: - accNo ; The accession number of a GenBank record. - fileHash ; The hash of a GenBank record. + SQL tables from internalDb (altered): + - GBInfo ; Information about cached and uploaded GenBank files. - SQL tables from internalDb (altered): - GBInfo ; Information about cached and uploaded GenBank files. 
+ @arg accNo: The accession number of a GenBank record + @type accNo: string + @arg fileHash: The hash of a GenBank record + @type fileHash: string """ statement = """ @@ -916,22 +939,25 @@ class Cache(Db) : def getGBFromLoc(self, ChrAccVer, ChrStart, ChrStop, orientation) : """ - Get the accession number from a chromosomic location, used - to make a slice, typically this only affects UD-numbers. + Get the accession number from a chromosomic location, used + to make a slice, typically this only affects UD-numbers. - Arguments: - ChrAccVer ; The accession number of the chromosome. - ChrStart ; Start position of the slice. - ChrStop ; End position of the slice. - orientation ; Orientation of the slice: - 1 ; Forward. - 2 ; Reverse complement. + SQL tables from internalDb: + - GBInfo ; Information about cached and uploaded GenBank files. - SQL tables from internalDb: - GBInfo ; Information about cached and uploaded GenBank files. + @arg ChrAccVer: The accession number of the chromosome + @type ChrAccVer: string + @arg ChrStart: Start position of the slice + @type ChrStart: integer + @arg ChrStop: End position of the slice + @type ChrStop: integer + @arg orientation: Orientation of the slice: + 1. Forward + 2. Reverse complement + @type orientation: integer - Returns: - string ; The accession number. + @return: The accession number + @rtype: string """ statement = """ @@ -951,16 +977,16 @@ class Cache(Db) : def getGBFromHash(self, fileHash) : """ - Get the accession number from its hash. + Get the accession number from its hash. - Arguments: - fileHash ; The hash of a GenBank record. + SQL tables from internalDb: + - GBInfo ; Information about cached and uploaded GenBank files. - SQL tables from internalDb: - GBInfo ; Information about cached and uploaded GenBank files. + @arg fileHash: The hash of a GenBank record + @type fileHash: string - Returns: - string ; The accession number. 
+ @return: The accession number + @rtype: string """ statement = """ @@ -977,17 +1003,17 @@ class Cache(Db) : def getGBFromGI(self, GI) : """ - Get the accession number from its GI number, typically this only - affects RefSeq sequences. + Get the accession number from its GI number, typically this only + affects RefSeq sequences. - Arguments: - GI ; The GI number of a GenBank record. + SQL tables from internalDb: + - GBInfo ; Information about cached and uploaded GenBank files. - SQL tables from internalDb: - GBInfo ; Information about cached and uploaded GenBank files. + @arg GI: The GI number of a GenBank record + @type GI: string - Returns: - string ; The accession number. + @return: The accession number + @rtype: string """ statement = """ @@ -1004,22 +1030,22 @@ class Cache(Db) : def getLoc(self, accNo) : """ - Get the slicing information of an accession number, typically this - only affects UD numbers. + Get the slicing information of an accession number, typically this + only affects UD numbers. - Arguments: - accNo ; The accession number of a genbank record. + SQL tables from internalDb: + - GBInfo ; Information about cached and uploaded GenBank files. - SQL tables from internalDb: - GBInfo ; Information about cached and uploaded GenBank files. + @arg accNo: The accession number of a genbank record + @type accNo: string - Returns: - list ; The slicing information: - ChrAccVer ; Accession number of the chromosome. - ChrStart ; Start position of the slice. - ChrStop ; End position of the slice. - orientation ; Orientation of the slice (1 = forward, - 2 = reverse complement). 
+ @return: The slicing information: + - ChrAccVer ; Accession number of the chromosome + - ChrStart ; Start position of the slice + - ChrStop ; End position of the slice + - orientation ; Orientation of the slice (1 = forward, + 2 = reverse complement) + @rtype: list """ statement = """ @@ -1036,16 +1062,16 @@ class Cache(Db) : def getHash(self, accNo) : """ - Get the hash of a GenBank record identified by an accession number. + Get the hash of a GenBank record identified by an accession number. - Arguments: - accNo ; The accession number of a genbank record. + SQL tables from internalDb: + - GBInfo ; Information about cached and uploaded GenBank files. - SQL tables from internalDb: - GBInfo ; Information about cached and uploaded GenBank files. + @arg accNo: The accession number of a genbank record + @type accNo: string - Returns: - string ; The hash of the GenBank record. + @return: The hash of the GenBank record + @rtype: string """ statement = """ @@ -1062,17 +1088,17 @@ class Cache(Db) : def getUrl(self, accNo) : """ - Get the URL of an accession number, typically this only affects - uploaded UD numbers. + Get the URL of an accession number, typically this only affects + uploaded UD numbers. - Arguments: - accNo ; The accession number of a genbank record. + SQL tables from internalDb: + - GBInfo ; Information about cached and uploaded GenBank files. - SQL tables from internalDb: - GBInfo ; Information about cached and uploaded GenBank files. + @arg accNo: The accession number of a genbank record + @type accNo: string - Returns: - string ; The URL of the GenBank record. + @return: The URL of the GenBank record + @rtype: string """ statement = """ @@ -1089,13 +1115,16 @@ class Cache(Db) : def getGI(self, accNo) : """ - Get the GI number that is connected to the accession number. + Get the GI number that is connected to the accession number. - Arguments: - accNo ; The accession number. 
+ SQL tables from internalDb: + - GBInfo ; Information about cached and uploaded GenBank files. - SQL tables from internalDb: - GBInfo ; Information about cached and uploaded GenBank files. + @arg accNo: The accession number + @type accNo: string + + @return: GI number + @rtype: string """ statement = """ @@ -1112,6 +1141,17 @@ class Cache(Db) : def getProtAcc(self, mrnaAcc) : """ + Gets the protein accession number for the given mRNA accession + number. + + SQL tables from internalDb: + - Link ; mRNA and associated protein IDs. + + @arg mrnaAcc: The ID of an mRNA + @type mrnaAcc: string + + @return: The protein accession number + @rtype: string """ statement = """ @@ -1128,6 +1168,16 @@ class Cache(Db) : def getmrnaAcc(self, protAcc) : """ + Gets the mRNA accession number for a given protein accession number. + + SQL tables from internalDb: + - Link ; mRNA and associated protein IDs. + + @arg protAcc: The protein ID + @type protAcc: string + + @return: The mRNA accession number + @rtype: string """ statement = """ @@ -1145,6 +1195,16 @@ class Cache(Db) : def insertLink(self, mrnaAcc, protAcc) : """ + Inserts the given mRNA and protein accession numbers into the Link + table. + + SQL tables from internalDb: + - Link ; mRNA and associated protein IDs. + + @arg protAcc: The protein ID + @type protAcc: string + @arg mrnaAcc: The ID of an mRNA + @type mrnaAcc: string """ statement = """ @@ -1158,40 +1218,37 @@ class Cache(Db) : class Batch(Db) : """ - Database functions for the batch checker. - - Special methods: - __init__(config) ; Initialise the class. - - Public methods: - isJobListEmpty() ; See if there are active jobs. - addJob(outputFilter, ; Add a job and give it a unique ID. - email, - fromHost) - getJobs() ; Get a list of active jobs. - removeJob(jobID) ; Remove a job and return information about - the job submitter. - addToQueue(jobID, ; Add a request belonging to a certain job to - accNo, the queue. 
- gene, - variant) - getFromQueue(jobID) ; Get a request belonging to a certain job - from the queue. - - Inherited methods from Db: - query(statement) ; General query function. - - SQL tables from internalDb: - BatchJob ; Job information. - BatchQueue ; Requests. + Database functions for the batch checker. + + Special methods: + - __init__(config) ; Initialise the class. + + Public methods: + - isJobListEmpty() ; See if there are active jobs. + - addJob(outputFilter, email, fromHost); Add a job and give it a unique + ID. + - getJobs() ; Get a list of active jobs. + - removeJob(jobID) ; Remove a job and return information about + the job submitter. + - addToQueue(jobID, accNo, gene, variant) ; Add a request belonging to a + certain job to the queue. + - getFromQueue(jobID) ; Get a request belonging to a certain job + from the queue. + + Inherited methods from Db: + - query(statement) ; General query function. + + SQL tables from internalDb: + - BatchJob ; Job information. + - BatchQueue ; Requests. """ def __init__(self, config) : """ - Initialise the Db parent class. Use the internalDb. + Initialise the Db parent class. Use the internalDb. - Arguments: - config ; Configuration variables. + @arg config: Configuration variables + @type config: class instance """ Db.__init__(self, config.internalDb, config.LocalMySQLuser, @@ -1200,13 +1257,13 @@ class Batch(Db) : def isJobListEmpty(self) : """ - See if there are active jobs. + See if there are active jobs. - SQL tables from internalDb: - BatchJob ; Job information. + SQL tables from internalDb: + - BatchJob ; Job information. - Returns: - boolean ; False if there are active jobs, True otherwise. 
+ @return: False if there are active jobs, True otherwise + @rtype: boolean """ statement = """ @@ -1221,16 +1278,16 @@ class Batch(Db) : def entriesLeftForJob(self, jobID): """ - Count the number of entries of a job that are still to be processed + Count the number of entries of a job that are still to be processed - Arguments: - jobID ; The JobID of interest + SQL tables from internalDB: + - BatchQueue ; Queue information - SQL tables from internalDB: - BatchQueue ; Queue information + @arg jobID: The JobID of interest + @type jobID: string - Returns: - int ; The number of entries + @return: The number of entries + @rtype: integer """ statement = """ SELECT COUNT(*) @@ -1244,18 +1301,20 @@ class Batch(Db) : def addJob(self, outputFilter, email, fromHost, jobType, Arg1) : """ - Add a job and give it a unique ID. + Add a job and give it a unique ID. - Arguments: - outputFilter ; Output settings for all requests in this job. - email ; Contact information of the submitter. - jobType ; The type of batch job + SQL tables from internalDb (altered): + - BatchJob ; Job information. - SQL tables from internalDb (altered): - BatchJob ; Job information. + @arg outputFilter: Output settings for all requests in this job + @type outputFilter: string + @arg email: Contact information of the submitter + @type email: string + @arg jobType: The type of batch job + @type jobType: string - Returns: - int ; A job ID. + @return: A job ID + @rtype: integer """ M = Misc.Misc() @@ -1272,13 +1331,13 @@ class Batch(Db) : def getJobs(self) : """ - Get a list of active jobs. + Get a list of active jobs. - SQL tables from internalDb: - BatchJob ; Job information. + SQL tables from internalDb: + - BatchJob ; Job information. - Returns: - list ; List of tuples (job ID, job Type). 
+ @return: List of tuples (job ID, job Type) + @rtype: list """ statement = """ @@ -1294,14 +1353,14 @@ class Batch(Db) : Remove a job (because the queue for this job is empty) and return information needed to alert the job submitter. - Arguments: - jobID ; Identifier of a job. - SQL tables from internalDb (altered): - BatchJob ; Job information. + - BatchJob ; Job information. - Returns: - triple ; Data for the job submitter. + @arg jobID: Identifier of a job + @type jobID: string + + @return: Data for the job submitter + @rtype: triple """ # First retrieve all information about this job. @@ -1325,14 +1384,15 @@ class Batch(Db) : def addToQueue(self, jobID, inputl, flag): """ - Add a request belonging to a certain job to the queue. + Add a request belonging to a certain job to the queue. - Arguments: - jobID ; Identifier of a job. - inputl ; The input line of an entry + SQL tables from internalDb (altered): + - BatchQueue ; Requests. - SQL tables from internalDb (altered): - BatchQueue ; Requests. + @arg jobID: Identifier of a job + @type jobID: string + @arg inputl: The input line of an entry + @type inputl: string """ # The first value (QueueID) will be auto increased by MySQL. @@ -1346,23 +1406,27 @@ class Batch(Db) : def updateBatchDb(self, jobID, old, new, flag, whereNot): """ - Update the Entries of a BatchJob. This is used to alter - batch entries that would otherwise take a long time to process. - e.g. a batch job with a lot of the same accession numbers without - version numbers would take a long time because mutalyzer would - fetch the file from the NCBI for each entry. A database update - over all entries with the same accession number speeds up the - job considerably. - - Arguments: - jobID ; Identifier of a job. - old ; String to be replaced - new ; String to replace old with - flag ; The reason of subsitution - whereNot; A negative selector to prevent false positives - - SQL tables from internalDb (altered): - BatchQueue ; Requests. 
+ Update the Entries of a BatchJob. This is used to alter + batch entries that would otherwise take a long time to process. + e.g. a batch job with a lot of the same accession numbers without + version numbers would take a long time because mutalyzer would + fetch the file from the NCBI for each entry. A database update + over all entries with the same accession number speeds up the + job considerably. + + SQL tables from internalDb (altered): + - BatchQueue ; Requests. + + @arg jobID: Identifier of a job + @type jobID: string + @arg old: String to be replaced + @type old: string + @arg new: String to replace old with + @type new: string + @arg flag: The reason of subsitution + @type flag: string + @arg whereNot: A negative selector to prevent false positives + @type whereNot: string """ #update whereNot to escape parenthesis whereNot = whereNot.replace("(","[(]").replace(")","[)]") @@ -1378,16 +1442,18 @@ class Batch(Db) : def skipBatchDb(self, jobID, where, flag): """ - Flag batch entries to be skipped. This is used if it is certain - that an entry will cause an error, or that its output is ambiguous. + Flag batch entries to be skipped. This is used if it is certain + that an entry will cause an error, or that its output is ambiguous. - Arguments: - jobID ; Identifier of a job - where ; Look for occurencus of this string - flag ; The reason of skipping + SQL tables from internalDB (alterd): + - BatchQueue ; Requests - SQL tables from internalDB (alterd): - BatchQueue ; Requests + @arg jobID: Identifier of a job + @type jobID: string + @arg where: Look for occurencus of this string + @type where: string + @arg flag: The reason of skipping + @type flag: string """ #update where to escape parenthesis where = where.replace("(","[(]").replace(")","[)]") @@ -1404,21 +1470,21 @@ class Batch(Db) : def getFromQueue(self, jobID) : """ - Get a request belonging to a certain job from the queue. If a - request is found, remove it from the queue and return it. 
Otherwise - return nothing. + Get a request belonging to a certain job from the queue. If a + request is found, remove it from the queue and return it. Otherwise + return nothing. - Arguments: - jobID ; Identifier of a job. + SQL tables from internalDb (altered): + - BatchQueue ; Requests. - SQL tables from internalDb (altered): - BatchQueue ; Requests. + @arg jobID: Identifier of a job + @type jobID: string - Returns: - triple: - accNo ; The accession number of a request. - gene ; The gene and transcript variant information. - variant ; The variant. + @return: + - accNo ; The accession number of a request + - gene ; The gene and transcript variant information + - variant ; The variant + @rtype: triple """ statement = """ @@ -1431,7 +1497,7 @@ class Batch(Db) : results = self.query(statement) if results : - queueID, inputl,flags = results[0] + queueID, inputl, flags = results[0] else : return None, None diff --git a/src/Modules/File.py b/src/Modules/File.py index 3cbfabf25f171a433272a31a4318030a4139f46b..2b9f9c2c1c6ec37caa036c05031ccebc23ed71c3 100644 --- a/src/Modules/File.py +++ b/src/Modules/File.py @@ -1,13 +1,21 @@ #!/usr/bin/python -#TODO: Check ODS, XLS compatibility - """ - Module for parsing CSV files and spreadsheets. - - Public classes: - File ; Parse CSV files and spreadsheets. +Module for parsing CSV files and spreadsheets. + +@todo: Check ODS, XLS compatibility +@requires: magic +@requires: csv +@requires: xlrd +@requires: zipfile +@requires: xml.dom.minidom +@requires: os +@requires: types +@requires: Modules.Misc """ +# Public classes: +# - File ; Parse CSV files and spreadsheets. + import magic # open(), MAGIC_MIME, MAGIC_NONE import csv # Sniffer(), reader(), Error @@ -21,55 +29,60 @@ from Modules import Misc class File() : """ - Parse CSV files and spreadsheets. - - Private variables: - __config ; Configuration variables. - __output ; The Output object. - - Special methods: - __init__(config, output) ; Initialse the class. 
- - Private methods: - __tempFileWrapper(func, ; Call func() with a filename. - handle) - __getMimeType(handle) ; Get the mime type of a stream. - __parseCsvFile(handle) ; Parse a CSV file. - __parseXlsFile(handle) ; Parse an Excel file. - __parseOdsFile(handle) ; Parse an OpenDocument Spreadsheet file. - __checkBatchFormat(job) ; Check a batch job and sanitize it. - - Public methods: - parseFileRaw(handle) ; Parse a stream with the appropriate parser. - parseBatchFile(handle) ; Parse a stream with the appropriate parser - and sanitize the output. + Parse CSV files and spreadsheets. + + Private variables: + - __config ; Configuration variables. + - __output ; The Output object. + + Special methods: + - __init__(config, output) ; Initialise the class. + + Private methods: + - __tempFileWrapper(func, handle) ; Call func() with a filename. + - __parseCsvFile(handle) ; Parse a CSV file. + - __parseXlsFile(handle) ; Parse an Excel file. + - __parseOdsFile(handle) ; Parse an OpenDocument Spreadsheet file. + - __checkBatchFormat(job) ; Check a batch job and sanitize it. + + Public methods: + - getMimeType(handle) ; Get the mime type of a stream. + - parseFileRaw(handle) ; Parse a stream with the appropriate parser. + - parseBatchFile(handle) ; Parse a stream with the appropriate parser + and sanitize the output. """ def __init__(self, config, output) : """ - Initialise the class. - - Private variables (altered): - __config ; Initialised with configuration variables. - __output ; Set to the Output object. + Initialise the class. + + Private variables (altered): + - __config ; Initialised with configuration variables. + - __output ; Set to the Output object. 
+ + @arg config: Configuration variables + @type config: class instance + @arg output: Output object + @type output: class instance """ self.__config = config - self.__output = output + self.__output = output #: The Output object #__init__ def __tempFileWrapper(self, func, handle) : """ - Make a temporary file, put the content of a stream in it and pass - the filename to a general function. Return whatever this function - returns. + Make a temporary file, put the content of a stream in it and pass + the filename to a general function. Return whatever this function + returns. - Arguments: - func ; A general function that needs a file name as argument. - handle ; A stream. + @arg func: general function that needs a file name as argument + @type func: function + @arg handle: A stream + @type handle: stream - Returns: - unknown ; The output of func(). + @return: unknown; the output of func(). + @rtype: ? """ # Generate an unique filename in the tempDir directory. @@ -92,50 +105,19 @@ class File() : return ret #__tempFileWrapper - def getMimeType(self, handle) : - """ - Get the mime type of a stream by inspecting a fixed number of bytes. - The stream is rewinded after use. - - Arguments: - handle ; A handle to a stream. - - Private variables: - __config ; The bufSize configuration variables. - - Returns: - string ; The mime type of a file. - """ - - handle.seek(0) - buf = handle.read(self.__config.bufSize) - - MagicInstance = magic.open(magic.MAGIC_MIME) - MagicInstance.load() - mimeType = MagicInstance.buffer(buf).split(';')[0] - MagicInstance.close() - MagicInstance = magic.open(magic.MAGIC_NONE) - MagicInstance.load() - description = MagicInstance.buffer(buf) - del MagicInstance - handle.seek(0) - - return mimeType, description - #getMimeType - def __parseCsvFile(self, handle) : """ - Parse a CSV file. - The stream is not rewinded after use. + Parse a CSV file. + The stream is not rewinded after use. - Arguments: - handle ; A handle to a stream. 
+ Private variables: + - __config ; The bufSize configuration variables. - Private variables: - __config ; The bufSize configuration variables. + @arg handle: A handle to a stream + @type handle: stream - Returns: - list ; A list of lists. + @return: list of lists + @rtype: list """ handle.seek(0) @@ -164,14 +146,14 @@ class File() : def __parseXlsFile(self, handle) : """ - Parse an Excel file. - The stream is not rewinded after use. + Parse an Excel file. + The stream is not rewinded after use. - Arguments: - handle ; A handle to a stream. + @arg handle: A handle to a stream + @type handle: stream - Returns: - list ; A list of lists. + @return: A list of lists + @rtype: list """ workBook = self.__tempFileWrapper(xlrd.open_workbook, handle) @@ -196,15 +178,14 @@ class File() : def __parseOdsFile(self, handle) : """ - Parse an OpenDocument Spreadsheet file. - The stream is not rewinded after use. - - Arguments: - handle ; A handle to a stream. + Parse an OpenDocument Spreadsheet file. + The stream is not rewinded after use. - Returns: - list ; A list of lists. + @arg handle: A handle to a stream + @type handle: stream + @return: A list of lists + @rtype: list """ #zipFile = self.__tempFileWrapper(zipfile.ZipFile, handle) @@ -229,20 +210,22 @@ class File() : def __checkBatchFormat(self, job) : """ - Check if a job is of the correct format. - - Each row should consist of three elements. - - The first and the last element should be non-empty. - - The first line should be the header defined in the config file. + Check if a job is of the correct format. + - Each row should consist of three elements. + - The first and the last element should be non-empty. + - The first line should be the header defined in the config file. - Arguments: - job ; list of lists. + Private variables: + - __config ; The header configuration variable. 
+ + @todo: Add more new style old style logic + @todo: if not inputl: try to make something out of it - Private variables: - __config ; The header configuration variable. + @arg job: list of lists + @type job: list - Returns: - list ; A sanitised list of lists (without a header or empty - lines). + @return: A sanitised list of lists (without a header or empty lines) + @rtype: list """ #store original line numbers line 1 = job[0] jobl = [(l+1, row) for l, row in enumerate(job)] @@ -339,16 +322,48 @@ class File() : return None #__checkBatchFormat + def getMimeType(self, handle) : + """ + Get the mime type of a stream by inspecting a fixed number of bytes. + The stream is rewinded after use. + + Private variables: + - __config: The bufSize configuration variables. + + @arg handle: A handle to a stream + @type handle: stream + + @return: The mime type of a file + @rtype: string + """ + + handle.seek(0) + buf = handle.read(self.__config.bufSize) #: The bufSize configuration variables. + + + MagicInstance = magic.open(magic.MAGIC_MIME) + MagicInstance.load() + mimeType = MagicInstance.buffer(buf).split(';')[0] + MagicInstance.close() + MagicInstance = magic.open(magic.MAGIC_NONE) + MagicInstance.load() + description = MagicInstance.buffer(buf) + del MagicInstance + handle.seek(0) + + return mimeType, description + #getMimeType + def parseFileRaw(self, handle) : """ - Check which format a stream has and parse it with the appropriate - parser if the stream is recognised. + Check which format a stream has and parse it with the appropriate + parser if the stream is recognised. - Arguments: - handle ; A handle to a stream. + @arg handle: A handle to a stream + @type handle: stream - Returns: - list ; A list of lists, None if an error occured. 
+ @return: A list of lists, None if an error occured + @rtype: list """ mimeType = self.getMimeType(handle) @@ -365,15 +380,15 @@ class File() : def parseBatchFile(self, handle) : """ - Check which format a stream has and parse it with the appropriate - parser if the stream is recognised. + Check which format a stream has and parse it with the appropriate + parser if the stream is recognised. - Arguments: - handle ; A handle to a stream. + @arg handle: A handle to a stream + @type handle: stream - Returns: - list ; A sanitised list of lists (without a header or empty - lines), or None if an error occured. + @return: A sanitised list of lists (without a header or empty lines), + or None if an error occured + @rtype: list """ job = self.parseFileRaw(handle) @@ -384,8 +399,20 @@ class File() : #File def makeList(l, maxlen=10): + """ + Converts a list of lines to a string to be used in output messages for + incompatible fields. + + @arg l: list of lines + @type l: list + @arg maxlen: maximum length of the string you want to return + @type maxlen: integer + @return: a list converted to a string with comma's and spaces + @rtype: string + """ ret = ", ".join(str(i) for i in l[:maxlen]) if len(l)>maxlen: return ret+", ..." else: return ret +#makeList \ No newline at end of file diff --git a/src/Modules/GBparser.py b/src/Modules/GBparser.py index 8751df4fd5e97d9991034e8c1f5f179a5a399fa7..75f82e00535363f0b9a85eb728fc3c0a4fbb7853 100644 --- a/src/Modules/GBparser.py +++ b/src/Modules/GBparser.py @@ -1,19 +1,48 @@ -from Bio import SeqIO, Entrez # read() -import bz2 # BZ2Compressor(), BZ2File() -from GenRecord import PList, Locus, Gene, Record, GenRecord -import Db +#!/usr/bin/python """ - Module contains one public function createGBRecord which returns a - mutalyzer GenRecord.Record populated with data from a GenBank file. +Module contains one public function createGBRecord which returns a +mutalyzer GenRecord. Record populated with data from a GenBank file. 
+ +@requires: bz2 +@requires: Db +@requires: Bio.SeqIO +@requires: Bio.Entrez +@requires: GenRecord.PList +@requires: GenRecord.Locus +@requires: GenRecord.Gene +@requires: GenRecord.Record +@requires: GenRecord.GenRecord """ +import bz2 # BZ2Compressor(), BZ2File() +import Db + +from Bio import SeqIO, Entrez # read() +from GenRecord import PList, Locus, Gene, Record, GenRecord + class tempGene() : """ + Container class for a given gene name. + + Special methods: + - __init__(name) ; Initialise the class. + + Public variables: + - rnaList ; List of splice sites. + - cdsList ; CDS list (including internal splice sites). """ def __init__(self, name) : """ + Initialise the class for a given gene name. + + Public variables: + - rnaList ; List of splice sites. + - cdsList ; CDS list (including internal splice sites). + + @arg name: Gene name + @type name: string """ self.name = name @@ -24,10 +53,20 @@ class tempGene() : class GBparser() : """ + @todo: documentation """ def __init__(self) : """ + Initialise the class + + Public variables: + - config ; Config object. + + Private variables: + - __database ; Db.Cache object + + @requires: Config """ import Config @@ -38,13 +77,13 @@ class GBparser() : def __location2pos(self, location) : """ - Convert a location object to a tuple of integers. + Convert a location object to a tuple of integers. - Arguments: - location ; A location object (see the BioPython documentation). + @arg location: A location object (see the BioPython documentation) + @type location: location object - Returns: - List ; A tuple of integers. + @return: A tuple of integers + @rtype: list """ ret = [] @@ -62,14 +101,13 @@ class GBparser() : def __locationList2posList(self, locationList) : """ - Convert a list of locations to a list of integers. + Convert a list of locations to a list of integers. - Arguments: - locationList ; A list of locations - (see the BioPython documentation). 
+ @arg locationList: A list of locations (see the BioPython documentation) + @type locationList: list (location objects) - Returns: - List ; A list (of even length) of integers. + @return: A list (of even length) of integers + @rtype: list (integers) """ ret = [] @@ -96,18 +134,17 @@ class GBparser() : def __transcriptToProtein(self, transcriptAcc) : """ - Try to find the protein linked to a transcript id. + Try to find the protein linked to a transcript id. - First look in our database, if a link can not be found, try to - retrieve it via the NCBI. Store the result in our database. + First look in our database, if a link can not be found, try to + retrieve it via the NCBI. Store the result in our database. - Arguments: - transcriptAcc ; Accession number of the transcript for which we - want to find the protein. - - Returns: - string ; Accession number of a protein or None if nothing can - be found. + @arg transcriptAcc: Accession number of the transcript for which we + want to find the protein + @type transcriptAcc: string + + @return: Accession number of a protein or None if nothing can be found + @rtype: string """ proteinAcc = self.__database.getProtAcc(transcriptAcc) @@ -142,19 +179,19 @@ class GBparser() : def __findMismatch(self, productList, direction) : """ - Find the index of the first or last word that distinguishes one - sentence from an other. - - If direction equals 1, search for the first word. - If direction equals -1, search for the last word. - - Arguments: - productList ; A list of sentences. - direction ; The direction in which to search. - - Returns: - integer ; The index of the word where sentences start to - differ. + Find the index of the first or last word that distinguishes one + sentence from an other. + + If direction equals 1, search for the first word. + If direction equals -1, search for the last word. 
+ + @arg productList: A list of sentences + @type productList: list of strings + @arg direction: The direction in which to search + @type direction: integer (1 or -1) + + @return: The index of the word where sentences start to differ + @rtype: integer """ i = 0 @@ -174,13 +211,13 @@ class GBparser() : def __tagByDict(self, locus, key) : """ - Transfer a variable in the qualifiers dictionary to the locus - object. If the variable does not exist, set it to the empty string. + Transfer a variable in the qualifiers dictionary to the locus + object. If the variable does not exist, set it to the empty string. - Arguments: - locus ; The locus object on which the transfer should be - performed. - key ; The name of the variable that should be transferred. + @arg locus: The locus object on which the transfer should be performed + @type locus: locus object + @arg key: The name of the variable that should be transferred + @type key: string """ if locus.qualifiers.has_key(key) : @@ -191,14 +228,14 @@ class GBparser() : def __tagLocus(self, locusList) : """ - Enrich a list of locus objects (mRNA or CDS) with information used - for linking (locus_tag, proteinLink and productTag). Also - transfer the variables transcript_id, protein_id, gene and product - to each of the locus objects. If these variables do not exist, set - them to the empty string. - - Arguments: - locusList ; A list of locus objects. + Enrich a list of locus objects (mRNA or CDS) with information used + for linking (locus_tag, proteinLink and productTag). Also + transfer the variables transcript_id, protein_id, gene and product + to each of the locus objects. If these variables do not exist, set + them to the empty string. + + @arg locusList: A list of locus objects + @type locusList: list """ productList = [] @@ -247,12 +284,13 @@ class GBparser() : def __checkTags(self, locusList, tagName) : """ - Check whether all tags in a locus list are unique. Prune all the - non unique tags. 
+ Check whether all tags in a locus list are unique. Prune all the + non unique tags. - Arguments: - locusList ; A list of loci. - tagName ; Name of the tag to be checked. + @arg locusList: A list of loci + @type locusList: list + @arg tagName: Name of the tag to be checked + @type tagName: string """ tags = [] @@ -275,17 +313,19 @@ class GBparser() : def __matchByRange(self, mrna, cds) : """ - Match the mRNA list to the CDS list. - - Arguments: - mrnaList ; List of splice sites. - cdsList ; CDS list (including internal splice sites). - - Returns: - integer ; -1 : False. - 0 : Don't know. - 1 : Maybe true. - 2 : Probably true. + Match the mRNA list to the CDS list. + + @arg mrna: List of splice sites + @type mrna: list + @arg cds: CDS list (including internal splice sites) + @type cds: list + + @return: + - E{-}1 : False + - 0 : Don't know + - 1 : Maybe true + - 2 : Probably true + @rtype: integer """ if not cds or not mrna : @@ -316,26 +356,29 @@ class GBparser() : def link(self, rnaList, cdsList) : """ - Link mRNA loci to CDS loci (all belonging to one gene). - - First of all, the range of the CDS must be a sub range of that of - the mRNA. If this is true, then we try to link both loci. The first - method is by looking at the locus_tag, if this fails, we try to - match the proteinLink tags, if this also fails, we try the - productTag. - If no link could be found, but there is only one possibility left, - the loci are linked too. - The method that was used to link the loci, is put in the linkmethod - variable of the transcript locus. The link variable of the - transcript locus is a pointer to the CDS locus. Furthermore, the - linked variable of the CDS locus is set to indicate that this locus - is no longer available for linking. - - Available link methods are: locus, protein, product and exhaustion. - - Arguments: - rnaList ; A list of mRNA loci. - cdsList ; A list of CDS loci. + Link mRNA loci to CDS loci (all belonging to one gene). 
+ + First of all, the range of the CDS must be a subrange of that of + the mRNA. If this is true, then we try to link both loci. The first + method is by looking at the locus_tag, if this fails, we try to + match the proteinLink tags, if this also fails, we try the + productTag. + + If no link could be found, but there is only one possibility left, + the loci are linked too. + + The method that was used to link the loci, is put in the linkmethod + variable of the transcript locus. The link variable of the + transcript locus is a pointer to the CDS locus. Furthermore, the + linked variable of the CDS locus is set to indicate that this locus + is no longer available for linking. + + Available link methods are: locus, protein, product and exhaustion. + + @arg rnaList: A list of mRNA loci + @type rnaList: list + @arg cdsList: A list of CDS loci + @type cdsList: list """ # Enrich the lists with as much information we can find. @@ -407,11 +450,11 @@ class GBparser() : """ Create a GenRecord.Record from a GenBank file - Arguments: - filename ; The full path to the compresed GenBank file + @arg filename: The full path to the compressed GenBank file + @type filename: string - Returns: - record ; A GenRecord.Record instance + @return: A GenRecord.Record instance + @rtype: object (record) """ # first create an intermediate genbank record with BioPython diff --git a/src/Modules/GenRecord.py b/src/Modules/GenRecord.py index 9e942d3e838d97e0ef2f7e58990d3d5d35f555df..4dc7ff325c41182549da39bf5f56ea70242f590d 100644 --- a/src/Modules/GenRecord.py +++ b/src/Modules/GenRecord.py @@ -1,50 +1,55 @@ #!/usr/bin/python +""" +Module to convert a GenBank record to a nested dictionary consisting of +a list of genes, which itself consists of a list of loci. This structure +makes it possible to iterate over genes and transcripts without having to +search for them each time. 
+ +@requires: Crossmap +@requires: Bio +@requires: Db +""" +# Public classes: +# - PList ; Store a general location and a list of splice sites. +# - Locus ; Store data about the mRNA and CDS splice sites. +# - Gene ; Store a list of Locus objects and the orientation. +# - Record ; Store a geneList and other additional information. +# - GenRecord ; Convert a GenBank record to a nested dictionary. + + import Crossmap import Bio import Db -""" - Module to convert a GenBank record to a nested dictionary consisting of - a list of genes, which itself consists of a list of loci. This structure - makes it possible to iterate over genes and transcripts without having to - search for them each time. - - Public classes: - PList ; Store a general location and a list of splice sites. - Locus ; Store data about the mRNA and CDS splice sites. - Gene ; Store a list of Locus objects and the orientation. - Record ; Store a geneList and other additional information. - GenRecord ; Convert a GenBank record to a nested dictionary. -""" class PList(object) : """ - A position list object, to store a general location and a list of - specific splice sites (if available). + A position list object, to store a general location and a list of + specific splice sites (if available). - These objects are used to describe either a list of mRNA splice sites - or a list of CDS splice sites. These splice sites are stored in the - list element. The location element is a fallback in case the splice - sites are not available. + These objects are used to describe either a list of mRNA splice sites + or a list of CDS splice sites. These splice sites are stored in the + list element. The location element is a fallback in case the splice + sites are not available. - Special methods: - __init__() ; Initialise the class. + Special methods: + - __init__() ; Initialise the class. - Public variables: - location ; A tuple of integers between which the object resides. 
- list ; A list (with an even amount of entries) of splice sites. + Public variables: + - location ; A tuple of integers between which the object resides. + - list ; A list (with an even amount of entries) of splice sites. """ def __init__(self) : """ - Initialise the class. + Initialise the class. - Public variables (altered): - location ; A tuple of integers between which the object - resides. - list ; A list (with an even amount of entries) of splice - sites. + Public variables (altered): + - location ; A tuple of integers between which the object + resides. + - POSITIONlist ; A list (with an even amount of entries) of splice + sites. """ self.location = [] @@ -54,28 +59,31 @@ class PList(object) : class Locus(object) : """ - A Locus object, to store data about the mRNA and CDS splice sites. + A Locus object, to store data about the mRNA and CDS splice sites. - Special methods: - __init__() ; Initialise the class. + Special methods: + - __init__() ; Initialise the class. - Public variables: - mRNA ; A position list object. - CDS ; A position list object. - exon ; A position list object. + Public variables: + - mRNA ; A position list object. + - CDS ; A position list object. + - exon ; A position list object. """ def __init__(self, name) : """ - Initialise the class. + Initialise the class. - Public variables (altered): - mRNA ; A position list object. - CDS ; A position list object. - location ; - exon ; A position list object. - txTable ; The translation table. - CM ; A Crossmap object. + Public variables (altered): + - mRNA ; A position list object. + - CDS ; A position list object. + - location ; + - exon ; A position list object. + - txTable ; The translation table. + - CM ; A Crossmap object. + + @arg name: identifier of the locus + @type name: string """ self.name = name @@ -103,6 +111,10 @@ class Locus(object) : def addToDescription(self, rawVariant) : """ + Expands the DNA description with a new raw variant. 
+ + @arg rawVariant: description of a single mutation + @type rawVariant: string """ if self.description : @@ -114,25 +126,32 @@ class Locus(object) : class Gene(object) : """ - A Gene object, to store a list of Locus objects and the orientation of - the gene. + A Gene object, to store a list of Locus objects and the orientation of + the gene. - Special methods: - __init__() ; Initialise the class. + Special methods: + - __init__() ; Initialise the class. - Public variables: - orientation ; The orientation of the gene: 1 = forward, - -1 = reverse. - list ; A list of Locus objects. + Public variables: + - orientation; The orientation of the gene: 1 = forward, -1 = reverse. + - transcriptList; A list of Locus objects. """ def __init__(self, name) : """ - Initialise the class. + Initialise the class. - Public variables (altered): - orientation ; The orientation of the gene. - list ; A list of Locus objects. + Public variables (altered): + - name + - orientation ; The orientation of the gene. + - transcriptList ; A list of transcripts + - location ; + - longName ; + Private variables (altered): + - __locusTag ; + + @arg name: gene name + @type name: string """ self.name = name @@ -145,6 +164,10 @@ class Gene(object) : def newLocusTag(self) : """ + Generates a new Locus tag. + + @return: Locus tag + @rtype: string (3 digits, zero-padded) """ self.__locusTag = "%03i" % (int(self.__locusTag) + 1) @@ -154,6 +177,13 @@ class Gene(object) : def findLocus(self, name) : """ + Find a transcript, given its name.
+ + @arg name: transcript variant number + @type name: string + + @return: transcript + @rtype: object """ for i in self.transcriptList : @@ -164,6 +194,10 @@ class Gene(object) : def listLoci(self) : """ + Provides a list of transcript variant numbers + + @return: list of transcript variant numbers + @rtype: list """ ret = [] @@ -174,50 +208,57 @@ class Gene(object) : def findLink(self, protAcc) : """ + Look in the list of transcripts for a given protein accession number. + + @arg protAcc: protein accession number + @type protAcc: string + + @return: transcript + @rtype: object """ for i in self.transcriptList : if i.link == protAcc : return i return None - #findCDS + #findLink #Gene class Record(object) : """ - A Record object, to store a geneList and other additional - information. - - Special methods: - __init__() ; Initialise the class. - - Public variables: - geneList ; List of Gene objects. - mol_type ; Variable to indicate the sequence type (DNA, RNA, ...) - organelle ; Variable to indicate whether the sequence is from the - nucleus or from an onganelle (if so, also from which - one). - source ; A fake gene that can be used when no gene information - is present. + A Record object, to store a geneList and other additional + information. + + Special methods: + - __init__() ; Initialise the class. + + Public variables: + - geneList ; List of Gene objects. + - mol_type ; Variable to indicate the sequence type (DNA, RNA, ...) + - organelle ; Variable to indicate whether the sequence is from the + nucleus or from an organelle (if so, also from which + one). + - source ; A fake gene that can be used when no gene information + is present. """ def __init__(self) : """ - Initialise the class. + Initialise the class. - Public variables (altered): - geneList ; List of Gene objects. - molType ; Variable to indicate the sequence type (DNA, RNA, - ...) 
- seq ; The reference sequence - mapping ; The mapping of the reference sequence to the genome - include a list of differences between the sequences - organelle ; Variable to indicate whether the sequence is from - the nucleus or from an onganelle (if so, also from - which one). - source ; A fake gene that can be used when no gene - information is present. + Public variables (altered): + - geneList ; List of Gene objects. + - molType ; Variable to indicate the sequence type (DNA, RNA, + ...) + - seq ; The reference sequence + - mapping ; The mapping of the reference sequence to the genome + include a list of differences between the sequences + - organelle ; Variable to indicate whether the sequence is from + the nucleus or from an organelle (if so, also from + which one). + - source ; A fake gene that can be used when no gene + information is present. """ self.geneList = [] @@ -237,6 +278,13 @@ class Record(object) : def findGene(self, name) : """ + Returns a Gene object, given its name. + + @arg name: Gene name + @type name: string + + @return: Gene object + @rtype: object """ for i in self.geneList : @@ -247,6 +295,11 @@ class Record(object) : def listGenes(self) : """ + List the names of all genes found in this record. + + @return: Genes list + @rtype: list + """ ret = [] @@ -257,6 +310,10 @@ class Record(object) : def addToDescription(self, rawVariant) : """ + Expands the DNA description with a new raw variant. + + @arg rawVariant: description of a single mutation + @type rawVariant: string """ if self.description : @@ -267,6 +324,14 @@ class Record(object) : def toChromPos(self, i) : """ + Converts a g. position (relative to the start of the record) to a + chromosomal g. position + + @arg i: g. position (relative to the start of the record) + @type i: integer + + @return: chromosomal g. 
position + @rtype: integer """ if self.orientation == 1 : @@ -276,6 +341,7 @@ class Record(object) : def addToChromDescription(self, rawVariant) : """ + @todo document me """ if not self.chromOffset : @@ -290,14 +356,23 @@ class Record(object) : class GenRecord() : """ - Convert a GenBank record to a nested dictionary. + Convert a GenBank record to a nested dictionary. - Public methods: - checkRecord() ; Check and repair self.record + Public methods: + - checkRecord() ; Check and repair self.record. """ def __init__(self, output, config) : """ + Initialise the class. + + Public variable: + - record ; A record object + + @arg output: an output object + @type output: object + @arg config: a config object + @type config: object """ self.__output = output @@ -307,6 +382,15 @@ class GenRecord() : def __checkExonList(self, exonList, CDSpos) : """ + @todo document me + + @arg exonList: list of splice sites + @type exonList: list (object) + @arg CDSpos: location of the CDS + @type CDSpos: object + + @return: + @rtype: boolean """ if not exonList : @@ -332,6 +416,16 @@ class GenRecord() : def __constructCDS(self, mRNA, CDSpos) : """ + Construct a list of coordinates that contains CDS start and stop and + the internal splice sites. + + @arg mRNA: mRNA positions/coordinates list + @type mRNA: list (integer) + @arg CDSpos: coding DNA positions/coordinates + @type CDSpos: list (integer) + + @return: CDS positions plus internal splice sites + @rtype: list (integer) """ i = 1 @@ -352,6 +446,17 @@ class GenRecord() : def __maybeInvert(self, gene, string) : """ + Return the reverse-complement of a DNA sequence if the gene is in + the reverse orientation. + + @arg gene: Gene + @type gene: object + @arg string: DNA sequence + @type string: string + + @return: reverse-complement (if applicable), otherwise return the + original. 
+ @rtype: string """ if gene.orientation == -1 : @@ -361,9 +466,11 @@ class GenRecord() : def checkRecord(self) : """ - Check if the record in self.record is compatible with mutalyzer - - update the mRNA PList with the exon and CDS data + Check if the record in self.record is compatible with mutalyzer. + Update the mRNA PList with the exon and CDS data. + + @todo: This function should really check the record for minimal + requirements """ #TODO: This function should really check @@ -479,6 +586,21 @@ class GenRecord() : def name(self, start_g, stop_g, varType, arg1, arg2, roll) : """ + Generate variant descriptions for all genes, transcripts, etc. + + @arg start_g: start position + @type start_g: integer + @arg stop_g: stop position + @type stop_g: integer + @arg varType: variant type + @type varType: string + @arg arg1: argument 1 of a raw variant + @type arg1: string + @arg arg2: argument 2 of a raw variant + @type arg2: string + @arg roll: ??? + @type roll: tuple (integer, integer) + """ forwardStart = start_g @@ -558,6 +680,19 @@ class GenRecord() : #name def checkIntron(self, gene, transcript, position) : + """ + Checks if a position is on or near a splice site + + @arg gene: Gene + @type gene: object + @arg transcript: transcript + @type transcript: object + @arg position: g. position + @type position: integer + + @return: + @todo: Also check a range properly. + """ # TODO Also check a range properly. intronPos = abs(transcript.CM.g2x(position)[1]) if intronPos : diff --git a/src/Modules/LRGparser.py b/src/Modules/LRGparser.py index b607e87c1492b89b17aa07fa6638e2d04ca31e90..43fc5954a556e28dd47bb5e8a42907021fd2ae0a 100644 --- a/src/Modules/LRGparser.py +++ b/src/Modules/LRGparser.py @@ -1,21 +1,27 @@ +#!/usr/bin/python + """ - Module contains one public function createLrgRecord which returns a - mutalyzer GenRecord.Record populated with data from a LRG file. - - A LRG file is an XML formatted file and consists of a fixed and - updatable section. 
The fixed section contains a DNA sequence - and for that sequence a number of transcripts. - - The updatable region could contain all sorts of annotation for the - sequence and transcripts. It can also contain additional (partial) - transcripts and mapping information. - - This module is based on the result of the minidom xml parser. - NOTE: - A strong alternative to the minidom parser would be ElementTree - http://docs.python.org/library/xml.etree.elementtree.html - which is added in python2.5 - Its main strengths are speed and readability [pythonesque] +Module contains one public function createLrgRecord which returns a +mutalyzer GenRecord.Record populated with data from a LRG file. + +A LRG file is an XML formatted file and consists of a fixed and +updatable section. The fixed section contains a DNA sequence +and for that sequence a number of transcripts. + +The updatable region could contain all sorts of annotation for the +sequence and transcripts. It can also contain additional (partial) +transcripts and mapping information. + +This module is based on the result of the minidom xml parser. + +NOTE: A strong alternative to the minidom parser would be ElementTree which is +added in python2.5. Its main strengths are speed and readability [pythonesque]. +(http://docs.python.org/library/xml.etree.elementtree.html) + +@requires: xml.dom.minidom +@requires: xml.parsers.expat.ExpatError +@requires: Bio.Seq.Seq +@requires: Bio.Alphabet.IUPAC """ from Bio.Seq import Seq @@ -28,8 +34,15 @@ __all__ = ["createLrgRecord"] # Only import createLrgRecord from this module def __debugParsedData(title, data): """ - Output additional data to stdout. Used for debugging the - intermediate format used while parsing a LRG file. + Output additional data to stdout. Used for debugging the + intermediate format used while parsing a LRG file. 
+ + @requires: pprint + + @arg title: title printed in the debug banner + @type title: string + @arg data: minidom object + @type data: object """ import pprint #Only imported when the debug flag is set print "#"*79+"\nDEBUG: Start of "+title+"\n"+"#"*79 @@ -39,15 +52,15 @@ def __debugParsedData(title, data): def _getContent(data, refname): """ - Return string-content of an XML textnode. + Return string-content of an XML textnode. - Arguments: - data ; a minidom object - refname ; the name of a member of the minidom object + @arg data: a minidom object + @type data: object + @arg refname: the name of a member of the minidom object + @type refname: string - Returns: - String ; The UTF-8 content of the textnode - or an emtpy string + @return: The UTF-8 content of the textnode or an empty string + @rtype: string """ temp = data.getElementsByTagName(refname) if temp: @@ -58,15 +71,15 @@ def _getContent(data, refname): def _attr2dict(attr): """ - Create a dictionary from the attributes of an XML node + Create a dictionary from the attributes of an XML node - Arguments: - attr ; a minidom node + @arg attr: a minidom node + @type attr: object - Returns: - Dict ; A dictionary with pairing of node-attribute names - and values. Integer string values are converted to - integers. String values are converted to UTF-8 + @return: A dictionary with pairing of node-attribute names and values. + Integer string values are converted to integers. String values are converted + to UTF-8 + @rtype: dictionary """ ret = {} for key, value in attr.items(): @@ -77,13 +90,13 @@ def _attr2dict(attr): def createLrgRecord(data): """ - Create a GenRecord.Record of a LRG <xml> formatted string + Create a GenRecord.Record of a LRG <xml> formatted string.
- Input: - data ; Content of LRG file [String] + @arg data: Content of LRG file + @type data: string - Output - record ; GenRecord.Record instance + @return: GenRecord.Record instance + @rtype: object """ # Initiate the GenRecord.Record record = GenRecord.Record() @@ -185,14 +198,14 @@ def createLrgRecord(data): def genesFromUpdatable(updParsed): """ - populate GenRecord.Gene instances with updatable LRG node data + Populate GenRecord.Gene instances with updatable LRG node data. - Input: - updParsed ; Intermediate nested dict of updatable section + @arg updParsed: Intermediate nested dict of updatable section + @type updParsed: dictionary - Output - genes ; List of GenRecord.Gene instances, populated with - the content of the updatable section + @return: genes ; List of GenRecord.Gene instances, populated with the + content of the updatable section + @rtype: list """ genes = [] for geneSymbol, geneData in updParsed["NCBI"].items(): @@ -216,14 +229,14 @@ def genesFromUpdatable(updParsed): def transcriptsFromParsed(parsedData): """ - populate GenRecord.Locus instances with updatable LRG node data + Populate GenRecord.Locus instances with updatable LRG node data - Input: - parsedData ; Dict of transcript data. See getFeaturesAnnotation + @arg parsedData: Dictionary of transcript data. See getFeaturesAnnotation + @type parsedData: dictionary - Output - transcripts ; List of GenRecord.Locus instances, populated with - the content of the parsed Data + @return: transcripts ; List of GenRecord.Locus instances, populated with the + content of the parsed Data + @rtype: list """ transcripts = [] @@ -245,16 +258,18 @@ def transcriptsFromParsed(parsedData): def _emptyTranscripts(data): #TODO: This function can be moved to the GenRecord.checkRecord method """ - populate a GenRecord.Locus instance with minimal data to make the - gene compatible with mutalyzer. Data abstracted from the gene. 
- - Input: - data ; Data from the gene which is used to populate - the create a minimal GenRecord.Locus instance. - - Output - list ; List with a single bogus GenRecord.Locus instance, - in which location and mRNA are copied from the gene. + Populate a GenRecord.Locus instance with minimal data to make the + gene compatible with mutalyzer. Data abstracted from the gene. + + @todo: This function can be moved to the GenRecord.checkRecord method. + + @arg data: Data from the gene which is used to create a minimal + GenRecord.Locus instance + @type data: dictionary + + @return: List with a single bogus GenRecord.Locus instance, in which + location and mRNA are copied from the gene + @rtype: list """ transcript = GenRecord.Locus('') transcript.molType = 'n' @@ -268,15 +283,16 @@ def _emptyTranscripts(data): def _transcriptPopulator(trName, trData): """ - populate GenRecord.Locus instance with updatable LRG node data - - Input: - trName ; Name of the transcript. - trData ; Data associated with the transcript. - - Output - transcript ; GenRecord.Locus instance, populated with - the content of the parsed Data + Populate GenRecord.Locus instance with updatable LRG node data. + + @arg trName: Name of the transcript + @type trName: string + @arg trData: Data associated with the transcript + @type trData: dictionary + + @return: transcript ; GenRecord.Locus instance, populated with the content + of the parsed Data + @rtype: object """ transcript = GenRecord.Locus(trName) transcript.transcriptProduct = trData.get("transLongName") @@ -304,15 +320,16 @@ def _transcriptPopulator(trName, trData): def getMapping(rawMapData): """ - Collect all necessary info to map the current LRG sequence to the - genomic reference supplied by the file. + Collect all necessary info to map the current LRG sequence to the + genomic reference supplied by the file.
- Input: - rawMapData ; A list of dictionaries with the raw mapping info + @arg rawMapData: A list of dictionaries with the raw mapping info + @type rawMapData: list - Output - dict ; + @return: dictionary with the mapping info + @rtype: dictionary """ + mapp, span, diffs = rawMapData ret = { "assembly": mapp.get("assembly"), # Assembly Reference "chr_name": mapp.get("chr_name"), # Chromosome name @@ -329,19 +346,18 @@ def getMapping(rawMapData): def parseUpdatable(data): """ - Mediator function which transforms the minidom object to a nested dict - and filters information needed to construct the GenRecord.Record. + Mediator function which transforms the minidom object to a nested dict + and filters information needed to construct the GenRecord.Record. - NOTE: an xml node has attributes and elements, this function squashes - this ambiguity and collects only the attributes and elements of - interest + NOTE: an xml node has attributes and elements, this function squashes this + ambiguity and collects only the attributes and elements of interest - Input: - data ; The LRG file's updatable section node + @arg data: The LRG file's updatable section node + @type data: dictionary - Output - nested dict ; Contains the fields of interest of the LRG - NCBI and Ensembl sections of the updatable node. 
+ @return: Contains the fields of interest of the LRG NCBI and Ensembl + sections of the updatable node + @rtype: dictionary """ ret = {"LRG":{}, "NCBI":{}, "Ensembl":{}} annotation_nodes = data.getElementsByTagName("annotation_set") @@ -362,16 +378,16 @@ def parseUpdatable(data): def getLrgAnnotation(data): """ - Retrieves three parts of the LRG annotation: - - the mapping of this LRG file to a genomic reference - - a diference list between the LRG sequence and the ref seq - - the genename of the main gene annotated by this LRG file + Retrieves three parts of the LRG annotation: + - the mapping of this LRG file to a genomic reference + - a difference list between the LRG sequence and the ref seq + - the genename of the main gene annotated by this LRG file - Input: - data ; updatable section -> Annotations -> LRG node + @arg data: updatable section -> Annotations -> LRG node + @type data: dictionary - Output - dict ; Contains the mapping [+ opt. diffs] and the genename + @return: Contains the mapping [+ opt. diffs] and the genename + @rtype: dictionary """ ret = {"mapping": (), "genename":""} # Get the mapping @@ -396,38 +412,38 @@ def getLrgAnnotation(data): def getFeaturesAnnotation(data): """ - Retrieves feature annotations from NCBI & Ensembl nodes. - - List of genes - - List of transcripts per gene - - Potential Product of a transcript - - If a transcript can not be linked to a transcript from the fixed - section it is stored in the noFixedId list.
- - NOTE: an xml node has attributes and elements, this function squashes - this ambiguity and collects only the attributes and elements of - interest - Input: - data ; updatable section -> Annotations -> NCBI | Ensembl - - Output - nested dict ; toplevel contains the genesymbols as keys e.g: - COL1A1 : - geneAttr : {} - geneLongName : "" - transcripts : {} - geneAttr contains the start, end and strand info - transcripts contains a list of transcripts that - could not be linked to the fixed section AND - it contains each linked transcript with the - locustag as key e.g: - 1 : - transAttr : {} - transLongName : "" - proteinAttr : {} - proteinLongName : "" - transAttr & proteinAttr contain - reference, start and end info + Retrieves feature annotations from NCBI & Ensembl nodes. + - List of genes + - List of transcripts per gene + - Potential Product of a transcript + + If a transcript can not be linked to a transcript from the fixed section it + is stored in the noFixedId list. + + NOTE: an xml node has attributes and elements, this function squashes this + ambiguity and collects only the attributes and elements of interest + + @todo: check documentation + + @arg data: updatable section -> Annotations -> NCBI | Ensembl + @type data: dictionary + + @return: nested dict ; toplevel contains the genesymbols as keys e.g: + - COL1A1 : + - geneAttr : {} + - geneLongName : "" + - transcripts : {} + - geneAttr contains the start, end and strand info + - transcripts contains a list of transcripts that could not be linked to + the fixed section AND it contains each linked transcript with the + locustag as key e.g: + - 1 : + - transAttr : {} + - transLongName : "" + - proteinAttr : {} + - proteinLongName : "" + - transAttr & proteinAttr contain reference, start and end info + @rtype: dictionary """ ret = {} # Get annotation per gene symbol {"COL1A1":{}} #Check if features exists diff --git a/src/Modules/Mapper.py b/src/Modules/Mapper.py index 
9b769d1030a7dadd77fad312e8d9b2783514c430..863f7dd06e8041fb2dabec301f285fc6df58329c 100644 --- a/src/Modules/Mapper.py +++ b/src/Modules/Mapper.py @@ -1,21 +1,33 @@ #!/usr/bin/python """ - Search for an NM number in the MySQL database, if the version number - matches, get the start and end positions in a variant and translate these - positions to g. notation if the variant is in c. notation and vice versa. +Search for an NM number in the MySQL database, if the version number +matches, get the start and end positions in a variant. Translate these +positions to I{g.} notation if the variant is in I{c.} notation or vice versa. - - If no end position is present, the start position is assumed to be the - end position. + - If no end position is present, the start position is assumed to be the end + position. - If the version number is not found in the database, an error message is - generated and a suggestion for an other version is given. + generated and a suggestion for an other version is given. - If the reference sequence is not found at all, an error is returned. - - If no variant is present, the transcription start and end and CDS end - in c. notation is returned. + - If no variant is present, the transcription start and end and CDS end in + I{c.} notation is returned. - If the variant is not accepted by the nomenclature parser, a parse error - will be printed. - + will be printed. 
+ +@requires: sys +@requires: Modules.Config +@requires: Modules.Db +@requires: Modules.Crossmap +@requires: Modules.Parser +@requires: Modules.Output +@requires: Modules.Serializers.SoapMessage +@requires: Modules.Serializers.Mapping +@requires: Modules.Serializers.Transcript +@requires: Bio.Seq.reverse_complement +@requires: collections.defaultdict """ + import sys # argv from Modules import Config # Config() from Modules import Db # Db(), get_NM_version(), get_NM_info() @@ -30,11 +42,13 @@ from collections import defaultdict def _sl2il(l) : """ - Convert a list of strings to a list of integers. + Convert a list of strings to a list of integers. - Arguments: l ; A list of strings. + @arg l: A list of strings + @type l: list - Returns: list ; A list of integers. + @returns: A list of integers + @rtype: list """ return [int(s) for s in l] @@ -42,18 +56,20 @@ def _sl2il(l) : def _getcoords(C, Loc, Type) : """ - Return main, offset and g positions given either a position in - c. or in g. notation. - - Arguments: - C ; A crossmapper. - Loc ; Either a location in g. or c. notation. - Type ; The reference type. - Returns: - triple: - 0 ; Main coordinate in c. notation. - 1 ; Offset coordinate in c. notation. - 2 ; Position in g. notation. + Return main, offset and g positions given either a position in + I{c.} or in I{g.} notation. + + @arg C: A crossmapper + @type C: object + @arg Loc: A location in either I{g.} or I{c.} notation + @type Loc: object + @arg Type: The reference type + @type Type: string + @returns: triple: + 0. Main coordinate in I{c.} notation + 1. Offset coordinate in I{c.} notation + 2. 
Position in I{g.} notation + @rtype: triple (integer, integer, integer) """ if Type == 'c' : @@ -69,11 +85,24 @@ def _getcoords(C, Loc, Type) : return (main, offset, g) #__getcoords -class Converter(object): +class Converter(object) : """ - Converter object docstring + Converter object docstring """ - def __init__(self, build, C, O): + + def __init__(self, build, C, O) : + """ + Initialise the class. + + @arg build: the genome build version of the organism (e.g. hg19 for + human genome build version 19) + @type build: string + @arg C: configuration object + @type C: object + @arg O: output object + @type O: object + """ + self.build = None self.__output = O self.__config = C @@ -83,40 +112,79 @@ class Converter(object): self.parseTree = None self.crossmap = None self.dbFields = {} + #__init__ + + def _changeBuild(self, build) : + """ + @todo: document me (figure out what it does) + Change the build if it is different from the one previously set. + + @arg build: the genome build version of the organism (e.g. hg19 for + human genome build version 19) + @type build: string + """ - def _changeBuild(self, build): - if self.build != build: + if self.build != build : self.crossmap = None self.dbFields = {} self.build = build self.__database = Db.Mapping(build, C.Db) + #if + #_changeBuild - def _reset(self): + def _reset(self) : self.crossmap = None self.dbFields = {} + #_reset + + def _parseInput(self, variant) : + """ + Parse a variant.
+ + @arg variant: variant description + @type variant: string + + @return: parsetree object + @rtype: object + """ - def _parseInput(self, variant): P = Parser.Nomenclatureparser(self.__output) parseTree = P.parse(variant) - if not parseTree: + if not parseTree : self.__output.addMessage(__file__, 4, "EPARSE", "Could not parse the given variant") return None - if parseTree.SingleAlleleVarSet: + #if + if parseTree.SingleAlleleVarSet : #Only simple mutations self.__output.addMessage(__file__, 4, "EPARSE", "Can not process multiple mutation variant") return None + #if if not parseTree.RefSeqAcc: #In case of LRG for example self.__output.addMessage(__file__, 4, "EONLYGB", "Currently we only support GenBank Records") return None + #if self.parseTree = parseTree return parseTree #_parseInput - def _populateFields(self, Fields): + def _populateFields(self, Fields) : #TODO: ADD Error Messages, unlikely that CDS info is missing + """ + Create a Mutalyzer compatible exon list. + + @todo: ADD Error Messages, unlikely that CDS info is missing. + + @arg Fields: dictionary with exon start and end positions taken from the + MySQL database + @type Fields: dictionary + + @return: Exon list + @rtype: list + """ + Fields["exonStarts"] =\ _sl2il(Fields["exonStarts"].split(',')[:-1]) Fields["exonEnds"] =\ @@ -131,42 +199,66 @@ class Converter(object): # Create Mutalyzer compatible exon list Fields["exons"] = [] - for exon in zip(Fields["exonStarts"], Fields["exonEnds"]): + for exon in zip(Fields["exonStarts"], Fields["exonEnds"]) : Fields["exons"].extend(exon) self.dbFields = Fields return Fields + #_populateFields + + def _FieldsFromValues(self, values) : + """ + Combines labels with the given values to a dictionary. + (zip returns a list of tuples, where the i-th tuple contains the i-th + element from each of the argument sequences or iterables. + dict(arg) creates a new data dictionary, with items taken from arg.) 
+ + @arg values: list of values take from the MySQL database + @type values: list + + @return: dictionary with values taken from the MySQL database + @rtype: dictionary + """ - def _FieldsFromValues(self, values): Fields = dict(zip( ("acc", "txStart", "txEnd", "cdsStart", "cdsEnd", "exonStarts", "exonEnds", "geneName", "chrom", "strand", "protAcc", "version"), values)) return self._populateFields(Fields) + #_FieldsFromValues + + def _FieldsFromDb(self, acc, version) : + """ + Get data from database and populate dbFields dict. + + @arg acc: NM_ accession number (without version) + @type acc: string + @arg version: version number + @type version: integer + """ - def _FieldsFromDb(self, acc, version): - """Get data from database and populate dbFields dict""" - if not version: + if not version : version = 0 version = int(version) versions = self.__database.get_NM_version(acc) - if not versions: + if not versions : self.__output.addMessage(__file__, 4, "EACCNOTINDB", "The accession number: %s could not be " "found in our database." % acc) self.__output.addOutput("LOVDERR", "Reference sequence not found.") - return None #Excplicit return of None in case of error - else: - if version in versions: + return None #Explicit return of None in case of error + #if + else : + if version in versions : Values = self.__database.getAllFields(acc, version) return self._FieldsFromValues(Values) - - if not version: + #if + if not version : self.__output.addMessage(__file__, 4, "ENOVERSION", "Version number missing for %s" % acc) - else: + else : self.__output.addMessage(__file__, 4, "EACCNOTINDB", "The accession number: %s version %s " "could not be found in our database." % @@ -186,18 +278,24 @@ class Converter(object): # "Reference sequence version not found. " # "Available: %s.%s" % (acc, sorted(versions)[-1])) return None + #else + #_FieldsFromDb def makeCrossmap(self) : - ''' Build the crossmapper + """ + Build the crossmapper. 
+ + @todo: ADD Error Messages + + @return: Cross ; A Crossmap object + @rtype: object + """ - Returns: - Cross ; A Crossmap object. - ''' #TODO: ADD Error Messages if not self.dbFields: return None CDS = [] - if self.dbFields["cdsStart"] != self.dbFields["cdsEnd"]: + if self.dbFields["cdsStart"] != self.dbFields["cdsEnd"] : #The counting from 0 conversion. CDS = [self.dbFields["cdsStart"] + 1, self.dbFields["cdsEnd"]] @@ -213,13 +311,14 @@ class Converter(object): return self.crossmap #makeCrossmap - def _coreMapping(self): - ''' - Build the Mapping ClassSerializer + def _coreMapping(self) : + """ + Build the Mapping ClassSerializer. + + @return: Mapping ; A ClassSerializer object + @rtype: object + """ - Returns: - Mapping ; A ClassSerializer object. - ''' Cross = self.makeCrossmap() if not Cross : return None @@ -239,7 +338,7 @@ class Converter(object): endmain, endoffset, end_g = \ _getcoords(Cross, mutation.EndLoc.PtLoc, self.parseTree.RefType) - else: + else : end_g, endmain, endoffset = start_g, startmain, startoffset # Assign these values to the Mapping ClassSerializer @@ -253,32 +352,47 @@ class Converter(object): V.mutationType = mutation.MutationType return V + #_coreMapping def giveInfo(self, accNo) : - if '.' not in accNo: + """ + Returns transcription start, transcription end and CDS stop, if + available. + + @arg accNo: transcript (NM_) accession number (with or without version) + @type accNo: string + + @return: transcription start, transcription end and CDS stop + @rtype: triple + """ + + if '.' not in accNo : acc, ver = accNo, None - else: + else : acc, ver = accNo.split('.') self._FieldsFromDb(acc, ver) CM = self.makeCrossmap() - if CM: return CM.info() + if CM : + return CM.info() + #giveInfo - def mainTranscript(self, accNo): + def mainTranscript(self, accNo) : """ - One of the entry points (called by the HTML publisher). + One of the entry points (called by the HTML publisher). 
- Arguments: - acc ; The full NM accession number (including version). + @arg accNo: The full NM accession number (including version) + @type accNo: string - Returns: - T ; ClassSerializer object with the types trans_start, - trans_stop and CDS_stop. + @return: T ; ClassSerializer object with the types trans_start, + trans_stop and CDS_stop + @rtype: object """ + # Initiate ClassSerializer object info = self.giveInfo(accNo) T = Transcript() - if info: + if info : T.trans_start = info[0] T.trans_stop = info[1] T.CDS_stop = info[2] @@ -287,10 +401,19 @@ class Converter(object): def mainMapping(self, accNo, mutation) : """ - One of the entry points (called by the HTML publisher). + One of the entry points (called by the HTML publisher). + + @arg accNo: transcript (NM_) accession number (with version?) + @type accNo: string + @arg mutation: the 'mutation' (e.g. c.123C>T) + @type mutation: string + + @return: ClassSerializer object + @rtype: object """ + variant = "%s:%s" % (accNo, mutation) - if self._parseInput(variant): + if self._parseInput(variant) : acc = self.parseTree.RefSeqAcc version = self.parseTree.Version self._FieldsFromDb(acc, version) @@ -298,59 +421,69 @@ class Converter(object): mapping = self._coreMapping() soaperrors = self.__output.getSoapMessages() - if mapping is None: # Something went wrong + if mapping is None : # Something went wrong mapping = Mapping() mapping.errorcode = len(soaperrors) - else: + else : mapping.errorcode = 0 mapping.messages = soaperrors return mapping - #main_Mapping def c2chrom(self, variant) : """ - Converts a complete HGVS c. notation into a chromosomal notation - - Arguments: - variant ; The variant in HGVS c.notation. + Converts a complete HGVS I{c.} notation into a chromosomal notation. - Returns: - var_in_g ; The variant in HGVS g. notation (string). 
+ @arg variant: The variant in HGVS I{c.} notation + @type variant: string + @return: var_in_g ; The variant in HGVS I{g.} notation + @rtype: string """ - if self._parseInput(variant): + + if self._parseInput(variant) : acc = self.parseTree.RefSeqAcc version = self.parseTree.Version self._FieldsFromDb(acc, version) + #if M = self._coreMapping() - if M is None: return None + if M is None : + return None # construct the variant description chromAcc = self.__database.chromAcc(self.dbFields["chrom"]) f_change = self._constructChange(False) r_change = self._constructChange(True) - if self.dbFields["strand"] == "+": + if self.dbFields["strand"] == "+" : change = f_change - else: + else : change = r_change - if M.start_g != M.end_g: + if M.start_g != M.end_g : if self.dbFields["strand"] == '+' : var_in_g = "g.%s_%s%s" % (M.start_g, M.end_g, change) else : var_in_g = "g.%s_%s%s" % (M.end_g, M.start_g, change) + #if else : var_in_g = "g.%s%s" % (M.start_g, change) return "%s:%s" % (chromAcc, var_in_g) - #cTog + #c2chrom + + def correctChrVariant(self, variant) : + """ + @arg variant: + @type variant: string + + @return: variant ; + @rtype: string + """ - def correctChrVariant(self, variant): #Pre split check - if ':' not in variant: + if ':' not in variant : self.__output.addMessage(__file__, 4, "EPARSE", "The variant needs a colon") return None @@ -358,49 +491,63 @@ class Converter(object): #Remove whitespace variant = variant.replace(" ","") - if variant.startswith("chr"): + if variant.startswith("chr") : preco, postco = variant.split(":") chrom = self.__database.chromAcc(preco) - if chrom is None: + if chrom is None : self.__output.addMessage(__file__, 4, "ENOTINDB", "Chromosome %s could not be found in our database" % preco) return None - else: + #if + else : variant = "%s:%s" % (chrom, postco) + #if return variant + #correctChrVariant - def chrom2c(self, variant, rt): - if not self._parseInput(variant): + def chrom2c(self, variant, rt) : + """ + @arg variant: a variant 
description + @type variant: string + @arg rt: the return type + @type rt: string + + @return: HGVS_notatations ; + @rtype: dictionary or list + """ + + if not self._parseInput(variant) : return None acc = self.parseTree.RefSeqAcc version = self.parseTree.Version chrom = self.__database.chromName("%s.%s" % (acc, version)) - if not chrom: + if not chrom : self.__output.addMessage(__file__, 4, "ENOTINDB", "Accession number: %s could not be found in our database" % acc) return None + #if f_change = self._constructChange(False) r_change = self._constructChange(True) #FIXME This should be a proper conversion. loc = int(self.parseTree.RawVar.StartLoc.PtLoc.Main) - if self.parseTree.RawVar.EndLoc: + if self.parseTree.RawVar.EndLoc : loc2 = int(self.parseTree.RawVar.EndLoc.PtLoc.Main) - else: + else : loc2 = loc transcripts = self.__database.get_Transcripts(\ chrom, loc-5000, loc2+5000, 1) HGVS_notatations = defaultdict(list) NM_list = [] - for transcript in transcripts: + for transcript in transcripts : self._reset() self._FieldsFromValues(transcript) M = self._coreMapping() - if M is None: + if M is None : #balen continue # construct the variant description @@ -410,22 +557,22 @@ class Converter(object): startp = self.crossmap.tuple2string((M.startmain, M.startoffset)) endp = self.crossmap.tuple2string((M.endmain, M.endoffset)) - if strand: + if strand : change = f_change - else: + else : change = r_change startp, endp = endp, startp #Check if n or c type info = self.crossmap.info() - if info[0] == '1' and info[1] == info[2]: + if info[0] == '1' and info[1] == info[2] : mtype = 'n' - else: + else : mtype = 'c' - if M.start_g != M.end_g: + if M.start_g != M.end_g : loca = "%s_%s" % (startp, endp) - else: + else : loca = "%s" % startp variant = "%s:%c.%s%s" % (accNo, mtype, loca, change) @@ -437,25 +584,35 @@ class Converter(object): return HGVS_notatations #chrom2c - def _constructChange(self, revc=False): + def _constructChange(self, revc = False) : + """ + @todo document 
me + + @arg revc: + @type revc: + + @return: + @rtype: string + """ + p = self.parseTree - if not p or p.SingleAlleleVarSet: + if not p or p.SingleAlleleVarSet : return None var = p.RawVar - if revc: + if revc : arg1 = reverse_complement(var.Arg1 or "") #imported from Bio.Seq arg2 = reverse_complement(var.Arg2 or "") - else: + #if + else : arg1 = var.Arg1 arg2 = var.Arg2 + #else - if var.MutationType == "subst": + if var.MutationType == "subst" : change = "%s>%s" % (arg1, arg2) - else: + else : change = "%s%s" % (var.MutationType, arg1 or arg2 or "") return change #_constructChange - - - +#Converter diff --git a/src/Modules/Misc.py b/src/Modules/Misc.py index 8f76dfc0b61cada60cebb9727a2a83c2451e3975..69ab2cb4044222848809e626ff448918d1db9e75 100644 --- a/src/Modules/Misc.py +++ b/src/Modules/Misc.py @@ -1,16 +1,23 @@ #!/usr/bin/python """ +@todo: documentation """ import time class Misc() : """ + @todo: documentation """ def ID(self) : """ + Generates an ID using time() + @todo: documentation + + @return: + @rtype: """ IDsPerSec = 100 diff --git a/src/Modules/Mutator.py b/src/Modules/Mutator.py index 313c74b8863010d19e02b96d06c5fac1143c5ccd..6fb16c14840f0301a77967ee3bd3624602dc8a54 100644 --- a/src/Modules/Mutator.py +++ b/src/Modules/Mutator.py @@ -1,98 +1,104 @@ #!/usr/bin/python -from Bio import Restriction -from Bio.Seq import Seq -from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA -from Bio.Seq import reverse_complement # reverse_complement() - """ - Module for mutating a string. +Module for mutating a string. - Mutations are described in the original coordinates. These coordinates are - transfered to the mutated coordinates with the aid of an internal shift - list, which keeps track of the sizes of changes. Using the original - coordinates greatly simplifies combined mutations in a variant. A - visualisation of each raw variant within a combined variant is made and - effects on restriction sites are also analysed. 
+Mutations are described in the original coordinates. These coordinates are +transfered to the mutated coordinates with the aid of an internal shift +list, which keeps track of the sizes of changes. Using the original +coordinates greatly simplifies combined mutations in a variant. A +visualisation of each raw variant within a combined variant is made and +effects on restriction sites are also analysed. - The original as well as the mutated string are stored here. +The original as well as the mutated string are stored here. - Public classes: - Mutator ; Mutate a string and register all shift points. +@requires: Bio.Restriction +@requires: Bio.Seq.Seq +@requires: Bio.Alphabet.IUPAC.IUPACAmbiguousDNA +@requires: Bio.Seq.reverse_complement """ +# Public classes: +# - Mutator ; Mutate a string and register all shift points. -class Mutator() : - """ - Mutate a string and register all shift points. For each mutation a - visualisation is made (on genomic level) and the addition or deletion - of restriction sites is detected. Output for each raw variant is stored - in the output object as 'visualisation', 'deletedRestrictionSites' and - 'addedRestrictionSites' respectively. - - Private variables: - __config ; Configuration variables of this class: - __output ; The output object. - __shift ; A sorted list of tuples (position, shiftsize) - where the modifications in length are stored. - Each first element of the tuples in this list - is unique, each second element is non-zero. - __restrictionBatch ; +from Bio import Restriction +from Bio.Seq import Seq +from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA +from Bio.Seq import reverse_complement # reverse_complement() - Public variables: - orig ; The original string. - mutated ; The mutated string. - - Special methods: - __init__(orig) ; Initialise the class with the original string. - - Private methods: - __sortins(tuple) ; Insert a tuple in a sorted list, after - insertion the list stays sorted. 
- __makeRestrictionSet() - __mutate(pos1, pos2, ins) ; A general mutation function that does a - delins on interbase coordinates of the - original string. - - Public methods: - shiftpos(position) ; Calculate the position in the mutated - string given the position in the - original string. - newSplice(sites) ; Generate a list of new splice sites. - delM(pos1, pos2) ; Delete a range from non-interbase - position pos1 to pos2. - insM(pos, ins) ; Insert a string at interbase position - pos. - delimsM(pos1, pos2, ins) ; Delete a range from non-interbase - position pos1 to pos2 and insert ins. - subM(pos, nuc) ; Substitute a nucleotite at non-interbase - position pos for nuc. - invM(pos1, pos2) ; Invert a range from non-interbase - position pos1 to pos2. - dupM(pos1, pos2) ; Duplicate a range from non-interbase - position pos1 to pos2. +class Mutator() : + """ + Mutate a string and register all shift points. For each mutation a + visualisation is made (on genomic level) and the addition or deletion + of restriction sites is detected. Output for each raw variant is stored + in the output object as 'visualisation', 'deletedRestrictionSites' and + 'addedRestrictionSites' respectively. + + Private variables: + - __config ; Configuration variables of this class. + - __output ; The output object. + - __shift ; A sorted list of tuples (position, shiftsize) + where the modifications in length are stored. + Each first element of the tuples in this list + is unique, each second element is non-zero. + - __restrictionBatch ; + + Public variables: + - orig ; The original string. + - mutated ; The mutated string. + + Special methods: + - __init__(orig) ; Initialise the class with the original string. + + Private methods: + - __sortins(tuple) ; Insert a tuple in a sorted list, after + insertion the list stays sorted. + - __makeRestrictionSet() + - __mutate(pos1, pos2, ins) ; A general mutation function that does a + delins on interbase coordinates of the + original string. 
+ + Public methods: + - shiftpos(position) ; Calculate the position in the mutated + string given the position in the + original string. + - newSplice(sites) ; Generate a list of new splice sites. + - delM(pos1, pos2) ; Delete a range from non-interbase + position pos1 to pos2. + - insM(pos, ins) ; Insert a string at interbase position + pos. + - delimsM(pos1, pos2, ins) ; Delete a range from non-interbase + position pos1 to pos2 and insert ins. + - subM(pos, nuc) ; Substitute a nucleotite at non-interbase + position pos for nuc. + - invM(pos1, pos2) ; Invert a range from non-interbase + position pos1 to pos2. + - dupM(pos1, pos2) ; Duplicate a range from non-interbase + position pos1 to pos2. """ def __init__(self, orig, config, output) : """ - Initialise the class with the original string. - - Arguments: - orig ; The original string before mutation. - config ; Configuration variables. - output ; The output object. - - Private variables (altered): - __config ; Initialised with the configuration - variables. - __output ; Initialised with the output object. - __shift ; Initialised to the empty list. - __restrictionBatch ; Initialised to a default set of - restriction enzymes. - - Public variables (altered): - orig ; Initialised to the parameter orig. - mutated ; Initialised to the parameter orig. + Initialise the class with the original string. + + Private variables (altered): + - __config ; Initialised with the configuration + variables. + - __output ; Initialised with the output object. + - __shift ; Initialised to the empty list. + - __restrictionBatch ; Initialised to a default set of + restriction enzymes. + + Public variables (altered): + - orig ; Initialised to the parameter orig. + - mutated ; Initialised to the parameter orig. 
+ + @arg orig: The original string before mutation + @type orig: string + @arg config: Configuration variables + @type config: object + @arg output: The output object + @type output: object """ self.__config = config @@ -106,20 +112,21 @@ class Mutator() : def __sortins(self, tuple) : """ - Insert a tuple in a sorted list, the list is sorted on the first - element of the tuples. After insertion the list stays sorted. - If a tuple is inserted where tuple[0] already exists, this entry - is altered. - If an altered entry has zero as its second element, the entry is - removed. - - Arguments: - tuple ; An ordered pair where tuple[0] denotes a position and - tuple[1] denotes the change in shift at this position. - - Private variables (altered): - __shift ; A tuple can be added, removed or altered. + Insert a tuple in a sorted list, the list is sorted on the first + element of the tuples. After insertion the list stays sorted. + If a tuple is inserted where tuple[0] already exists, this entry + is altered. + If an altered entry has zero as its second element, the entry is + removed. + + Private variables (altered): + - __shift ; A tuple can be added, removed or altered. + + @arg tuple: An ordered pair where tuple[0] denotes a position and + tuple[1] denotes the change in shift at this position + @type tuple: tuple (integer) """ + if not tuple[1] : # Only non-zero shift sizes are relevant. return @@ -143,17 +150,17 @@ class Mutator() : def __makeRestrictionList(self, seq) : """ - Return a set of restriction enzymes that can bind in a certain - sequence. + Return a set of restriction enzymes that can bind in a certain + sequence. - Arguments: - seq ; The sequence to be analysed. + Private variables: + - __restrictionBatch ; A RestrictionBatch object. - Returns: - list ; A list of restriction enzymes. + @arg seq: The sequence to be analysed + @type seq: string - Private variables: - __restrictionBatch ; A RestrictionBatch object. 
+ @return: A list of restriction enzymes + @rtype: list """ restrictionAnalysis = Restriction.Analysis(self.__restrictionBatch, seq) @@ -169,8 +176,18 @@ class Mutator() : #__makeRestrictionSet def __restrictionDiff(self, list1, list2) : - #TODO documentation """ + Compare two lists, and count those elements which are only present + in list1. + + @arg list1: some list + @type list1: list + @arg list2: some (other) list + @type list2: list + + @return: the elements only present in list 1, together with the number + of occurrences, if more than once present + @rtype: list """ tempList = list(list1) @@ -193,26 +210,31 @@ class Mutator() : def __mutate(self, pos1, pos2, ins) : """ - A general mutation function that does a delins on interbase - coordinates of the original string. The change in length (if any) - is stored by calling the __sortins() function. - The coordinates are those of the original string, so we use the - __shifsize() function to map them to the mutated string, on which - we perform the alteration. - - Arguments: - pos1 ; The first interbase position of the deletion. - pos2 ; The second interbase position of the deletion. - ins ; The insertion. - - Private variables: - __config ; The variables maxvissize, flanksize and flankclipsize - are used in the visualisation. - __output ; Visualisation information is added. - - Public variables (altered): - mutated ; This string will reflect the result of the given - delins. + A general mutation function that does a delins on interbase + coordinates of the original string. The change in length (if any) + is stored by calling the __sortins() function. + The coordinates are those of the original string, so we use the + __shifsize() function to map them to the mutated string, on which + we perform the alteration. + + Private variables: + - __config ; The variables maxvissize, flanksize and flankclipsize + are used in the visualisation. + - __output ; Visualisation information is added. 
+ + Public variables (altered): + - mutated ; This string will reflect the result of the given + delins. + + @arg pos1: The first interbase position of the deletion + @type pos1: integer + @arg pos2: The second interbase position of the deletion + @type pos2: integer + @arg ins: The insertion + @type ins: string + + @return: visualisation + @rtype: string """ # @@ -276,6 +298,14 @@ class Mutator() : def visualiseLargeString(self, string) : """ + If the length of a sequence is larger than a certain maxvissize, the + string is clipped; otherwise the string is just returned. + + @arg string: DNA sequence + @type string: string + + @return: either the original sequence, or an abbreviation of it + @rtype: string """ if len(string) > self.__config.maxvissize : @@ -287,18 +317,18 @@ class Mutator() : def shiftpos(self, position) : """ - Calculate the position in the mutated string, given a position in - the original string. + Calculate the position in the mutated string, given a position in + the original string. - Arguments: - position ; The position in the original string for which we - want the shift size. + Private variables: + - __shift ; Used to calculate the shift. - Private variables: - __shift ; Used to calculate the shift. + @arg position: The position in the original string for which we want the + shift size + @type position: integer - Returns: - integer ; The position in the mutated string. + @return: The position in the mutated string + @rtype: integer """ ret = position @@ -314,13 +344,13 @@ class Mutator() : def newSplice(self, sites) : """ - Generate a list of new splice sites. + Generate a list of new splice sites. - Arguments: - sites ; A list of old splice sites. + @arg sites: A list of old splice sites + @type sites: list - Returns: - list ; A list of new splice sites. 
+ @return: A list of new splice sites + @rtype: list """ ret = [] @@ -341,14 +371,15 @@ class Mutator() : def delM(self, pos1, pos2) : """ - Delete a range from non-interbase position pos1 to pos2. + Delete a range from non-interbase position pos1 to pos2. - Arguments: - pos1 ; The first nucleotide of the range to be deleted. - pos2 ; The last nucleotide of the range to be deleted. + Private variables: + - __output ; Visualisation information is added. - Private variables: - __output ; Visualisation information is added. + @arg pos1: The first nucleotide of the range to be deleted + @type pos1: integer + @arg pos2: The last nucleotide of the range to be deleted + @type pos2: integer """ if pos1 == pos2 : @@ -362,15 +393,15 @@ class Mutator() : def insM(self, pos, ins) : """ - Insert a string at interbase position pos. + Insert a string at interbase position pos. - Arguments: - pos ; The interbase position where the insertion should take - place. - ins ; The insertion, a string. + Private variables: + - __output ; Visualisation information is added. - Private variables: - __output ; Visualisation information is added. + @arg pos: The interbase position where the insertion should take place + @type pos: integer + @arg ins: The insertion + @type ins: string """ visualisation = ["insertion between %i and %i" % (pos, pos + 1)] @@ -380,13 +411,15 @@ class Mutator() : def delinsM(self, pos1, pos2, ins) : """ - Delete a range from non-interbase position pos1 to pos2 and insert - ins. - - Arguments: - pos1 ; The first nucleotide of the range to be deleted. - pos2 ; The last nucleotide of the range to be deleted. - ins ; The insertion, a string. + Delete a range from non-interbase position pos1 to pos2 and insert + ins. + + @arg pos1: The first nucleotide of the range to be deleted + @type pos1: integer + @arg pos2: The last nucleotide of the range to be deleted. 
+ @type pos2: integer + @arg ins: The insertion + @type ins: string """ visualisation = ["delins from %i to %i" % (pos1, pos2)] @@ -396,14 +429,15 @@ class Mutator() : def subM(self, pos, nuc) : """ - Substitute a nucleotite at non-interbase position pos for nuc. + Substitute a nucleotide at non-interbase position pos for nuc. - Arguments: - pos ; The position where the substitution should take place. - nuc ; The new nucleotide. + Private variables: + - __output ; Visualisation information is added. - Private variables: - __output ; Visualisation information is added. + @arg pos: The position where the substitution should take place + @type pos: integer + @arg nuc: The new nucleotide + @type nuc: string """ visualisation = ["substitution at %i" % pos] @@ -413,14 +447,15 @@ class Mutator() : def invM(self, pos1, pos2) : """ - Invert a range from non-interbase position pos1 to pos2. + Invert a range from non-interbase position pos1 to pos2. - Arguments: - pos1 ; The first nucleotide of the range to be inverted. - pos2 ; The last nucleotide of the range to be inverted. + Public variables: + - orig ; The original string. - Public variables: - orig ; The original string. + @arg pos1: The first nucleotide of the range to be inverted + @type pos1: integer + @arg pos2: The last nucleotide of the range to be inverted + @type pos2: integer """ visualisation = ["inversion between %i and %i" % (pos1, pos2)] @@ -431,14 +466,15 @@ class Mutator() : def dupM(self, pos1, pos2) : """ - Duplicate a range from non-interbase position pos1 to pos2. + Duplicate a range from non-interbase position pos1 to pos2. - Arguments: - pos1 ; The first nucleotide of the range to be duplicated. - pos2 ; The last nucleotide of the range to be duplicated. + Public variables: + - orig ; The original string. - Public variables: - orig ; The original string. 
+ @arg pos1: The first nucleotide of the range to be duplicated + @type pos1: integer + @arg pos2: The last nucleotide of the range to be duplicated + @type pos2: integer """ visualisation = ["duplication from %i to %i" % (pos1, pos2)] diff --git a/src/Modules/Output.py b/src/Modules/Output.py index 5aa37432904a351a9bccad42b8c50d9b71bff073..a5ebf0b5aba1ed32ae67933799ddd26f80b83483 100644 --- a/src/Modules/Output.py +++ b/src/Modules/Output.py @@ -1,123 +1,86 @@ #!/usr/bin/python """ - Module for storing output and messages. - Output is stored as a named list that can be expanded. - Messages can be retrieved at a later time to provide flexibility. Message - levels are defined to increase or decrease the amount of logging and ouput. - The position of the log file, as well as the levels are defined in the - configuration file. - - Message levels: - -1 : Log ; Specifically log a message. - 0 : Debug ; Debug information. - 1 : Info ; Info. - 2 : Warning ; Regular warnings. - 3 : Error ; Serious errors that can be compensated for. - 4 : Fatal ; Errors that are not recoverable. - 5 : Off ; Can be used as a log/output level to turn off output. - - Public classes: - Message ; Container class for message variables. - Output ; Output interface for errors, warnings and logging. +Module for storing output and messages. +Output is stored as a named list that can be expanded. +Messages can be retrieved at a later time to provide flexibility. Message +levels are defined to increase or decrease the amount of logging and ouput. +The position of the log file, as well as the levels are defined in the +configuration file. + +Message levels: + - E{-}1 : Log ; Specifically log a message. + - 0 : Debug ; Debug information. + - 1 : Info ; Info. + - 2 : Warning ; Regular warnings. + - 3 : Error ; Serious errors that can be compensated for. + - 4 : Fatal ; Errors that are not recoverable. + - 5 : Off ; Can be used as a log/output level to turn off output. 
+ +@requires: time """ +# Public classes: +# - Message ; Container class for message variables. +# - Output ; Output interface for errors, warnings and logging. -import time # strftime() - -class Message() : - """ - Container class for message variables. - - Special methods: - __init__(origin, level, code, description) ; Make a message object. - - Public variables: - origin ; Name of the module creating this object. - level ; Importance of the message. - code ; The error code of the message. - description ; A description of the message. - """ - def __init__(self, origin, level, code, description) : - """ - Make a new message object. - - Arguments: - origin ; Name of the module creating this object. - level ; Importance of the message. - code ; The error code of the message. - description ; A description of the message. - - Public variables (altered): - origin ; Name of the module creating this object. - level ; Importance of the message. - code ; The error code of the message. - description ; A description of the message. - """ - - self.origin = origin - self.level = level - self.code = code - self.description = description - #__init__ -#Message +import time # strftime() class Output() : """ - Provide an output interface for errors, warnings and logging purposes. - - Private variables: - __config ; Configuration variables. - __outputdata ; The output dictionary. - __messages ; The messages list. - __instance ; The name of the module that made this object. - __loghandle ; The handle of the log file. - __errors ; The number of errors that have been processed. - __warnings ; The number of warnings that have been processed. - - Special methods: - __init__(instance, config) ; Initialise the class with variables - from the config file and the calling - module. - __del__() ; Close the logfile and clean up. - - Private methods: - __niceName(filename) ; Strip the path and the extention from a - filename. - __levelToName(level) ; Convert a log level to a readable string. 
- - Public methods: - addMessage(filename, ; Add a message to the message list. - level, - code, - description) - getMessages() ; Print all messages that exceed the - configured output level. - addOutput(name, data) ; Add output to the output dictionary. - getOutput(name) ; Retrieve data from the output dictionary. - Summary() ; Print a summary of the number of errors - and warnings. + Provide an output interface for errors, warnings and logging purposes. + + Private variables: + - __config ; Configuration variables. + - __outputdata ; The output dictionary. + - __messages ; The messages list. + - __instance ; The name of the module that made this object. + - __loghandle ; The handle of the log file. + - __errors ; The number of errors that have been processed. + - __warnings ; The number of warnings that have been processed. + + Special methods: + - __init__(instance, config) ; Initialise the class with variables + from the config file and the calling + module. + - __del__() ; Close the logfile and clean up. + + Private methods: + - __niceName(filename) ; Strip the path and the extention from a + filename. + - __levelToName(level) ; Convert a log level to a readable string. + + Public methods: + - addMessage(filename, level, code, description) ; Add a message to + the message list. + - getMessages() ; Print all messages that exceed the + configured output level. + - addOutput(name, data) ; Add output to the output dictionary. + - getOutput(name) ; Retrieve data from the output dictionary. + - Summary() ; Print a summary of the number of errors + and warnings. """ def __init__(self, instance, config) : """ - Initialise the class private variables with variables from the - config file and the calling module. - - Arguments: - instance ; The filename of the module that created this object. - config ; The configuration object. - - Private variables (altered): - __config ; Configuration variables. - __outputdata ; The output dictionary. - __messages ; The messages list. 
- __instance ; Initialised with the name of the module that - created this object. - __loghandle ; Initialised as the handle of the log file - defined in the configuration file. - __errors ; Initialised to 0. - __warnings ; Initialised to 0. + Initialise the class private variables with variables from the + config file and the calling module. + + Private variables (altered): + - __config ; Configuration variables. + - __outputdata ; The output dictionary. + - __messages ; The messages list. + - __instance ; Initialised with the name of the module that + created this object. + - __loghandle ; Initialised as the handle of the log file + defined in the configuration file. + - __errors ; Initialised to 0. + - __warnings ; Initialised to 0. + + @arg instance: The filename of the module that created this object + @type instance: string + @arg config: The configuration object + @type config: object """ self.__config = config @@ -131,14 +94,14 @@ class Output() : def __del__(self) : """ - Clean up the output dictionary, the messages list and close the log - file. - - Private variables(altered): - __loghandle ; The handle of the log file defined in the - configuration file. - __outputdata ; The output dictionary. - __messages ; The messages list. + Clean up the output dictionary, the messages list and close the log + file. + + Private variables(altered): + - __loghandle ; The handle of the log file defined in the + configuration file. + - __outputdata ; The output dictionary. + - __messages ; The messages list. """ self.__loghandle.close() @@ -150,13 +113,13 @@ class Output() : def __niceName(self, filename) : """ - Strip the path and the extention from a filename. + Strip the path and the extention from a filename. - Arguments: - filename ; A complete path plus extention. + @arg filename: A complete path plus extention + @type filename: string - Returns: - string ; The bare filename without a path and extention. 
+ @return: The bare filename without a path and extention + @rtype: string """ return filename.split('/')[-1].split('.')[0] @@ -164,13 +127,13 @@ class Output() : def __levelToName(self, level) : """ - Convert a log level to a readable string. + Convert a log level to a readable string. - Arguments: - level ; A log level (an integer between -1 and 5). + @arg level: A log level (an integer between -1 and 5) + @type level: integer - Returns: - string ; A readable description of the log level. + @return: A readable description of the log level + @rtype: string """ if level == 0 : @@ -188,27 +151,26 @@ class Output() : def addMessage(self, filename, level, code, description) : """ - Add a message to the message list. - If the level exceeds the configured loglevel or if the level is -1, - then the message is also logged. - If the severity equals 2, then the number of warnings is inreased, - if it exceeds 2, then the number of errors is increased. - - Arguments: - filename ; Name of the calling module. - level ; Severity of the message. - code ; Error code of the message. - description ; Description of the message. - - Private variables: - __messages ; The messages list. - __instance ; Module that created the Output object. - __config ; The variables loglevel and datestring are used. - __loghandle ; Handle to the log file. - - Private variables (altered): - __warnings ; Increased by one if the severity equals 2. - __errors ; Increased by one if the severity exceeds 2. + Add a message to the message list. + If the level exceeds the configured loglevel or if the level is -1, + then the message is also logged. + If the severity equals 2, then the number of warnings is inreased, + if it exceeds 2, then the number of errors is increased. + + Private variables: + - __messages ; The messages list. + - __instance ; Module that created the Output object. + - __config ; The variables loglevel and datestring are used. + - __loghandle ; Handle to the log file. 
+ + Private variables (altered): + - __warnings ; Increased by one if the severity equals 2. + - __errors ; Increased by one if the severity exceeds 2. + + @arg filename: Name of the calling module + @arg level: Severity of the message + @arg code: Error code of the message + @arg description: Description of the message """ niceName = self.__niceName(filename) @@ -234,14 +196,14 @@ class Output() : def getMessages(self) : """ - Print all messages that exceed the configured output level. + Print all messages that exceed the configured output level. - Private variables: - __messages ; The messages list. - __config ; The variable outputlevel is used. + Private variables: + - __messages ; The messages list. + - __config ; The variable outputlevel is used. - Returns: - list ; A list of messages. + @return: A list of messages + @rtype: list """ ret = [] @@ -256,14 +218,16 @@ class Output() : def getSoapMessages(self): """ - Returns a list of SoapMessages for over the wire + Returns a list of SoapMessages for over the wire + + Private variables: + - __messages ; The messages list. + - __config ; The variable outputlevel is used. - Private variables: - __messages ; The messages list. - __config ; The variable outputlevel is used. + @requires: Modules.Serializers.SoapMessage - Returns: - list ; + @return: list of SoapMessages + @rtype: list """ #TODO: MOVE to top if works @@ -283,17 +247,17 @@ class Output() : def getBatchMessages(self, level): """ - Returns a list of Messages with an errorlevel >= level - and removes additional lines from a parseerror + Returns a list of Messages with an errorlevel >= level + and removes additional lines from a parseerror - Arguments: - level ; + Private variables: + - __messages ; The messages list. - Private variables: - __messages ; The messages list. 
+ @arg level: error level + @type level: integer - Returns: - list ; + @return: list of Messages + @rtype: list """ ret = [] @@ -312,16 +276,17 @@ class Output() : def addOutput(self, name, data) : """ - If the output dictionary already has a node with the specified - name, the list that this name points to is expanded with the data. - Otherwise create a node and assign a list containing the data. + If the output dictionary already has a node with the specified + name, the list that this name points to is expanded with the data. + Otherwise create a node and assign a list containing the data. - Arguments: - name ; Name of a node in the output dictionary. - data ; The data to be stored at this node. + Private variables: + - __outputData ; The output dictionary. - Private variables: - __outputData ; The output dictionary. + @arg name: Name of a node in the output dictionary + @type name: string + @arg data: The data to be stored at this node + @type data: object """ if self.__outputData.has_key(name) : @@ -332,13 +297,16 @@ class Output() : def getOutput(self, name) : """ - Return a list of data from the output dictionary. + Return a list of data from the output dictionary. - Arguments: - name ; Name of a node in the output dictionary. + Private variables: + - __outputData ; The output dictionary. - Private variables: - __outputData ; The output dictionary. + @arg name: Name of a node in the output dictionary + @type name: string + + @return: output dictionary + @rtype: dictionary """ if self.__outputData.has_key(name) : @@ -348,19 +316,18 @@ class Output() : def getIndexedOutput(self, name, index) : """ - Return an element of a list, the list is called 'name' in de - __outputData dictionary. If either the list or the element does not - exist, return an empty list. + Return an element of a list, the list is called 'name' in de + __outputData dictionary. If either the list or the element does not + exist, return an empty list. - Arguments: - name ; Name of the list. 
- index ; Index of the element to be retuned. + @arg name: Name of the list. + @arg index: Index of the element to be retuned. - Private variables: - __outputData ; The output dictionary. + Private variables: + - __outputData ; The output dictionary. - Returns: - list ; The requested element. + @return: The requested element + @rtype: list """ if self.__outputData.has_key(name) : @@ -371,16 +338,16 @@ class Output() : def getMessagesWithErrorCode(self, errorcode): """ - Retrieve all messages that have a specific error code. + Retrieve all messages that have a specific error code. - Arguments: - errorcode ; The error code to filter on. + Private variables: + - __messages ; The messages list. - Private variables: - __messages ; The messages list. + @arg errorcode: The error code to filter on + @type errorcode: string - Returns: - list ; A filtered list. + @return: A filtered list + @rtype: list """ ret = [] @@ -393,17 +360,18 @@ class Output() : def Summary(self) : """ - Print a summary of the number of errors and warnings. - - Private variables: - __errors ; The number of errors. - __warnings ; The number of warnings. + Print a summary of the number of errors and warnings. - Returns: - triple: - integer ; Number of errors. - integer ; Number of warnings. - string ; Summary. + Private variables: + - __errors ; The number of errors. + - __warnings ; The number of warnings. + + @return: + triple: + - Number of errors + - Number of warnings + - Summary + @rtype: integer, integer, string """ e_s = 's' @@ -418,6 +386,47 @@ class Output() : #Summary #Output +class Message() : + """ + Container class for message variables. + + Special methods: + - __init__(origin, level, code, description) ; Make a message object. + + Public variables: + - origin ; Name of the module creating this object. + - level ; Importance of the message. + - code ; The error code of the message. + - description ; A description of the message. 
+ """ + + def __init__(self, origin, level, code, description) : + """ + Make a new message object. + + Public variables (altered): + - origin ; Name of the module creating this object. + - level ; Importance of the message. + - code ; The error code of the message. + - description ; A description of the message. + + @arg origin: Name of the module creating this object + @type origin: string + @arg level: Importance of the message + @type level: integer + @arg code: The error code of the message + @type code: string + @arg description: A description of the message + @type description: string + """ + + self.origin = origin + self.level = level + self.code = code + self.description = description + #__init__ +#Message + # # Unit test. # diff --git a/src/Modules/Parser.py b/src/Modules/Parser.py index f397f74ff1fcfbcf94816a9523a827dbb4961c22..538076b366e4d69f369308780f10e07eb209743c 100644 --- a/src/Modules/Parser.py +++ b/src/Modules/Parser.py @@ -1,34 +1,36 @@ #!/usr/bin/python """ - Module for parting a variant described using the HGVS nomenclature. +Module for parsing a variant described using the HGVS nomenclature. - A context-free parser is defined here, the nomenclature rules are specified - in BNF, which is used (with some minor modifications) as source of this - module. +A context-free parser is defined here, the nomenclature rules are specified +in Backus-Naur Form (BNF), which is used (with some minor modifications) as source of this +module. - Public classes: - Nomenclatureparser ; Parse an input string. +@requires: pyparsing """ +# Public classes: +# - Nomenclatureparser ; Parse an input string. + from pyparsing import * class Nomenclatureparser() : """ - Parse an input string. + Parse an input string. - Private variables: - __output ; The output object. + Private variables: + - __output ; The output object. - Public variables: - All variables defined below, they are all context-free grammar - rules. 
+ Public variables: + - All variables defined below, they are all context-free grammar + rules. - Special methods: - __init__() ; Initialise the class and enable packrat parsing. + Special methods: + - __init__() ; Initialise the class and enable packrat parsing. - Public methods: - parse(input) ; Parse the input string and return a parse tree. + Public methods: + - parse(input) ; Parse the input string and return a parse tree. """ # New: @@ -307,13 +309,13 @@ class Nomenclatureparser() : def __init__(self, output) : """ - Initialise the class and enable packrat parsing. + Initialise the class and enable packrat parsing. - Arguments: - output ; The output object. + Private variables (altered): + - __output ; Set to the output object. - Private variables (altered): - __output ; Set to the output object. + @arg output: The output object + @type output: object """ self.__output = output @@ -322,21 +324,21 @@ class Nomenclatureparser() : def parse(self, variant) : """ - Parse the input string and return a parse tree if the parsing was - successful. Otherwise print the parse error and the position in - the input where the error occurred. + Parse the input string and return a parse tree if the parsing was + successful. Otherwise print the parse error and the position in + the input where the error occurred. - Arguments: - variant ; The input string that needs to be parsed. + Private variables: + - __output ; The output object. - Private variables: - __output ; The output object. + Public variables: + - Var ; The top-level rule of our parser. - Public variables: - Var ; The top-level rule of our parser. + @arg variant: The input string that needs to be parsed + @type variant: string - Returns: - Object ; The parse tree containing the parse results. 
+ @return: The parse tree containing the parse results + @rtype: object """ try : diff --git a/src/Modules/Retriever.py b/src/Modules/Retriever.py index b7403dca9ef03da002cfbdf4fbb0b7d1081cf28f..7288b900b32d5bd9af9d709daa7baae8cb5b8cc4 100644 --- a/src/Modules/Retriever.py +++ b/src/Modules/Retriever.py @@ -1,15 +1,30 @@ #!/usr/bin/python """ - Module for retrieving files from either the cache or the NCBI. - - A hash of every retrieved file is stored in the internal database. If a - requested file is not found, but its hash is, we use additional information - to re-download the file. - - Public classes: - Retriever ; Retrieve a record from either the cache or the NCBI. +Module for retrieving files from either the cache or the NCBI. + +A hash of every retrieved file is stored in the internal database. If a +requested file is not found, but its hash is, we use additional information +to re-download the file. + +@requires: os +@requires: bz2 +@requires: hashlib +@requires: urllib2 +@requires: StringIO +@requires: ftplib +@requires: Bio.SeqIO +@requires: Bio.Entrez +@requires: Bio.Seq.UnknownSeq +@requires: Modules.Misc +@requires: Modules.LRGparser +@requires: Modules.GBparser +@requires: xml.dom.DOMException +@requires: xml.dom.minidom """ +# Public classes: +# - Retriever ; Retrieve a record from either the cache or the NCBI. + import os # path.isfile(), link() path.isdir(), path.mkdir(), # walk(), path.getsize(), path.join(), stat(), remove() @@ -30,58 +45,58 @@ import xml.dom.minidom class Retriever(object) : """ - Retrieve a record from either the cache or the NCBI. - - Inherited variables from Db.Output.Config: - email ; The email address which we give to the NCBI. - cache ; The directory where the records are stored. - cachesize ; Maximum size of the cache. - - Special methods: - __init__(config, ; Use variables from the configuration file to - output, initialise the class private variables. 
- database) - - - Private methods: - _foldersize(folder) ; Return the size of a folder. - _cleancache() ; Keep the cache at a maximum size. - _nametofile(name) ; Convert a name to a filename. - _write(raw_data, ; Write a record to a file. - filename, - extract) - _calcHash(content) ; Calculate the md5sum of 'content'. - _newUD() ; Generate a new UD number. - - Public methods: - retrieveslice(accno, ; Retrieve a chromosome slice from the NCBI. - start, - stop, - orientation) - retrievegene(gene, ; Retrieve a gene from the NCBI. - organism, - upstream, - downstream) - downloadrecord(url) ; Download a GenBank file. - uploadrecord(raw_data) ; Let someone upload a GenBank file. - loadrecord(identifier) ; Load a record, store it in the - cache, manage the cache and return - the record. - - Inherited methods from Db.Output: - WarningMsg(filename, message) ; Print a warning message. - ErrorMsg(filename, message) ; Print an error message and log it. - LogMsg(filename, message) ; Log a message. + Retrieve a record from either the cache or the NCBI. + + Inherited variables from Db.Output.Config: + - email ; The email address which we give to the NCBI. + - cache ; The directory where the records are stored. + - cachesize ; Maximum size of the cache. + + Special methods: + - __init__(config, output, database) ; Use variables from the + configuration file to initialise the class private variables. + + + + Private methods: + - _foldersize(folder) ; Return the size of a folder. + - _cleancache() ; Keep the cache at a maximum size. + - _nametofile(name) ; Convert a name to a filename. + - _write(raw_data, filename, extract) ; Write a record to a file. + - _calcHash(content) ; Calculate the md5sum of 'content'. + - _newUD() ; Generate a new UD number. + + Public methods: + - retrieveslice(accno, start, stop, orientation) ; Retrieve a chromosome + slice from the NCBI. + - retrievegene(gene, organism, upstream, downstream) ; Retrieve a gene + from the NCBI. 
+ - downloadrecord(url) ; Download a GenBank file. + - uploadrecord(raw_data) ; Let someone upload a GenBank file. + - loadrecord(identifier) ; Load a record, store it in the cache, manage + the cache and return the record. + + Inherited methods from Db.Output: + - WarningMsg(filename, message) ; Print a warning message. + - ErrorMsg(filename, message) ; Print an error message and log it. + - LogMsg(filename, message) ; Log a message. """ def __init__(self, config, output, database) : """ - Use variables from the configuration file for some simple - settings. Make the cache directory if it does not exist yet. + Use variables from the configuration file for some simple + settings. Make the cache directory if it does not exist yet. - Inherited variables from Db.Output.Config: - email ; The email address which we give to the NCBI. - cache ; The directory where the records are stored. + Inherited variables from Db.Output.Config: + - email ; The email address which we give to the NCBI. + - cache ; The directory where the records are stored. + + @arg config: + @type config: + @arg output: + @type output: + @arg database: + @type database: """ self._config = config @@ -95,13 +110,13 @@ class Retriever(object) : def _foldersize(self, folder) : """ - Return the size of a folder in bytes. - - Arguments: - folder ; Name of a directory. + Return the size of a folder in bytes. - Returns: - integer ; The size of the directory. + @arg folder: Name of a directory + @type folder: string + + @return: The size of the directory + @rtype: integer """ folder_size = 0 @@ -114,15 +129,15 @@ class Retriever(object) : def _cleancache(self) : """ - Keep removing files until the size of the cache is less than the - maximum size. - First, the cache checked for its size, if it exceeds the maximum - size the ``oldest'' files are deleted. Note that accessing a file - makes it ``new''. + Keep removing files until the size of the cache is less than the + maximum size. 
+ First, the cache checked for its size, if it exceeds the maximum + size the ``oldest'' files are deleted. Note that accessing a file + makes it ``new''. - Inherited variables from Db.Output.Config: - cache ; Directory under scrutiny. - cachesize ; Maximum size of the cache. + Inherited variables from Db.Output.Config: + - cache ; Directory under scrutiny. + - cachesize ; Maximum size of the cache. """ if self._foldersize(self._config.cache) < self._config.cachesize: @@ -147,16 +162,16 @@ class Retriever(object) : def _nametofile(self, name) : """ - Convert an accession number to a filename. + Convert an accession number to a filename. - Arguments: - name ; The accession number. + Inherited variables from Db.Output.Config: + - cache ; Name of the cache directory. - Inherited variables from Db.Output.Config: - cache ; Name of the cache directory. + @arg name: The accession number + @type name: string - Returns: - string ; A filename. + @return: A filename + @rtype: string """ return self._config.cache + '/' + name + "." + self.fileType + ".bz2" @@ -164,14 +179,15 @@ class Retriever(object) : def _write(self, raw_data, filename) : """ - Write raw data to a compressed file. + Write raw data to a compressed file. - Arguments: - raw_data ; The raw_data to be compressed and written - filename ; The intended name of the outfile + @arg raw_data: The raw_data to be compressed and written + @type raw_data: string + @arg filename: The intended name of the outfile + @type filename: string - Returns: - outfile ; The full paht and name of the file written + @return: outfile ; The full path and name of the file written + @rtype: string """ # Compress the data to save disk space. comp = bz2.BZ2Compressor() @@ -189,13 +205,13 @@ class Retriever(object) : def _calcHash(self, content) : """ - Calculate the md5sum of a piece of text. + Calculate the md5sum of a piece of text. - Arguments: - content ; Arbitrary text. 
+ @arg content: Arbitrary text + @type content: string - Returns: - string ; The md5sum of 'content'. + @return: The md5sum of 'content' + @rtype: string """ hashfunc = hashlib.md5() @@ -208,10 +224,10 @@ class Retriever(object) : def _newUD(self) : """ - Make a new UD number based on the current time (seconds since 1970). + Make a new UD number based on the current time (seconds since 1970). - Returns: - string ; A new UD number. + @return: A new UD number + @rtype: string """ M = Misc.Misc() @@ -223,13 +239,17 @@ class Retriever(object) : def _updateDBmd5(self, raw_data, name, GI): #TODO documentation """ - Arguments: - raw_data ; - name ; - GI ; + @todo: documentation + + @arg raw_data: + @type raw_data: + @arg name: + @type name: + @arg GI: + @type GI: - Returns: - string ; + @return: filename + @rtype: string """ currentmd5sum = self._database.getHash(name) @@ -250,14 +270,14 @@ class Retriever(object) : def snpConvert(self, rsId) : """ - Search an rsId in dbSNP and return all annotated HGVS notations of - it. + Search an rsId in dbSNP and return all annotated HGVS notations of + it. - Arguments: - rsId ; The id of the SNP. + @arg rsId: The id of the SNP + @type rsId: string - Returns: - list ; A list of HGVS notations. + @return: A list of HGVS notations + @rtype: list """ # A simple input check. @@ -295,28 +315,29 @@ class GenBankRetriever(Retriever): def write(self, raw_data, filename, extract) : """ - Write raw data to a file. The data is parsed before writing, if a - parse error occurs an error is returned and the function exits. - If 'filename' is set and 'extract' is set to 0, then 'filename' is - used for output. - If 'extract' is set to 1, then the filename is constructed from the - id of the GenBank record. Additionally the id and GI number are - returned for further processing (putting them in the internal - database). - - Arguments: - raw_data ; The data. - filename ; The intended name of the file. 
- extract ; Flag that indicates whether to extract the record ID - and GI number: - 0 ; Do not extract, use 'filename'. - 1 ; Extract. - - Returns: - tuple ; Depending on the value of 'extract': - 0 ; ('filename', None) - 1 ; (id, GI) - + Write raw data to a file. The data is parsed before writing, if a + parse error occurs an error is returned and the function exits. + If 'filename' is set and 'extract' is set to 0, then 'filename' is + used for output. + If 'extract' is set to 1, then the filename is constructed from the + id of the GenBank record. Additionally the id and GI number are + returned for further processing (putting them in the internal + database). + + @arg raw_data: The data + @type raw_data: string + @arg filename: The intended name of the file. + @type filename: string + @arg extract: Flag that indicates whether to extract the record ID and + GI number: + - 0 ; Do not extract, use 'filename' + - 1 ; Extract + @type extract: integer + + @return: tuple ; Depending on the value of 'extract': + - 0 ; ('filename', None) + - 1 ; (id, GI) + @rtype: tuple (string, string) """ if raw_data == "\nNothing has been found\n" : @@ -394,30 +415,34 @@ class GenBankRetriever(Retriever): def retrieveslice(self, accno, start, stop, orientation) : """ - Retrieve a slice of a chromosome. - If the arguments are recognised (found in the internal database), - we look if the associated file is still present and if so: return - its UD number. - If the arguments are recognised but no file was found, we download - the new slice and update the hash (and log if the hash changes). - If the arguments are not recognised, we download the new slice and - make a new UD number. - The content of the slice is placed in the cache with the UD number - as filename. - - Arguments: - accno ; The accession number of the chromosome. - start ; Start position of the slice. - stop ; End position of the slice. - orientation ; Orientatiion of the slice: - 1 ; Forward. - 2 ; Reverse complement. 
- - Inherited variables from Db.Output.Config: - maxDldSize ; Maximum size of the slice. + Retrieve a slice of a chromosome. + If the arguments are recognised (found in the internal database), + we look if the associated file is still present and if so: return + its UD number. + If the arguments are recognised but no file was found, we download + the new slice and update the hash (and log if the hash changes). + If the arguments are not recognised, we download the new slice and + make a new UD number. + The content of the slice is placed in the cache with the UD number + as filename. - Returns: - string ; An UD number. + Inherited variables from Db.Output.Config: + - maxDldSize ; Maximum size of the slice. + + @arg accno: The accession number of the chromosome + @type accno: string + @arg start: Start position of the slice + @type start: integer + @arg stop: End position of the slice. + @type stop: integer + @arg orientation: + Orientation of the slice: + - 1 ; Forward + - 2 ; Reverse complement + @type orientation: integer + + @return: An UD number + @rtype: string """ # Not a valid slice. @@ -463,16 +488,20 @@ class GenBankRetriever(Retriever): def retrievegene(self, gene, organism, upstream, downstream) : """ - Query the NCBI for the chromosomal location of a gene and make a - slice if the gene can be found. + Query the NCBI for the chromosomal location of a gene and make a + slice if the gene can be found. - Arguments: - gene ; Name of the gene. - organism ; The organism in which we search. - upstream ; Number of upstream nucleotides for the slice. - downstream ; Number of downstream nucleotides for the slice. + @arg gene: Name of the gene + @type gene: string + @arg organism: The organism in which we search. + @type organism: string + @arg upstream: Number of upstream nucleotides for the slice. + @type upstream: integer + @arg downstream: Number of downstream nucleotides for the slice. 
+ @type downstream: integer - Returns: + @return: slice + @rtype: """ # Search the NCBI for a specific gene in an organism. @@ -532,16 +561,19 @@ class GenBankRetriever(Retriever): def downloadrecord(self, url) : """ - Download a GenBank record from a URL. - If the downloaded file is recognised by its hash, the old UD number - is used. + Download a GenBank record from a URL. + If the downloaded file is recognised by its hash, the old UD number + is used. - Arguments: - url ; Location of a GenBank record. + Inherited variables from Db.Output.Config: + - maxDldSize ; Maximum size of the file. + - minDldSize ; Minimum size of the file. - Inherited variables from Db.Output.Config: - maxDldSize ; Maximum size of the file. - minDldSize ; Minimum size of the file. + @arg url: Location of a GenBank record + @type url: string + + @return: UD or None + @rtype: string """ handle = urllib2.urlopen(url) @@ -580,12 +612,15 @@ class GenBankRetriever(Retriever): def uploadrecord(self, raw_data) : """ - Write an uploaded record to a file. - If the downloaded file is recognised by its hash, the old UD number - is used. + Write an uploaded record to a file. + If the downloaded file is recognised by its hash, the old UD number + is used. - Arguments: - raw_data ; A GenBank record. + @arg raw_data: A GenBank record + @type raw_data: string + + @return: + @rtype: string????? """ md5sum = self._calcHash(raw_data) @@ -603,15 +638,15 @@ class GenBankRetriever(Retriever): def loadrecord(self, identifier) : """ - Load a record and return it. - If the filename associated with the accession number is not found - in the cache, try to re-download it. + Load a record and return it. + If the filename associated with the accession number is not found + in the cache, try to re-download it. - Arguments: - identifier ; An accession number. 
+ @arg identifier: An accession number + @type identifier: string - Returns: - record ; A GenBank.Record record + @return: A GenBank.Record record + @rtype: object """ if (identifier[0].isdigit()) : # This is a GI identifier. name = self._database.getGBFromGI(identifier) @@ -660,17 +695,25 @@ class GenBankRetriever(Retriever): class LRGRetriever(Retriever): """ - Retrieve a LRG record from either the cache or the web. + Retrieve a LRG record from either the cache or the web. - Public methods: - loadrecord(identifier) ; Load a record, store it in the - cache, manage the cache and return - the record. + Public methods: + - loadrecord(identifier) ; Load a record, store it in the cache, manage + the cache and return the record. """ def __init__(self, config, output, database): #TODO documentation """ + Initialize the class. + + @todo: documentation + @arg config: + @type config: + @arg output: + @type output: + @arg database: + @type database: """ # Recall init of parent @@ -681,14 +724,14 @@ class LRGRetriever(Retriever): def loadrecord(self, identifier): """ - Load and parse a LRG file based on the identifier + Load and parse a LRG file based on the identifier - Arguments: - identifier ; The name of the LRG file to read + @arg identifier: The name of the LRG file to read + @type identifier: string - Returns: - record ; GenRecord.Record of LRG file - None ; in case of failure + @return: record ; GenRecord.Record of LRG file + None ; in case of failure + @rtype: """ # Make a filename based upon the identifier. @@ -714,19 +757,18 @@ class LRGRetriever(Retriever): def fetch(self, name): """ - Fetch the LRG file and store in the cache directory. First try to - grab the file from the confirmed section, if this fails, get it - from the pending section. + Fetch the LRG file and store in the cache directory. First try to + grab the file from the confirmed section, if this fails, get it + from the pending section. 
- Arguments: - name ; The name of the LRG file to fetch + Inherited variables from Config.Retriever + - lrgURL ; The base url from where LRG files are fetched - Inherited variables from Config.Retriever - lrgURL ; The base url from where LRG files are fetched + @arg name: The name of the LRG file to fetch + @type name: string - Returns: - path ; the full path to the file - None ; in case of an error + @return: the full path to the file; None in case of an error + @rtype: string """ prefix = self._config.lrgURL @@ -751,18 +793,19 @@ class LRGRetriever(Retriever): def downloadrecord(self, url, name = None) : """ - Download an LRG record from an URL. + Download an LRG record from an URL. - Arguments: - url ; Location of the LRG record. + Inherited variables from Db.Output.Config: + - maxDldSize ; Maximum size of the file. + - minDldSize ; Minimum size of the file. - Inherited variables from Db.Output.Config: - maxDldSize ; Maximum size of the file. - minDldSize ; Minimum size of the file. + @arg url: Location of the LRG record + @type url: string - Returns: - filename ; The full path to the file - None ; in case of failure + @return: + - filename ; The full path to the file + - None ; in case of failure + @rtype: string """ lrgID = name or os.path.splitext(os.path.split(url)[1])[0] @@ -812,17 +855,18 @@ class LRGRetriever(Retriever): def write(self, raw_data, filename) : """ - Write raw LRG data to a file. The data is parsed before writing, - if a parse error occurs None is returned. - - Arguments: - raw_data ; The data. - filename ; The intended name of the file. + Write raw LRG data to a file. The data is parsed before writing, + if a parse error occurs None is returned. 
- Returns: - filename ; The full path and name of the file written - None ; In case of an error + @arg raw_data: The data + @type raw_data: string + @arg filename: The intended name of the file + @type filename: string + @return: + - filename ; The full path and name of the file written + - None ; In case of an error + @rtype: string """ # Dirty way to test if a file is valid, # Parse the file to see if it's a real LRG file. diff --git a/src/Modules/Scheduler.py b/src/Modules/Scheduler.py index 6c5a6eaaff71a3d257d6a2409e6b350394208926..4aabc806681e8e6f21fb90714eb0252af49a22b7 100644 --- a/src/Modules/Scheduler.py +++ b/src/Modules/Scheduler.py @@ -1,13 +1,23 @@ #!/usr/bin/python """ - Module used to add and manage the Batch Jobs - - Public classes: - Scheduler ; Manages the batch jobs and contains the methods for - * Batch Name Checker - * Batch Syntax Checker - * Batch Position Converter +Module used to add and manage the Batch Jobs. + +@requires: subprocess +@requires: os +@requires: smtplib +@requires: email.mime.text.MIMEText +@requires: Modules.Config +@requires: Modules.Output +@requires: Modules.Parser +@requires: Modules.Mapper +@requires: Mutalyzer """ +# Public classes: +# - Scheduler ; Manages the batch jobs and contains the methods for +# - Batch Name Checker +# - Batch Syntax Checker +# - Batch Position Converter + import subprocess # subprocess.Popen import os # os.path.exists import smtplib # smtplib.STMP @@ -22,41 +32,41 @@ import Mutalyzer # Mutalyzer.process __all__ = ["Scheduler"] -def debug(f): +def debug(f) : """ - Decorator for functions called from within the daemon. Can be used - to debug errors that are hidden because the daemon's stdout and - errout filehandlers are closed. + Decorator for functions called from within the daemon. Can be used + to debug errors that are hidden because the daemon's stdout and + errout filehandlers are closed. 
- Usage: Place the decorator line above the function to investigate + Usage: Place the decorator line above the function to investigate - >>> @debug - >>> def process(self) : - >>> pass # function code + >>> @debug + >>> def process(self) : + >>> pass # function code """ #NOTE: All debug functions & methods should be moved to a DEBUG module - def _tempf(*args): + def _tempf(*args) : """ - The decorated function is replaced by this function. Which sets up - the filehandle to write to and print out additional debug info. + The decorated function is replaced by this function. Which sets up + the filehandle to write to and print out additional debug info. - The original function is called from within a try, except clause - which catches [AND DOES NOT RERAISE] an exception occuring in the - debugged function. + The original function is called from within a try, except clause + which catches [AND DOES NOT RERAISE] an exception occuring in the + debugged function. - This can result in odd behaviour, therefor the decorators should - be removed from any production version. + This can result in odd behaviour, therefor the decorators should + be removed from any production version. """ of = open("/tmp/daemon.out", "a+") - try: + try : of.write("\nFunction %s\n\targs: %s\n\t" % (`f`, `args`)) ret = f(*args) # Actual function call of.write("Returns: %s" % `ret`) return ret #try - except Exception, e: + except Exception, e : import traceback of.write("\nEXCEPTION:\n") traceback.print_exc(file=of) @@ -67,30 +77,32 @@ def debug(f): class Scheduler() : """ - Manages the batch jobs and contains the methods for - Batch Name Checker - Batch Syntax Checker - Batch Position Converter - - Special methods: - __init__(config, database) ; - - Public methods: - addJob(outputFilter, eMail, queue, fromHost, jobType, Arg1) - ; Add a job to the database jobqueue and start the - batchChecker daemon. 
- process() ; Iterate over & process the jobs in the jobqueue + Special methods: + - __init__(config, database) ; + + Public methods: + - addJob(outputFilter, eMail, queue, fromHost, jobType, Arg1) ; Add a + job to the database jobqueue and start the batchChecker daemon. + - process() ; Iterate over & process the jobs in the jobqueue. + + @summary: Manages the batch jobs and contains the methods for + - Batch Name Checker + - Batch Syntax Checker + - Batch Position Converter """ def __init__(self, config, database) : #TODO: documentation """ - Initialize the Scheduler, which requires a config object - and a database connection. - - Arguments: - config ; - database ; + Initialize the Scheduler, which requires a config object + and a database connection. + + @todo: documentation + + @arg config: Config object + @type config: object + @arg database: + @type database: """ self.__config = config @@ -99,15 +111,18 @@ class Scheduler() : def __sendMail(self, mailTo, url) : """ - Send an e-mail containing an url to a batch job submitter. - - Arguments: - mailTo ; The batch job submitter. - url ; The url containing the results. - - Private variables: - __config ; The variables mailMessage, mailSubject and mailFrom - are used. + Send an e-mail containing an url to a batch job submitter. + + Private variables: + - __config ; The variables mailMessage, mailSubject and mailFrom + are used. + + @todo: Handle Connection errors in a try, except clause + + @arg mailTo: The batch job submitter + @type mailTo: string + @arg url: The url containing the results + @type url: string """ #TODO: Handle Connection errors in a try, except clause @@ -128,73 +143,93 @@ class Scheduler() : smtpInstance.quit() #__sendMail - def __processFlags(self, O, flags): + def __processFlags(self, O, flags) : """ - Translate the flags to error & info messages. + Translate the flags to error & info messages. 
- Arguments: - O ; Output object of the current batchentry - flags ; Flags of the current batchentry + Side-effect: + - Added messages to the Output object. - Returns: - skip ; True if the entry must be skipped + @arg O: Output object of the current batchentry + @arg flags: Flags of the current batchentry - Side-effect: - Added messages to the Output object + @return: skip ; True if the entry must be skipped + @rtype: boolean """ - if not flags: return - if 'S' in flags: #This entry is going to be skipped + + if not flags : + return False + if 'S' in flags : #This entry is going to be skipped #Add a usefull message to the Output object - if "S0" in flags: + if "S0" in flags : message = "Entry could not be formatted correctly, check "\ "batch input file help for details" - elif "S9" in flags: + elif "S9" in flags : message = "Empty Line" - else: + else : message = "Skipping entry" O.addMessage(__file__, 4, "EBSKIP", message) return True #skip #if - if 'A' in flags: #This entry is altered before execution + if 'A' in flags : #This entry is altered before execution O.addMessage(__file__, 3, "WEALTER", "Entry altered before " "execution") + return False #__processFlags - def __alterBatchEntries(self, jobID, old, new, flag, nselector): + def __alterBatchEntries(self, jobID, old, new, flag, nselector) : """ - Alias for the database.updateBatchDb method. - - Replace within one JobID all entries matching old with new, if - they do not match the negative selector. - - Example: - NM_002001(FCER1A_v001):c.1A>C ; this would result in the - continuous fetching of the - reference because no version - number is given. - In this case the arguments would be: - old ; NM_002001 - new ; NM_002001.2 - nselector ; NM_002001[[.period.]] - - The nselector is used to prevent the replacement of - false positives. e.g. NM_002001.1(FCER1A_v001):c.1A>C should not - be replaced. The double bracket notation is the MySQL escape char - for a regular expression. 
+ Alias for the database.updateBatchDb method. + + Replace within one JobID all entries matching old with new, if + they do not match the negative selector. + + Example: + NM_002001(FCER1A_v001):c.1A>C ; this would result in the continuous + fetching of the reference because no version number is given. + In this case the arguments would be: + - old ; NM_002001 + - new ; NM_002001.2 + - nselector ; NM_002001[[.period.]] + + The nselector is used to prevent the replacement of + false positives. e.g. NM_002001.1(FCER1A_v001):c.1A>C should not + be replaced. The double bracket notation is the MySQL escape char + for a regular expression. + + @arg jobID: + @type jobID: + @arg old: + @type old: + @arg new: + @type new: + @arg flag: + @type flag: + @arg nselector: + @type nselector: """ + self.__database.updateBatchDb(jobID, old, new, flag, nselector) #__alterBatchEntries - def __skipBatchEntries(self, jobID, flag, selector): + def __skipBatchEntries(self, jobID, flag, selector) : """ - Alias for the database.skipBatchDb method. - - Skip all batch entries that match a certain selector. + Alias for the database.skipBatchDb method. + + Skip all batch entries that match a certain selector. + + @arg jobID: + @type jobID: + @arg flag: + @type flag: + @arg selector: + @type selector: """ + self.__database.skipBatchDb(jobID, selector, flag) #__skipBatchEntries - def _updateDbFlags(self, O, jobID): + def _updateDbFlags(self, O, jobID) : """ Check and set the flags for other entries of jobID. @@ -202,13 +237,14 @@ class Scheduler() : If these are set, this means that identical entries need to be skipped / altered. - Arguments: - O ; Output object of the current batchentry - jobID ; ID of job, so that the altering is only done within - one job. 
- Side-effect: - Added flags to entries in the database + - Added flags to entries in the database + + @arg O: Output object of the current batchentry + @type O: object + @arg jobID: ID of job, so that the altering is only done within one + job + @type jobID: """ flags = O.getOutput("BatchFlags") @@ -225,8 +261,8 @@ class Scheduler() : if not flags: return #First check if we need to skip - for flag, args in flags: - if 'S' in flag: + for flag, args in flags : + if 'S' in flag : selector = args # Strip argument O.addMessage(__file__, 3, "WBSKIP", "All further occurrences with '%s' will be " @@ -236,8 +272,8 @@ class Scheduler() : #if #for #If not skipflags, check if we need to alter - for flag, args in flags: - if 'A' in flag: + for flag, args in flags : + if 'A' in flag : old, new, nselector = args #Strip arguments O.addMessage(__file__, 3, "WBSUBST", "All further occurrences of %s will be substituted " @@ -249,51 +285,53 @@ class Scheduler() : def process(self) : """ - Start the mutalyzer Batch Processing. This method retrieves - all jobs from the database and processes them in a roundrobin - fashion. If all jobs are done the process checks if new jobs are - added during the last processing round. - - This method uses two database tables, BatchJob and BatchQueue. - - The jobList is an array of tuples with three elements - jobID ; The ID of the job - jobType ; The type of the job - argument1 ; Currently only used for the ConversionChecker - to send the build version. - - If the jobList is not empty, the method will iterate once over the - list and fetch the first entry of a job from the database table - BatchQueue. This request returns both the input for the batch and - the flags for the job. - - #Flags - A job can be flagged in two ways: - A ; Altered - this means that the input is altered - before execution. This could be the case if an - entry uses an accession number without a version. 
- If a version is retrieved from the NCBI, all - further occurences of that accession will be - replaced by the accession with version number. - S ; Skipped - this means that this batchentry will be - skipped by the batchprocess. This could be the - case if the user made a mistake that could not be - auto fixed and henceforth all occurences of the - mistake will be skipped. - A Flag consists of either an A or S followed by a digit, which - refers to the reason of alteration / skip. + Start the mutalyzer Batch Processing. This method retrieves + all jobs from the database and processes them in a roundrobin + fashion. If all jobs are done the process checks if new jobs are + added during the last processing round. + + This method uses two database tables, BatchJob and BatchQueue. + + The jobList is an array of tuples with three elements + - jobID ; The ID of the job + - jobType ; The type of the job + - argument1 ; Currently only used for the ConversionChecker + to send the build version. + + If the jobList is not empty, the method will iterate once over the + list and fetch the first entry of a job from the database table + BatchQueue. This request returns both the input for the batch and + the flags for the job. + + #Flags + A job can be flagged in two ways: + - A ; Altered - this means that the input is altered + before execution. This could be the case if an + entry uses an accession number without a version. + If a version is retrieved from the NCBI, all + further occurences of that accession will be + replaced by the accession with version number. + - S ; Skipped - this means that this batchentry will be + skipped by the batchprocess. This could be the + case if the user made a mistake that could not be + auto fixed and henceforth all occurences of the + mistake will be skipped. + + A Flag consists of either an A or S followed by a digit, which + refers to the reason of alteration / skip. 
""" + jobList = self.__database.getJobs() while jobList : for i, jobType, arg1 in jobList : inputl, flags = self.__database.getFromQueue(i) - if not(inputl is None): - if jobType == "NameChecker": + if not (inputl is None) : + if jobType == "NameChecker" : self._processNameBatch(inputl, i, flags) - elif jobType == "SyntaxChecker": + elif jobType == "SyntaxChecker" : self._processSyntaxCheck(inputl, i, flags) - elif jobType == "PositionConverter": + elif jobType == "PositionConverter" : self._processConversion(inputl, i, arg1, flags) else: #unknown jobType pass #TODO: Scream burning water and remove from Queue @@ -307,18 +345,20 @@ class Scheduler() : #while #process - def _processNameBatch(self, cmd, i, flags): + def _processNameBatch(self, cmd, i, flags) : """ - Process an entry from the Name Batch, write the results - to the job-file. If an Exception is raised, catch and continue. - - Arguments: - cmd ; The NameChecker input - i ; The JobID - flags ; Flags of the current entry - - Side-effect: - Output written to outputfile + Process an entry from the Name Batch, write the results + to the job-file. If an Exception is raised, catch and continue. + + Side-effect: + - Output written to outputfile. 
+ + @arg cmd: The NameChecker input + @type cmd: + @arg i: The JobID + @type i: + @arg flags: Flags of the current entry + @type flags: """ C = Config.Config() @@ -329,18 +369,18 @@ class Scheduler() : #Read out the flags skip = self.__processFlags(O, flags) - if not skip: + if not skip : #Run mutalyzer and get values from Output Object 'O' - try: + try : Mutalyzer.process(cmd, C, O) - except Exception, e: + except Exception, e : #Catch all exceptions related to the processing of cmd O.addMessage(__file__, 4, "EBATCHU", "Unexpected error occurred, dev-team notified") import traceback O.addMessage(__file__, 4, "DEBUG", `traceback.format_exc()`) #except - finally: + finally : #check if we need to update the database self._updateDbFlags(O, i) #if @@ -350,14 +390,14 @@ class Scheduler() : outputline = "%s\t" % cmd outputline += "%s\t" % "|".join(O.getBatchMessages(3)) - if batchOutput: + if batchOutput : outputline += batchOutput[0] outputline += "\n" #Output filename = "%s/Results_%s.txt" % (self.__config.resultsDir, i) - if not os.path.exists(filename): + if not os.path.exists(filename) : # If the file does not yet exist, create it with the correct # header above it. The header is read from the config file as # a list. We need a tab delimited string. @@ -365,7 +405,7 @@ class Scheduler() : handle = open(filename, 'a') handle.write("%s\n" % "\t".join(header)) #if - else: + else : handle = open(filename, 'a') handle.write(outputline) @@ -374,18 +414,20 @@ class Scheduler() : "Finished NameChecker batchvariant " + cmd) #_processNameBatch - def _processSyntaxCheck(self, cmd, i, flags): + def _processSyntaxCheck(self, cmd, i, flags) : """ - Process an entry from the Syntax Check, write the results - to the job-file. - - Arguments: - cmd ; The Syntax Checker input - i ; The JobID - flags ; Flags of the current entry - - Side-effect: - Output written to outputfile + Process an entry from the Syntax Check, write the results + to the job-file. 
+ + Side-effect: + - Output written to outputfile + + @arg cmd: The Syntax Checker input + @type cmd: + @arg i: The JobID + @type i: + @arg flags: Flags of the current entry + @type flags: """ C = Config.Config() @@ -397,19 +439,19 @@ class Scheduler() : skip = self.__processFlags(O, flags) #Process - if not skip: + if not skip : parsetree = P.parse(cmd) - else: + else : parsetree = None - if parsetree: + if parsetree : result = "OK" - else: + else : result = "|".join(O.getBatchMessages(3)) #Output filename = "%s/Results_%s.txt" % (self.__config.resultsDir, i) - if not os.path.exists(filename): + if not os.path.exists(filename) : # If the file does not yet exist, create it with the correct # header above it. The header is read from the config file as # a list. We need a tab delimited string. @@ -417,7 +459,7 @@ class Scheduler() : handle = open(filename, 'a') handle.write("%s\n" % "\t".join(header)) #if - else: + else : handle = open(filename, 'a') handle.write("%s\t%s\n" % (cmd, result)) @@ -426,21 +468,24 @@ class Scheduler() : "Finished SyntaxChecker batchvariant " + cmd) #_processSyntaxCheck - def _processConversion(self, cmd, i, build, flags): + def _processConversion(self, cmd, i, build, flags) : """ - Process an entry from the Position Converter, write the results - to the job-file. The Position Coverter is wrapped in a try except - block which ensures that he Batch Process keeps running. Errors - are caught and the user will be notified. - - Arguments: - cmd ; The Syntax Checker input - i ; The JobID - build ; The build to use for the converter - flags ; Flags of the current entry - - Side-effect: - Output written to outputfile + Process an entry from the Position Converter, write the results + to the job-file. The Position Coverter is wrapped in a try except + block which ensures that he Batch Process keeps running. Errors + are caught and the user will be notified. + + Side-effect: + - Output written to outputfile. 
+ + @arg cmd: The Syntax Checker input + @type cmd: string + @arg i: The JobID + @type i: integer + @arg build: The build to use for the converter + @type build: string + @arg flags: Flags of the current entry + @type flags: """ C = Config.Config() @@ -454,8 +499,8 @@ class Scheduler() : "Received PositionCoverter batchvariant " + cmd) skip = self.__processFlags(O, flags) - if not skip: - try: + if not skip : + try : #process converter = Mapper.Converter(build, C, O) @@ -463,13 +508,13 @@ class Scheduler() : variant = converter.correctChrVariant(variant) #TODO: Parse the variant and check for c or g. This is ugly - if not(":c." in variant or ":g." in variant): + if not(":c." in variant or ":g." in variant) : #Bad name P = Parser.Nomenclatureparser(O) parsetree = P.parse(variant) #if - if ":c." in variant: + if ":c." in variant : # Do the c2chrom dance variant = converter.c2chrom(variant) # NOTE: @@ -480,10 +525,10 @@ class Scheduler() : # If the input is a genomic variant or if we converted a # coding variant to a genomic variant we try to find all # other affected coding variants. - if variant and ":g." in variant: + if variant and ":g." in variant : # Do the chrom2c dance variants = converter.chrom2c(variant, "dict") - if variants: + if variants : gName = variant # Due to the cyclic behavior of the Position Converter # we know for a fact that if a correct chrom name is @@ -492,7 +537,7 @@ class Scheduler() : # variants from a nested lists and store them. 
cNames = [cName for cName2 in variants.values() \ for cName in cName2] - except Exception, e: + except Exception, e : #Catch all exceptions related to the processing of cmd O.addMessage(__file__, 4, "EBATCHU", "Unexpected error occurred, dev-team notified") @@ -503,7 +548,7 @@ class Scheduler() : #Output filename = "%s/Results_%s.txt" % (self.__config.resultsDir, i) - if not os.path.exists(filename): + if not os.path.exists(filename) : # If the file does not yet exist, create it with the correct # header above it. The header is read from the config file as # a list. We need a tab delimited string. @@ -511,7 +556,7 @@ class Scheduler() : handle = open(filename, 'a') handle.write("%s\n" % "\t".join(header)) #if - else: + else : handle = open(filename, 'a') handle.write("%s\t%s\t%s\t%s\n" % (cmd, error, gName, "\t".join(cNames))) @@ -524,15 +569,25 @@ class Scheduler() : def addJob(self, outputFilter, eMail, queue, fromHost, jobType, Arg1) : """ - Add a job to the Database and start the BatchChecker. - - Arguments: - outputFilter ; Filter the output of Mutalyzer - eMail ; e-mail address of batch supplier - queue ; A list of jobs - fromHost ; From where is the request made - jobType ; The type of Batch Job that should be run - Arg1 ; Batch Arguments, for now only build info + Add a job to the Database and start the BatchChecker. + + @arg outputFilter: Filter the output of Mutalyzer + @type outputFilter: + @arg eMail: e-mail address of batch supplier + @type eMail: string + @arg queue: A list of jobs + @type queue: list + @arg fromHost: From where is the request made + @type fromHost: + @arg jobType: The type of Batch Job that should be run + @type jobType: + @arg Arg1: Batch Arguments, for now only build info + @type Arg1: + + @return: jobID + @rtype: + + @todo: outputFilter is not used """ #TODO: outputFilter is not used @@ -550,12 +605,14 @@ class Scheduler() : # output in terms of input line and outputline. 
if inputl.startswith("~!"): #Dirty Escape inputl = inputl[2:] - if inputl: + if inputl : flag = "S0" # Flag for wrong format - else: + else : flag = "S9" # Flag for empty line inputl = " " #Database doesn't like an empty inputfield - else: + #else + #if + else : flag = None self.__database.addToQueue(jobID, inputl, flag) diff --git a/src/Modules/Serializers.py b/src/Modules/Serializers.py index 57d3e68eb8f3d757e72c8869bbcd6c3c13f88046..954b2a1ebc0134b896ab9ba162ea13b6c3bd9d22 100644 --- a/src/Modules/Serializers.py +++ b/src/Modules/Serializers.py @@ -1,5 +1,16 @@ +#!/usr/bin/python + """ - Collection of Serilizable Objects used by the webservice +Collection of Serilizable Objects used by the webservice + +@requires: soaplib.serializers.primitive.String +@requires: soaplib.serializers.primitive.Integer +@requires: soaplib.serializers.primitive.Array +@requires: soaplib.serializers.clazz.ClassSerializer +@requires: ZSI.TC +@requires: ZSI.fault.Fault + +@todo: documentation """ from soaplib.serializers.primitive import String, Integer, Array from soaplib.serializers.clazz import ClassSerializer @@ -8,11 +19,11 @@ from ZSI.fault import Fault class SoapMessage(ClassSerializer): """ - Send info message over the soapline + Send info message over the soapline - Attributes: - errorcode ; The error code affiliated with the error message - message ; The error message + Attributes: + - errorcode ; The error code affiliated with the error message + - message ; The error message """ class types(): @@ -27,21 +38,21 @@ class SoapMessage(ClassSerializer): class Mapping(ClassSerializer) : """ - Extended ClassSerializer object with mixed types of attributes - - Attributes: - startmain ; Define the type of startmain. - startoffset ; Define the type of startoffset. - endmain ; Define the type of endmain value. - endoffset ; Define the type of endoffset value. - start_g ; Define the type of start_g value. - end_g ; Define the type of end_g value. 
- mutationType ; Define the type of mutation type + Extended ClassSerializer object with mixed types of attributes + + Attributes: + - startmain ; Define the type of startmain. + - startoffset ; Define the type of startoffset. + - endmain ; Define the type of endmain value. + - endoffset ; Define the type of endoffset value. + - start_g ; Define the type of start_g value. + - end_g ; Define the type of end_g value. + - mutationType ; Define the type of mutation type """ class types() : """ - Types are defined here for the soaplib module. + Types are defined here for the soaplib module. """ startmain = Integer @@ -57,7 +68,7 @@ class Mapping(ClassSerializer) : def __init__(self) : """ - Types are defined here for the TC module. + Types are defined here for the TC module. """ self.typecode = TC.Struct(Mapping, [ @@ -78,12 +89,12 @@ class Mapping(ClassSerializer) : class Transcript(ClassSerializer) : """ - Extended ClassSerializer object with mixed types of attributes + Extended ClassSerializer object with mixed types of attributes - Attributes: - trans_start ; Define the type of trans_start - trans_stop ; Define the type of trans_stop - CDS_stop ; Define the type of CDS_stop + Attributes: + - trans_start ; Define the type of trans_start + - trans_stop ; Define the type of trans_stop + - CDS_stop ; Define the type of CDS_stop """ class types() : diff --git a/src/Modules/Web.py b/src/Modules/Web.py index 635a86d1e551b1b2a65bbb414bb9c1dc137baa77..d1ce5eb92513925fd2182feed1604f117c7abd7b 100644 --- a/src/Modules/Web.py +++ b/src/Modules/Web.py @@ -1,11 +1,19 @@ #!/usr/bin/python """ - Module that provides general functions used by the web interfaces. - - Public classes: - Web ; General functions used by the web interfaces. +Module that provides general functions used by the web interfaces. 
+ +@requires: sys +@requires: re +@requires: urllib +@requires: cStringIO.StringIO +@requires: simpletal.simpleTALES +@requires: simpletal.simpleTAL +@requires: Config """ +# Public classes: +# - Web ; General functions used by the web interfaces. + import sys # sys.stdout import re # match @@ -19,27 +27,27 @@ import Config class Web() : """ - General functions used by the web interfaces. + General functions used by the web interfaces. - Public variables: - version ; This is the version that is displayed on the web pages, - WSDL files, etc. + Public variables: + - version ; This is the version that is displayed on the web pages, + WSDL files, etc. - Special methods: - __init__() ; Initialise the class. + Special methods: + - __init__() ; Initialise the class. - Public methods: - run(func, *args) ; Run func(*args) and return stdout. - tal(scheme, filename, args) ; Compile a TAL template to HTML or XML. - read(path, req) ; Read a file and return the content. + Public methods: + - run(func, *args) ; Run func(*args) and return stdout. + - tal(scheme, filename, args) ; Compile a TAL template to HTML or XML. + - read(path, req) ; Read a file and return the content. """ def __init__(self) : """ - Initialise the class. + Initialise the class. - Public variables (altered): - version ; Here the displayed version is defined. + Public variables (altered): + - version ; Here the displayed version is defined. """ self.version = "2.0 β-5" @@ -52,14 +60,15 @@ class Web() : def run(self, func, *args) : """ - Run any function and return standard output as a string. + Run any function and return standard output as a string. - Arguments: - func ; The function that has to be called. - *args ; The arguments of func. + @arg func: The function that has to be called + @type func: function + @arg args: arguments for the function to call + @type args: list - Returns: - string ; Everything that func(*args) writes to standard output. 
+ @return: Everything that func(*args) writes to standard output + @rtype: string """ old_stdout = sys.stdout @@ -74,17 +83,20 @@ class Web() : def tal_old(self, scheme, filename, args) : #TODO merge this function with 'tal' (below). """ - Compile a TAL template to HTML or XML. - - Arguments: - scheme ; Either "HTML" or "XML", output will be in this - format. - filename ; The filename of the template. - args ; A dictionary with variables (whose name correspond - to the ones in the template) and their values. - - Returns: - string ; An HTML or XML file. + Compile a TAL template to HTML or XML. + + @todo: merge this function with 'tal' + + @arg scheme: Either "HTML" or "XML", output will be in this format + @type scheme: string + @arg filename: The filename of the template + @type filename: string + @arg args: A dictionary with variables (whose name correspond to + the ones in the template) and their values + @type args: dictionary + + @return: An HTML or XML file + @rtype: string """ from simpletal import simpleTALES # context(), addGlobal() @@ -118,17 +130,18 @@ class Web() : def tal(self, scheme, filename, args) : """ - Compile a TAL template to HTML or XML. - - Arguments: - scheme ; Either "HTML" or "XML", output will be in this - format. - filename ; The filename of the template. - args ; A dictionary with variables (whose name correspond - to the ones in the template) and their values. - - Returns: - string ; An HTML or XML file. + Compile a TAL template to HTML or XML. + + @arg scheme: Either "HTML" or "XML", output will be in this format + @type scheme: string + @arg filename: The filename of the template + @type filename: string + @arg args: A dictionary with variables (whose name correspond to + the ones in the template) and their values + @type args: dictionary + + @return: An HTML or XML file + @rtype: string """ context = simpleTALES.Context() @@ -164,14 +177,15 @@ class Web() : def read(self, path, req) : """ - Read a file and return its content. 
+ Read a file and return its content. - Arguments: - path ; Path to the file. - req ; HTTP request (used to extract the filename). + @arg path: Path to the file + @type path: string + @arg req: HTTP request (used to extract the filename) + @type req: string - Returns: - string ; The content of the file. + @return: The content of the file + @rtype: string """ handle = open(path + req.uri.split('/', 2)[2], "r") @@ -184,6 +198,13 @@ class Web() : def isEMail(self, eMail) : #TODO documentation """ + Check if argument is a valid email address. + + @arg eMail: email address to check + @type eMail: string + + @return: True or False + @rtype: boolean """ if re.match("^[a-zA-Z0-9._%-]+@[a-zA-Z0-9._%-]+.[a-zA-Z]{2,6}$", @@ -195,6 +216,11 @@ class Web() : def urlEncode(self, descriptions) : #TODO documentation """ + @arg descriptions: + @type descriptions: list + + @return: urlEncode descriptions??????????????? + @rtype: list """ newDescr = [] diff --git a/src/Modules/__init__.py b/src/Modules/__init__.py index 10629c4230034104ab132399156ba023ce75add4..9465c89daf7379f84f782fb9124f933cbe65ac07 100644 --- a/src/Modules/__init__.py +++ b/src/Modules/__init__.py @@ -1,14 +1,16 @@ """ - Public modules: - Config ; - Crossmap ; - Db ; - GenRecord ; - Misc ; - Mutator ; - Output ; - Parser ; - Retriever ; - Scheduler ; - Web ; +@organization: Leiden University Medical Center (LUMC) +@copyright: 2010, Jeroen Laros, LUMC """ +# Public modules: +# - Config ; +# - Crossmap ; +# - Db ; +# - GenRecord ; +# - Misc ; +# - Mutator ; +# - Output ; +# - Parser ; +# - Retriever ; +# - Scheduler ; +# - Web ; diff --git a/src/Mutalyzer.py b/src/Mutalyzer.py index 9c7477de186ffb12c976b3b428f15124df9539da..5036fe7ef03bc4203e248dfc8ab818f9c7749979 100644 --- a/src/Mutalyzer.py +++ b/src/Mutalyzer.py @@ -1,7 +1,29 @@ #!/usr/bin/python """ - The nomenclature checker. +The nomenclature checker. 
+ +@requires: sys +@requires: math +@requires: types +@requires: Bio +@requires: Bio.Seq +@requires: Bio.Seq.Seq +@requires: Bio.Alphabet.IUPAC +@requires: Bio.SeqUtils.seq3 +@requires: Bio.Restriction +@requires: Modules.Retriever +@requires: Modules.GenRecord +@requires: Modules.Crossmap +@requires: Modules.Parser +@requires: Modules.Db +@requires: Modules.Mutator +@requires: Modules.Output +@requires: Modules.Config +@requires: operator.itemgetter +@requires: operator.attrgetter + +@todo: SET TO FALSE DEBUG FLAG """ import sys @@ -31,14 +53,15 @@ DEBUG = False def __formatRange(pos1, pos2) : """ - Simplify a range to one position when applicable. + Simplify a range to one position when applicable. - Arguments: - pos1 ; First coordinate of a range. - pos2 ; Second coordinate of a range. + @arg pos1: First coordinate of a range + @type pos1: integer + @arg pos2: Second coordinate of a range + @type pos2: integer - Returns: - string ; pos1_pos2 in case of a real range, pos1 otherwise. + @return: pos1_pos2 in case of a real range, pos1 otherwise + @rtype: string """ if pos1 == pos2 : @@ -48,13 +71,13 @@ def __formatRange(pos1, pos2) : def __intronicPosition(Loc) : """ - Check whether a location is intronic. + Check whether a location is intronic. - Arguments: - Loc ; A location from the Parser module. + @arg Loc: A location from the Parser module + @type Loc: - Returns: - boolean ; True if the location is intronic, False otherwise. + @return: True if the location is intronic, False otherwise + @rtype: boolean """ if not Loc : @@ -68,18 +91,20 @@ def __intronicPosition(Loc) : def __checkIntronPosition(main, offset, transcript) : """ - Check whether a c. position is really in an intron: The main coordinate - must be a splice site and the offset coordinate must have the correct - sign. - - Arguments: - main ; Main coordinate of the position. - offset ; Offset coordinate of the position. - transcript ; Transcript under scrutiny. 
- - Returns: - boolean ; True if the combination (main, offset) is valid for this - transcript. False otherwise. + Check whether a c. position is really in an intron: The main coordinate + must be a splice site and the offset coordinate must have the correct + sign. + + @arg main: Main coordinate of the position + @type main: integer + @arg offset: Offset coordinate of the position + @type offset: integer + @arg transcript: Transcript under scrutiny + @type transcript: object + + @return: True if the combination (main, offset) is valid for this + transcript, False otherwise + @rtype: boolean """ main_g = transcript.CM.x2g(main, 0) @@ -105,22 +130,23 @@ def __checkIntronPosition(main, offset, transcript) : def __roll(ref, start, stop) : """ - Determine the variability of a variant by looking at cyclic - permutations. Not all cyclic permutations are tested at each time, it - is sufficient to check ``aW'' if ``Wa'' matches (with ``a'' a letter, - ``W'' a word) when rolling to the left for example. - - Arguments: - ref ; A reference sequence. - start ; Start position of the pattern in the reference sequence. - stop ; End position of the pattern in the reference sequence. - - Returns: - tuple: - left ; Amount of positions that the pattern can be shifted to - the left. - right ; Amount of positions that the pattern can be shifted to - the right. + Determine the variability of a variant by looking at cyclic + permutations. Not all cyclic permutations are tested at each time, it + is sufficient to check ``aW'' if ``Wa'' matches (with ``a'' a letter, + ``W'' a word) when rolling to the left for example. + + @arg ref: A reference sequence + @type ref: string + @arg start: Start position of the pattern in the reference sequence + @type start: integer + @arg stop: End position of the pattern in the reference sequence. 
+ @type stop: integer + + @return: tuple: + - left ; Amount of positions that the pattern can be shifted to the left + - right ; Amount of positions that the pattern can be shifted to the + right + @rtype: tuple (integer, integer) """ pattern = ref[start - 1:stop] # Extract the pattern. @@ -147,16 +173,16 @@ def __roll(ref, start, stop) : def __palinsnoop(string) : """ - Check a sequence for a reverse-complement-palindromic prefix (and - suffix). If one is detected, return the length of this prefix. If the - string equals its reverse complement, return -1. + Check a sequence for a reverse-complement-palindromic prefix (and + suffix). If one is detected, return the length of this prefix. If the + string equals its reverse complement, return -1. - Arguments: - string ; A nucleotide sequence. + @arg string: A nucleotide sequence + @type string: string - Returns: - integer ; The number of elements that are palindromic or -1 if the - string is a ``palindrome''. + @return: The number of elements that are palindromic or -1 if the string is + a "palindrome". + @rtype: string """ revcomp = Bio.Seq.reverse_complement(string) @@ -170,6 +196,7 @@ def __palinsnoop(string) : def __bprint(s, O, where) : # FIXME obsoleted function (replaced by __bprint2()), but still used. """ + @todo: FIXME obsoleted function (replaced by __bprint2()), but still used. """ if not s : @@ -194,21 +221,25 @@ def __bprint(s, O, where) : def __insertTag(s, pos1, pos2, tag1, tag2) : """ - Insert two tags (tag1 and tag2) in string s at positions pos1 and pos2 - respectively if the positions are within the length of s. If not, - either insert one tag or do nothing. If pos1 equals pos2, don't do - anything either. - - Arguments: - s ; A sequence. - pos1 ; Position of tag1. - pos2 ; Position of tag2. - tag1 ; Content of tag1. - tag2 ; Content of tag2. - - Returns: - string ; The original sequence, or a sequence with eiter tag1, - tag2 or both tags inserted. 
+ Insert two tags (tag1 and tag2) in string s at positions pos1 and pos2 + respectively if the positions are within the length of s. If not, + either insert one tag or do nothing. If pos1 equals pos2, don't do + anything either. + + @arg s: A sequence + @type s: + @arg pos1: Position of tag1 + @type pos1: + @arg pos2: Position of tag2 + @type pos2: + @arg tag1: Content of tag1 + @type tag1: + @arg tag2: Content of tag2 + @type tag2: + + @return: The original sequence, or a sequence with either tag1, tag2 or both + tags inserted. + @rtype: string """ output = s @@ -227,15 +258,19 @@ def __bprint2(s, pos1, pos2, O, where) : """ - Make a fancy representation of a protein and put it in the Output - object under the name ``where''. - - Arguments: - s ; A protein sequence. - pos1 ; First position to highlight. - pos2 ; Last position to highlight. - O ; The Output object. - where ; Location in the Output object to store the representation. + Make a fancy representation of a protein and put it in the Output + object under the name "where". + + @arg s: A protein sequence + @type s: string + @arg pos1: First position to highlight + @type pos1: + @arg pos2: Last position to highlight + @type pos2: + @arg O: The Output object + @type O: object + @arg where: Location in the Output object to store the representation + @type where: """ if not s : @@ -269,14 +304,14 @@ def __bprint2(s, pos1, pos2, O, where) : def __PtLoc2main(Loc) : """ - Convert the main coordinate in a location (from the Parser) to an - integer. + Convert the main coordinate in a location (from the Parser) to an + integer. - Arguments: - Loc ; A location. + @arg Loc: A location + @type Loc: object - Returns: - integer ; Integer representation of the main coordinate.
+ @return: Integer representation of the main coordinate + @rtype: integer """ main = int(Loc.Main) @@ -288,14 +323,14 @@ def __PtLoc2main(Loc) : def __PtLoc2offset(Loc) : """ - Convert the offset coordinate in a location (from the Parser) to an - integer. + Convert the offset coordinate in a location (from the Parser) to an + integer. - Arguments: - Loc ; A location. + @arg Loc: A location. + @type Loc: object - Returns; - integer ; Integer representation of the offset coordinate. + @return: Integer representation of the offset coordinate + @rtype: integer """ if Loc.Offset : @@ -312,16 +347,17 @@ def __PtLoc2offset(Loc) : def __splice(string, splice_sites) : """ - Construct the transcript or the coding sequence from a record and - a list of splice sites. + Construct the transcript or the coding sequence from a record and + a list of splice sites. - Arguments: - record ; A GenBank record (see the BioPython documentation). - splice_sites ; A list of even length of integers. + @arg string: a DNA sequence + @type string: string + @arg splice_sites: A list of even length of integers. + @type splice_sites: list - Returns: - String ; The concatenation of slices from the sequence that is - present in the GenBank record. + @return: The concatenation of slices from the sequence that is present in + the GenBank record + @rtype: string """ transcript = "" @@ -335,6 +371,7 @@ def __splice(string, splice_sites) : def __nsplice(string, splice_sites, CDS, orientation) : #FIXME document this. """ + @todo: documentation """ transcript = "" @@ -364,14 +401,14 @@ def __nsplice(string, splice_sites, CDS, orientation) : def __cdsLen(splice_sites) : """ - Calculate the length of a CDS. + Calculate the length of a CDS. - Arguments: - splice_sites ; The coordinates of the CDS including internal splice - sites. + @arg splice_sites: The coordinates of the CDS including internal splice + sites. + @type splice_sites: list - Returns: - integer ; Length of the CDS. 
+ @return: Length of the CDS + @rtype: integer """ l = 0 @@ -383,13 +420,13 @@ def __cdsLen(splice_sites) : def __checkDNA(arg) : """ - Check whether a string is a DNA string. + Check whether a string is a DNA string. - Arguments: - arg ; Any string. + @arg arg: Any string + @type arg: string - Returns: - boolean ; True if the string is a DNA string, False otherwise. + @return: True if the string is a DNA string, False otherwise + @rtype: boolean """ for i in str(arg) : @@ -400,18 +437,22 @@ def __checkDNA(arg) : def __checkOptArg(ref, p1, p2, arg, O) : """ - Do several checks for the optional argument of a variant. - - - Arguments: - ref ; The reference sequence. - p1 ; Start position of the variant. - p2 ; End position of the variant. - arg ; The optional argument. - O ; The Output object. - - Returns: - boolean ; True if the optional argument is correct, False otherwise. + Do several checks for the optional argument of a variant. + + + @arg ref: The reference sequence + @type ref: string + @arg p1: Start position of the variant + @type p1: integer + @arg p2: End position of the variant + @type p2: integer + @arg arg: The optional argument + @type arg: + @arg O: The Output object + @type O: object + + @return: True if the optional argument is correct, False otherwise. + @rtype: boolean """ if arg : # The argument is optional, if it is not present, it is correct. @@ -446,14 +487,15 @@ def __checkOptArg(ref, p1, p2, arg, O) : def __lcp(str1, str2) : """ - Calculate the length of the longest common prefix of two strings. + Calculate the length of the longest common prefix of two strings. - Arguments: - str1 ; The first string. - str2 ; The second string. + @arg str1: The first string + @type str1: string + @arg str2: The second string + @type str2: string - Returns: - integer ; The length of the longest common prefix of str1 and str2. 
+ @return: The length of the longest common prefix of str1 and str2 + @rtype: integer """ pos = 0 @@ -468,14 +510,15 @@ def __lcp(str1, str2) : def __lcs(str1, str2) : """ - Calculate the length of the longest common suffix of two strings. + Calculate the length of the longest common suffix of two strings. - Arguments: - str1 ; The first string. - str2 ; The second string. + @arg str1: The first string + @type str1: string + @arg str2: The second string + @type str2: string - Returns: - integer ; The length of the longest common suffix of str1 and str2. + @return: The length of the longest common suffix of str1 and str2 + @rtype: integer """ t1 = str1[::-1] # Invert str1. @@ -487,20 +530,21 @@ def __lcs(str1, str2) : def findInFrameDescription(str1, str2) : """ - Give a description of an inframe difference of two proteins. Also give - the position at which the proteins start to differ and the positions at - which they are the same again. - - Arguments: - str1 ; The original protein. - str2 ; The mutated protein. - - Retuns: - vector: - string ; Protein description of the change. - integer ; Start position of the change. - integer ; End position of the change in the first protein. - integer ; End position of the change in the second protein. + Give a description of an inframe difference of two proteins. Also give + the position at which the proteins start to differ and the positions at + which they are the same again. + + @arg str1: The original protein + @type str1: string + @arg str2: The mutated protein + @type str2: string + + @return: vector: + - string ; Protein description of the change + - integer ; Start position of the change + - integer ; End position of the change in the first protein + - integer ; End position of the change in the second protein + @rtype: tuple (string, integer, integer, integer) """ # Nothing happened.
@@ -562,22 +606,23 @@ def findInFrameDescription(str1, str2) : def findFrameShift(str1, str2) : """ - Give the description of an out of frame difference between two - proteins. Give a description of an inframe difference of two proteins. - Also give the position at which the proteins start to differ and the - end positions (to be compatible with the findInFrameDescription() - function). - - Arguments: - str1 ; The original protein. - str2 ; The mutated protein. - - Retuns: - vector: - string ; Protein description of the change. - integer ; Start position of the change. - integer ; End position of the first protein. - integer ; End position of the second protein. + Give the description of an out of frame difference between two + proteins. Give a description of an inframe difference of two proteins. + Also give the position at which the proteins start to differ and the + end positions (to be compatible with the findInFrameDescription() + function). + + @arg str1: The original protein + @type str1: string + @arg str2: The mutated protein + @type str2: string + + @return: vector: + - string ; Protein description of the change. + - integer ; Start position of the change. + - integer ; End position of the first protein. + - integer ; End position of the second protein. + @rtype: tuple (string, integer, integer, integer) """ lcp = __lcp(str1, str2) @@ -593,20 +638,22 @@ def findFrameShift(str1, str2) : def __toProtDescr(CDSStop, orig, trans) : """ - Wrapper function for the findInFrameDescription() and findFrameShift() - functions. It uses the value CDSStop to decide which one to call. - - Arguments: - CDSStop ; Position of the stop codon in c. notation (CDS length). - orig ; The original protein. - trans ; The mutated protein. - - Retuns: - vector: - string ; Protein description of the change. - integer ; Start position of the change. - integer ; End position of the change in the first protein. - integer ; End position of the change in the second protein.
+ Wrapper function for the findInFrameDescription() and findFrameShift() + functions. It uses the value CDSStop to decide which one to call. + + @arg CDSStop: Position of the stop codon in c. notation (CDS length) + @type CDSStop: integer + @arg orig: The original protein + @type orig: string + @arg trans: The mutated protein + @type trans: string + + @return: vector: + - string ; Protein description of the change. + - integer ; Start position of the change. + - integer ; End position of the change in the first protein. + - integer ; End position of the change in the second protein. + @rtype: tuple (string, integer, integer, integer) """ if CDSStop % 3 : @@ -620,16 +667,16 @@ def __toProtDescr(CDSStop, orig, trans) : def __trim2(str1, str2) : """ - Given two strings, trim the lcp and the lcs. + Given two strings, trim the lcp and the lcs. - Arguments: - str1 ; A string. - str2 ; An other string. + @arg str1: A string + @type str1: string + @arg str2: An other string + @type str2: string - Returns: - tuple: - string ; Trimmed version of str1. - string ; Trimmed version of str2. + @return: tuple: + - string: Trimmed version of str1. + - string: Trimmed version of str2. """ lcp = __lcp(str1, str2) @@ -640,17 +687,18 @@ def __trim2(str1, str2) : def __rangeToC(M, g1, g2) : # FIXME apparently obsolete. """ - Convert a genomic range to a CDS oriented range. - - Arguments: - M ; - g1 ; - g2 ; - - Returns: - tuple: - string ; - string ; + Convert a genomic range to a CDS oriented range. + + @arg M: + @type M: + @arg g1: + @type g1: + @arg g2: + @type g2: + + @return: tuple (string, string) + @rtype: tuple + @todo: FIXME apparently obsolete. """ if M.orientation == -1 : @@ -661,10 +709,14 @@ def __rangeToC(M, g1, g2) : def _createBatchOutput(O): #TODO More documentation. """ - Format the results to a batch output. + Format the results to a batch output. - Filter the mutalyzer output + Filter the mutalyzer output + + @arg O: + @type O: + @todo: More documentation. 
""" goi, toi = O.getOutput("geneSymbol")[-1] # Two strings [can be empty] tList = [] # Temporary List @@ -750,13 +802,18 @@ def checkSubstitution(start_g, Arg1, Arg2, MUU, GenRecordInstance, O) : Do a semantic check for substitutions, do the actual substitution and give it a name. - Arguments: - start_g ; Genomic location of the substitution. - Arg1 ; Nucleotide in the reference sequence. - Arg2 ; Nucleotide in the mutated sequence. - MUU ; A Mutator object. - GenRecordInstance ; A GenRecord object. - O ; The Output object. + @arg start_g: Genomic location of the substitution + @type start_g: integer + @arg Arg1: Nucleotide in the reference sequence. + @type Arg1: string + @arg Arg2: Nucleotide in the mutated sequence. + @type Arg2: string + @arg MUU: A Mutator object. + @type MUU: object + @arg GenRecordInstance: A GenRecord object. + @type GenRecordInstance: object + @arg O: The Output object. + @type O: object """ if not __checkDNA(Arg2) : # It must be DNA. @@ -775,16 +832,21 @@ def checkSubstitution(start_g, Arg1, Arg2, MUU, GenRecordInstance, O) : def checkDeletionDuplication(start_g, end_g, mutationType, MUU, GenRecordInstance, O) : """ - Do a semantic check for a deletion or duplication, do the actual - deletion/duplication and give it a name. - - Arguments: - start_g ; Genomic start position of the del/dup. - end_g ; Genomic end position of the del/dup. - mutationType ; The type (del or dup). - MUU ; A Mutator object. - GenRecordInstance ; A GenRecord object. - O ; The Output object. + Do a semantic check for a deletion or duplication, do the actual + deletion/duplication and give it a name. 
+ + @arg start_g : Genomic start position of the del/dup + @type start_g: integer + @arg end_g: Genomic end position of the del/dup + @type end_g: integer + @arg mutationType: The type (del or dup) + @type mutationType: string + @arg MUU: A Mutator object + @type MUU: object + @arg GenRecordInstance: A GenRecord object + @type GenRecordInstance: object + @arg O: The Output object + @type O: object """ roll = __roll(MUU.orig, start_g, end_g) @@ -826,6 +888,7 @@ def checkDeletionDuplication(start_g, end_g, mutationType, MUU, def checkInversion(start_g, end_g, MUU, GenRecordInstance, O) : """ + @todo: documentation """ snoop = __palinsnoop(MUU.orig[start_g - 1:end_g]) @@ -866,6 +929,7 @@ def checkInversion(start_g, end_g, MUU, GenRecordInstance, O) : def checkInsertion(start_g, end_g, Arg1, MUU, GenRecordInstance, O) : """ + @todo: documentation """ if start_g + 1 != end_g : @@ -927,6 +991,7 @@ def checkInsertion(start_g, end_g, Arg1, MUU, GenRecordInstance, O) : def __ivs2g(location, transcript) : """ + @todo: documentation """ ivsNumber = int(location.IVSNumber) @@ -943,6 +1008,7 @@ def __ivs2g(location, transcript) : def __ex2g(location, transcript) : """ + @todo: documentation """ numberOfExons = transcript.CM.numberOfExons() @@ -965,6 +1031,7 @@ def __ex2g(location, transcript) : def __normal2g(RawVar, transcript) : """ + @todo: documentation """ if not RawVar.StartLoc.PtLoc.Main.isdigit() : @@ -1014,6 +1081,7 @@ def __normal2g(RawVar, transcript) : def __rv(MUU, RawVar, GenRecordInstance, parts, O, transcript) : """ + @todo: documentation """ # FIXME check this @@ -1166,6 +1234,9 @@ def __rv(MUU, RawVar, GenRecordInstance, parts, O, transcript) : #__rv def __ppp(MUU, parts, GenRecordInstance, O) : + """ + @todo: documentation + """ if parts.RawVar or parts.SingleAlleleVarSet : if parts.RefType == 'r' : O.addMessage(__file__, 4, "ERNA", "Descriptions on RNA level " \ @@ -1331,6 +1402,9 @@ def __ppp(MUU, parts, GenRecordInstance, O) : #__ppp def process(cmd, C, O) 
: + """ + @todo: documentation + """ parser = Parser.Nomenclatureparser(O) O.addOutput("inputvariant", cmd) ParseObj = parser.parse(cmd) @@ -1565,6 +1639,9 @@ def process(cmd, C, O) : #process def main(cmd) : + """ + @todo: documentation + """ C = Config.Config() O = Output.Output(__file__, C.Output) diff --git a/src/UCSC_update.py b/src/UCSC_update.py index 14ac44b283f4655ff5d6f516aeb458b7ff94ce35..82e1c07394b14a61ce178858141bb045b8297ec5 100644 --- a/src/UCSC_update.py +++ b/src/UCSC_update.py @@ -1,9 +1,17 @@ #!/usr/bin/python """ - Get updates on mapping information from the UCSC. +Get updates on mapping information from the UCSC. - This program is intended to be run daily from cron. +This program is intended to be run daily from cron. + +@requires: sys +@requires: os + +@requires: Modules.Config +@requires: Modules.Output +@requires: Modules.Remote +@requires: Modules.Update """ import sys # sys.argv diff --git a/src/VarInfo.py b/src/VarInfo.py index 39cd40f3ea0224af0e11be4128f4012e6766c6a2..2756496960057d907e95f0770e9b1e33ac6e45cf 100644 --- a/src/VarInfo.py +++ b/src/VarInfo.py @@ -1,20 +1,28 @@ #!/usr/bin/python """ - Search for an NM number in the MySQL database, if the version number - matches, get the start and end positions in a variant and translate these - positions to g. notation if the variant is in c. notation and vice versa. - +Search for an NM number in the MySQL database, if the version number +matches, get the start and end positions in a variant and translate these +positions to I{g.} notation if the variant is in I{c.} notation and vice versa. - If no end position is present, the start position is assumed to be the end position. - If the version number is not found in the database, an error message is generated and a suggestion for an other version is given. - If the reference sequence is not found at all, an error is returned. - If no variant is present, the transcription start and end and CDS end - in c. notation is returned. 
+ in I{c.} notation is returned. - If the variant is not accepted by the nomenclature parser, a parse error will be printed. +@requires: sys +@requires: Modules.Db +@requires: Modules.Crossmap +@requires: Modules.Parser +@requires: Modules.Output +@requires: Modules.Config +@requires: Modules.Mapper + +@todo: documentation """ import sys # argv @@ -28,11 +36,13 @@ from Modules import Mapper def __sl2il(l) : """ - Convert a list of strings to a list of integers. - - Arguments: l ; A list of strings. + Convert a list of strings to a list of integers. + + @arg l: A list of strings + @type l: list - Returns: list ; A list of integers. + @return: A list of integers + @rtype: list """ for i in range(len(l)) : @@ -42,18 +52,21 @@ def __sl2il(l) : def __getcoords(C, Loc, Type) : """ - Return main, offset and g positions given either a position in - c. or in g. notation. - - Arguments: - C ; A crossmapper. - Loc ; Either a location in g. or c. notation. - Type ; The reference type. - Returns: - triple: - 0 ; Main coordinate in c. notation. - 1 ; Offset coordinate in c. notation. - 2 ; Position in g. notation. + Return main, offset and g positions given either a position in + I{c.} or in I{g.} notation. + + @arg C: A crossmapper + @type C: object + @arg Loc: Either a location in I{g.} or I{c.} notation + @type Loc: object + @arg Type: The reference type + @type Type: character + + @return: triple: + - 0 ; Main coordinate in I{c.} notation + - 1 ; Offset coordinate in I{c.} notation + - 2 ; Position in I{g.} notation + @rtype: triple (integer, integer, integer) """ if Type == 'c' : @@ -71,31 +84,37 @@ def __getcoords(C, Loc, Type) : def main(LOVD_ver, build, acc, var) : """ - The entry point (called by the HTML publisher). - - Arguments: - LOVD_ver ; The LOVD version (ignored for now). - build ; The human genome build. - acc ; The NM accession number and version. - var ; The variant, or empty. - - Returns: - start_main ; The main coordinate of the start position in c. 
- (non-star) notation. - start_offset ; The offset coordinate of the start position in c. - notation (intronic position). - end_main ; The main coordinate of the end position in c. - (non-star) notation. - end_offset ; The offset coordinate of the end position in c. - notation (intronic position). - start_g ; The g. notation of the start position. - end_g ; The g. notation of the end position. - type ; The mutation type. - - Returns (alternative): - trans_start ; Transcription start in c. notation. - trans_stop ; Transcription stop in c. notation. - CDS_stop ; CDS stop in c. notation. + The entry point (called by the HTML publisher). + + Returns: + - start_main ; The main coordinate of the start position in I{c.} + (non-star) notation. + - start_offset ; The offset coordinate of the start position in I{c.} + notation (intronic position). + - end_main ; The main coordinate of the end position in I{c.} + (non-star) notation. + - end_offset ; The offset coordinate of the end position in I{c.} + notation (intronic position). + - start_g ; The I{g.} notation of the start position. + - end_g ; The I{g.} notation of the end position. + - type ; The mutation type. + + Returns (alternative): + - trans_start ; Transcription start in I{c.} notation. + - trans_stop ; Transcription stop in I{c.} notation. + - CDS_stop ; CDS stop in I{c.} notation. + + @arg LOVD_ver: The LOVD version (ignored for now) + @type LOVD_ver: string + @arg build: The human genome build + @type build: string + @arg acc: The NM accession number and version + @type acc: string + @arg var: The variant, or empty + @type var: string + + @return: + @rtype: """ C = Config.Config() diff --git a/src/handler.py b/src/handler.py index 12a3a488ddaf2d477fb9be5102c3e28b07e717c0..026ebcaeb4edeaf9549722977f7e50e7049fcaeb 100644 --- a/src/handler.py +++ b/src/handler.py @@ -1,13 +1,24 @@ #!/usr/bin/python """ - General handler for mod_python. 
- - This handler calls the webservices handler, the HTML publisher or handles - a request itself, depending on keywords in the URI of the request. - - Public methods: - handler(req) ; The handler. +General handler for mod_python. + +This handler calls the webservices handler, the HTML publisher or handles +a request itself, depending on keywords in the URI of the request. + +Public methods: + - handler(req) ; The handler. + +@requires: os +@requires: bz2 +@requires: ZSI.dispatch +@requires: soaplib.client.make_service_client +@requires: mod_python.apache +@requires: mod_python.publisher +@requires: Modules.Web +@requires: Modules.Config +@requires: Modules.File +@requires: webservice """ import os @@ -24,28 +35,28 @@ import webservice def handler(req): """ - Handle a request passed to us by mod_python. - - Keywords in the URI of the request are used to decide what to do: - "services" ; Dispatch the webservices handler. - ".js" ; Return the raw content of the file (to include - JavaScript from an HTML file). - ".py" ; Return the content as a downloadable file after it has - been processed by TAL (to generate webservice client - files). - ".wsdl" ; Return the content of the file after it has been - processed by TAL (to generate a WSDL file that refers - to the correct server). - By default, the HTML publisher is used for normal HTML files and other - unhandled requests. - - Arguments: - req ; The request. - - Returns: - int ; An apache return code, either generated in this function - itself, or by the publisher handler which handles normal HTML - requests. + Handle a request passed to us by mod_python. + + Keywords in the URI of the request are used to decide what to do: + - "services" ; Dispatch the webservices handler. + - ".js" ; Return the raw content of the file (to include JavaScript + from an HTML file). + - ".py" ; Return the content as a downloadable file after it has + been processed by TAL (to generate webservice client + files). 
+ - ".wsdl" ; Return the content of the file after it has been + processed by TAL (to generate a WSDL file that refers to + the correct server). + + By default, the HTML publisher is used for normal HTML files and other + unhandled requests. + + @arg req: The request + @type req: string + + @return: An apache return code, either generated in this function + itself, or by the publisher handler which handles normal HTML requests + @rtype: integer """ # Figure out where this program is located and go to the parent directory. diff --git a/src/index.py b/src/index.py index 061e94ccd90c8dbce08ab5a9b9d8f7bd0602118a..931e078d1ad5890ea401a60b104c822e056553a6 100644 --- a/src/index.py +++ b/src/index.py @@ -1,14 +1,34 @@ #!/usr/bin/python """ - The HTML publisher. - - These functions appear as HTML pages on the web server. - - Public methods: - index(req) ; The mutation checker page. - Variant_info(req) ; The g. to c. and vice versa interface for LOVD. - download(req) ; The download page. +The HTML publisher. + +These functions appear as HTML pages on the web server. + +Public methods: + - index(req) ; The mutation checker page. + - Variant_info(req) ; The I{g.} to I{c.} and vice versa interface for LOVD. + - download(req) ; The download page. 
+ +@requires: Mutalyzer +@requires: VarInfo +@requires: pydoc +@requires: webservice +@requires: string + +@requires: mod_python import apache +@requires: mod_python import Session +@requires: mod_python import util + +@requires: Modules.Parser +@requires: Modules.Mapper +@requires: Modules.Web +@requires: Modules.Config +@requires: Modules.Output +@requires: Modules.Db +@requires: Modules.Scheduler +@requires: Modules.Retriever +@requires: Modules.File """ import Mutalyzer @@ -32,9 +52,20 @@ from Modules import Retriever from Modules import File class InputException(Exception): + """ + @todo: documentation + """ pass def snp(req) : + """ + @todo: documentation + + @arg req: the HTTP request + @type req: object + @return: compiled TAL template + @rtype: object + """ C = Config.Config() O = Output.Output(__file__, C.Output) W = Web.Web() @@ -61,39 +92,70 @@ def snp(req) : def index(req) : + """ + @todo: documentation + + @arg req: the HTTP request + @type req: object + @return: compiled TAL template + @rtype: object + """ W = Web.Web() return W.tal("HTML", "templates/index.html", {}) #index def help(req) : + """ + @todo: documentation + + @arg req: the HTTP request + @type req: object + @return: compiled TAL template + @rtype: object + """ W = Web.Web() return W.tal("HTML", "templates/help.html", {}) #about def about(req) : + """ + @todo: documentation + + @arg req: the HTTP request + @type req: object + @return: compiled TAL template + @rtype: object + """ W = Web.Web() return W.tal("HTML", "templates/about.html", {}) #about def nameGenerator(req): + """ + @todo: documentation + + @arg req: the HTTP request + @type req: object + @return: compiled TAL template + @rtype: object + """ W = Web.Web() return W.tal("HTML", "templates/generator.html", {}) #generator def check(req) : """ - The mutation checker page. + The mutation checker page. - If the incoming request has a form, run Mutalyzer. 
The output of - Mutalyzer is used together with a version and the last posted value - to make an HTML page from a TAL template. + If the incoming request has a form, run Mutalyzer. The output of + Mutalyzer is used together with a version and the last posted value + to make an HTML page from a TAL template. - Arguments: - req ; The request: - req.form['mutationName'] ; A description of a variant. + @arg req: The request: req.form['mutationName'] ; A description of a variant + @type req: object - Returns: - string ; An HTML page containing the results of Mutalyzer. + @return: a compiled TAL template containing the results of Mutalyzer + @rtype: object """ W = Web.Web() @@ -172,26 +234,25 @@ def check(req) : def getGS(req): """ - LOVD bypass to get the correct GeneSymbol incl Transcript variant. - - Used by LOVD to get the correct transcript variant out of a genomic - record. LOVD uses a genomic reference (NC_?) in combination with a gene - symbol to pass variant info to mutalyzer. Mutalyzer 1.0 was only using - the first transcript. LOVD supplies the NM of the transcript needed but - this was ignored. This helper allows LOVD to get the requested - transcript variant from a genomic reference. - - Arguments: - req ; The request: - req.form['mutationName'] ; the mutationname without - gene symbol - re.form['variantRecord'] ; the NM reference of the - variant - re.form['forward'] ; if set this forwards the - request to the name checker - Returns - string ; The GeneSymbol with the variant notation - web ; If forward is set the request is forwarded to check + LOVD bypass to get the correct GeneSymbol incl Transcript variant. + + Used by LOVD to get the correct transcript variant out of a genomic + record. LOVD uses a genomic reference (NC_?) in combination with a gene + symbol to pass variant info to mutalyzer. Mutalyzer 1.0 was only using + the first transcript. LOVD supplies the NM of the transcript needed but + this was ignored. 
This helper allows LOVD to get the requested + transcript variant from a genomic reference. + + @arg req: The request: + - req.form['mutationName'] ; the mutation name without gene symbol + - req.form['variantRecord'] ; the NM reference of the variant + - req.form['forward'] ; if set this forwards the request to the name + checker + @type req: + + @return: + - string ; The GeneSymbol with the variant notation + - web ; If forward is set the request is forwarded to check """ W = Web.Web() C = Config.Config() @@ -223,6 +284,12 @@ def getGS(req): def checkForward(req) : + """ + @arg req: + @type req: + + @todo: documentation + """ session = Session.Session(req) session['mut'] = req.form.get("mutationName", None) session.save() @@ -232,15 +299,15 @@ def checkForward(req) : def syntaxCheck(req) : """ - Checks the syntax of a variant + Checks the syntax of a variant - Arguments: - req ; The request: - req.form['variant'] ; A description of the variant. + @arg req: The request: + req.form['variant'] ; A description of the variant + @type req: object - Returns: - string ; An HTML page containing the remark if the variant syntax - is OK or not + @return: An HTML page containing the remark if the variant syntax is OK or + not + @rtype: string """ W = Web.Web() @@ -278,6 +345,12 @@ def syntaxCheck(req) : #checkingSyntax def positionConverter(req): + """ + @arg req: + @type req: + + @todo: documentation + """ W = Web.Web() C = Config.Config() O = Output.Output(__file__, C.Output) @@ -337,17 +410,17 @@ def positionConverter(req): def Variant_info(req) : """ - The g. to c. and vice versa interface for LOVD. + The I{g.} to I{c.} and vice versa interface for LOVD. - Arguments: - req ; The request: - req.form['LOVD_ver'] ; The version of the calling LOVD. - req.form['build'] ; The human genome build (hg19 assumed). - req.form['acc'] ; The accession number (NM number). - req.form['var'] ; A description of the variant.
+ @arg req: The request: + - req.form['LOVD_ver'] ; The version of the calling LOVD + - req.form['build'] ; The human genome build (hg19 assumed) + - req.form['acc'] ; The accession number (NM number) + - req.form['var'] ; A description of the variant + @type req: object - Returns: - string ; An HTML page containing the results of Variant_info. + @return: An HTML page containing the results of Variant_info + @rtype: string """ W = Web.Web() @@ -369,13 +442,13 @@ def Variant_info(req) : def webservices(req) : """ - The download page. + The download page. - Arguments: - req ; The request. + @arg req: The request + @type req: object - Returns: - string ; An HTML page. + @return: An HTML page + @rtype: object """ W = Web.Web() @@ -386,6 +459,14 @@ def webservices(req) : #download def __checkInt(inpv, refname): + """ + @arg inpv: + @type inpv: + @arg refname: + @type refname: + + @todo: documentation + """ #remove , . and - inpv = inpv.replace(',','').replace('.','').replace('-','') try: @@ -395,6 +476,13 @@ def __checkInt(inpv, refname): def upload(req) : """ + @arg req: + @type req: + + @return: + @rtype: + + @todo: documentation """ C = Config.Config() @@ -469,7 +557,15 @@ def upload(req) : def progress(req): """ - Progress page for batch runs + Progress page for batch runs + + @arg req: + @type req: + + @return: + @rtype: + + @todo: documentation """ W = Web.Web() C = Config.Config() @@ -499,7 +595,18 @@ def progress(req): def batch(req, batchType=None): """ - Batch function to add batch jobs to the Database + Batch function to add batch jobs to the Database + + @arg batchType: Type of the batch job + @type batchType: string + + + @arg req: the HTTP request + @type req: object + @return: compiled TAL template + @rtype: object + + @todo: documentation """ W = Web.Web() C = Config.Config() @@ -560,15 +667,41 @@ def batch(req, batchType=None): #batch def disclaimer(req) : + """ + @arg req: the HTTP request + @type req: object + @return: compiled TAL template + @rtype: 
object + + @todo: documentation + """ W = Web.Web() return W.tal("HTML", "templates/disclaimer.html", []) #disclaimer def batchNameChecker(req): + """ + @arg req: + @type req: + + @return: + @rtype: + + @todo: documentation + """ return batch(req, "NameChecker") #batchCheck def batchPositionConverter(req): + """ + @arg req: + @type req: + + @return: + @rtype: + + @todo: documentation + """ return batch(req, "PositionConverter") #batchConvert @@ -578,13 +711,13 @@ def batchSyntaxChecker(req): def documentation(req) : """ - Generate documentation for the webservice. + Generate documentation for the webservice. - Arguments: - req ; The request. + @arg req: The HTTP request + @type req: object - Returns: - string ; An HTML page. + @return: An HTML page + @rtype: string """ htmldoc = pydoc.HTMLDoc() @@ -594,6 +727,15 @@ def documentation(req) : #TODO: taltest.html does not exist def taltest(req) : + """ + @arg req: the HTTP request + @type req: object + @return: compiled TAL template + @rtype: object + + @todo: taltest.html does not exist + @todo: documentation + """ W = Web.Web() C = Config.Config() variant = "" diff --git a/src/webservice.py b/src/webservice.py index aa718f0ffbb1d1a88faf116f8c493675c2ecea74..44168c451251530456993fd92c6fc806c22ffb17 100644 --- a/src/webservice.py +++ b/src/webservice.py @@ -1,11 +1,29 @@ #!/usr/bin/python """ - Mutalyzer webservices. - - Public classes: - MutalyzerService ; Mutalyzer webservices. +Mutalyzer webservices. 
+ +@requires: soaplib.wsgi_soap.SimpleWSGISoapApp +@requires: soaplib.service.soapmethod +@requires: soaplib.serializers.primitive.String +@requires: soaplib.serializers.primitive.Integer +@requires: soaplib.serializers.primitive.Array +@requires: ZSI.fault.Fault + +@requires: Modules.Web +@requires: Modules.Db +@requires: Modules.Output +@requires: Modules.Config +@requires: Modules.Parser +@requires: Modules.Mapper + +@requires: Modules.Serializers.SoapMessage +@requires: Modules.Serializers.Mapping +@requires: Modules.Serializers.Transcript """ +# Public classes: +# - MutalyzerService ; Mutalyzer webservices. + from soaplib.wsgi_soap import SimpleWSGISoapApp from soaplib.service import soapmethod @@ -25,48 +43,46 @@ from Modules.Serializers import SoapMessage, Mapping, Transcript, \ class MutalyzerService(SimpleWSGISoapApp) : """ - Mutalyzer webservices. - - These methods are made public via a SOAP interface. - - Private methods: - __checkBuild(L, D, build) ; Check if the build is supported. - __checkChrom(L, D, chrom) ; Check if the chromosome is in our - database. - __checkPos(L, pos) ; Check if the position is valid. - - Public methods: - getTranscripts(build, chrom, ; Get all transcripts that overlap - pos) with a chromosomal position. - getTranscriptsRange(build, ; Get all transcripts that overlap - chrom, with a range on a chromosome. - pos1, - pos2, - method) - getGeneName(build, accno) ; Find the gene name associated with a - transcript. - mappingInfo(LOVD_ver, build, ; Convert a transcript coordinate to a - accNo, variant) chromosomal one, or vice versa. - transcriptInfo(LOVD_ver, ; Find transcription start and end, - build, and CDS end (in c. notation) for a - accNo) given transcript. - cTogConversion(self, build, ; Convert c. to g. - variant) - gTocConversion(self, build, ; Convert g. to c. - variant) + Mutalyzer webservices. + + These methods are made public via a SOAP interface. 
+ + Private methods: + - __checkBuild(L, D, build) ; Check if the build is supported. + - __checkChrom(L, D, chrom) ; Check if the chromosome is in our + database. + - __checkPos(L, pos) ; Check if the position is valid. + + Public methods: + - getTranscripts(build, chrom, pos); Get all transcripts that overlap + with a chromosomal position. + - getTranscriptsRange(build, chrom, pos1, pos2, method) ; Get all + transcripts that overlap with a range on a chromosome. + - getGeneName(build, accno) ; Find the gene name associated with a + transcript. + - mappingInfo(LOVD_ver, build, accNo, variant) ; Convert a transcript + coordinate to a chromosomal one, or vice versa. + - transcriptInfo(LOVD_ver, build, accNo) ; Find transcription start and + end, and CDS end (in I{c.} notation) for a given transcript. + - cTogConversion(self, build, variant) ; Convert I{c.} to I{g.} + - gTocConversion(self, build, variant) ; Convert I{g.} to I{c.} + """ - def __checkBuild(self, build, config) : + def __checkBuild(self, L, build, config) : """ - Check if the build is supported (hg18 or hg19). + Check if the build is supported (hg18 or hg19). + - Arguments: - L ; An output object for logging. - D ; A handle to the database. - build ; The build name that needs to be checked. + Returns: + - Nothing (but raises an EARG exception). - Returns: - Nothing (but raises an EARG exception). + @arg L: an output object for logging + @type L: object + @arg build: The human genome build name that needs to be checked + @type build: string + @arg config: configuration object of the Db module + @type config: object """ if not build in config.dbNames : @@ -79,15 +95,17 @@ class MutalyzerService(SimpleWSGISoapApp) : def __checkChrom(self, L, D, chrom) : """ - Check if the chromosome is in our database. + Check if the chromosome is in our database. - Arguments: - L ; An output object for logging. - D ; A handle to the database. - chrom ; The name of the chromosome. 
+ Returns: + - Nothing (but raises an EARG exception). - Returns: - Nothing (but raises an EARG exception). + @arg L: An output object for logging + @type L: object + @arg D: A handle to the database. + @type D: object + @arg chrom: The name of the chromosome + @type chrom: string """ if not D.isChrom(chrom) : @@ -100,14 +118,15 @@ class MutalyzerService(SimpleWSGISoapApp) : def __checkPos(self, L, pos) : """ - Check if the position is valid. + Check if the position is valid. - Arguments: - L ; An output object for logging. - pos ; The position. + Returns: + - Nothing (but raises an ERANGE exception). - Returns: - Nothing (but raises an ERANGE exception). + @arg L: An output object for logging + @type L: object + @arg pos: The position + @type pos: integer """ if pos < 1 : @@ -119,14 +138,15 @@ class MutalyzerService(SimpleWSGISoapApp) : def __checkVariant(self, L, variant) : """ - Check if a variant is provided. + Check if a variant is provided. - Arguments: - L ; An output object for logging. - variant ; The variant. + Returns: + - Nothing (but raises an EARG exception). - Returns: - Nothing (but raises an EARG exception). + @arg L: An output object for logging + @type L: object + @arg variant: The variant + @type variant: string """ if not variant : @@ -139,21 +159,23 @@ class MutalyzerService(SimpleWSGISoapApp) : @soapmethod(String, String, Integer, _returns = Array(String)) def getTranscripts(self, build, chrom, pos) : """ - Get all the transcripts that overlap with a chromosomal position. - - Arguments: - string build ; The build name encoded as "hg18" or "hg19". - string chrom ; A chromosome encoded as "chr1", ..., "chrY". - int pos ; A postion on the chromosome. - - Returns: - string ; A list of transcripts. - - On error an exception is raised: - detail ; Human readable description of the error. - faultstring: ; A code to indicate the type of error. - EARG ; The argument was not valid. - ERANGE ; An invalid range was given. 
+ Get all the transcripts that overlap with a chromosomal position. + + On error an exception is raised: + - detail ; Human readable description of the error. + - faultstring: ; A code to indicate the type of error. + - EARG ; The argument was not valid. + - ERANGE ; An invalid range was given. + + @arg build: The human genome build (hg19 or hg18) + @type build: string + @arg chrom: A chromosome encoded as "chr1", ..., "chrY" + @type chrom: string + @arg pos: A position on the chromosome + @type pos: integer + + @return: A list of transcripts + @rtype: list """ C = Config.Config() @@ -163,7 +185,7 @@ class MutalyzerService(SimpleWSGISoapApp) : "Received request getTranscripts(%s %s %s)" % (build, chrom, pos)) - self.__checkBuild(build, C.Db) + self.__checkBuild(L, build, C.Db) D = Db.Mapping(build, C.Db) self.__checkChrom(L, D, chrom) @@ -196,7 +218,7 @@ class MutalyzerService(SimpleWSGISoapApp) : "Received request getTranscriptsByGene(%s %s)" % (build, name)) - self.__checkBuild(build, C.Db) + self.__checkBuild(L, build, C.Db) D = Db.Mapping(build, C.Db) ret = D.get_TranscriptsByGeneName(name) @@ -208,23 +230,26 @@ class MutalyzerService(SimpleWSGISoapApp) : #getTranscriptsByGene @soapmethod(String, String, Integer, Integer, Integer, - _returns = Array(String)) + _returns = Array(String)) def getTranscriptsRange(self, build, chrom, pos1, pos2, method) : """ - Get all the transcripts that overlap with a range on a chromosome. - - Arguments: - string build ; The build name encoded as "hg18" or "hg19". - string chrom ; A chromosome encoded as "chr1", ..., "chrY". - int pos1 ; The first postion of the range. - int pos2 ; The last postion of the range. - int method ; The method of determining overlap: - 0 ; Return only the transcripts that completely - fall in the range [pos1, pos2]. - 1 ; Return all hit transcripts. - - Returns: - string ; A list of transcripts. + Get all the transcripts that overlap with a range on a chromosome. 
+ @arg build: The human genome build (hg19 or hg18) + @type build: string + @arg chrom: A chromosome encoded as "chr1", ..., "chrY" + @type chrom: string + @arg pos1: The first position of the range + @type pos1: integer + @arg pos2: The last position of the range + @type pos2: integer + @arg method: The method of determining overlap: + - 0 ; Return only the transcripts that completely fall in the range + [pos1, pos2]. + - 1 ; Return all hit transcripts + + @return: A list of transcripts + @rtype: list """ C = Config.Config() @@ -235,7 +260,7 @@ class MutalyzerService(SimpleWSGISoapApp) : chrom, pos1, pos2, method)) D = Db.Mapping(build, C.Db) - self.__checkBuild(build, C.Db) + self.__checkBuild(L, build, C.Db) ret = D.get_Transcripts(chrom, pos1, pos2, method) @@ -253,14 +278,15 @@ class MutalyzerService(SimpleWSGISoapApp) : @soapmethod(String, String, _returns = String) def getGeneName(self, build, accno) : """ - Find the gene name associated with a transcript. + Find the gene name associated with a transcript. - Arguments: - string build ; The build name encoded as "hg18" or "hg19". - string accno ; The identifier of a transcript. + @arg build: The human genome build (hg19 or hg18) + @type build: string + @arg accno: The identifier of a transcript + @type accno: string - Returns: - string ; The name of the associated gene. 
+ @return: The name of the associated gene + @rtype: string """ C = Config.Config() @@ -270,7 +296,7 @@ class MutalyzerService(SimpleWSGISoapApp) : "Received request getGeneName(%s %s)" % (build, accno)) D = Db.Mapping(build, C.Db) - self.__checkBuild(build, C.Db) + self.__checkBuild(L, build, C.Db) ret = D.get_GeneName(accno.split('.')[0]) @@ -285,43 +311,42 @@ class MutalyzerService(SimpleWSGISoapApp) : @soapmethod(String, String, String, String, _returns = Mapping) def mappingInfo(self, LOVD_ver, build, accNo, variant) : """ - Search for an NM number in the MySQL database, if the version - number matches, get the start and end positions in a variant and - translate these positions to g. notation if the variant is in c. - notation and vice versa. - - - If no end position is present, the start position is assumed to - be the end position. - - If the version number is not found in the database, an error - message is generated and a suggestion for an other version is - given. - - If the reference sequence is not found at all, an error is - returned. - - If no variant is present, an error is returned. - - If the variant is not accepted by the nomenclature parser, a - parse error will be printed. - - - Arguments (all strings): - LOVD_ver ; The LOVD version. - build ; The human genome build (hg19 or hg18). - accNo ; The NM accession number and version. - variant ; The variant. - - Returns: - complex object: - start_main ; The main coordinate of the start position - in c. (non-star) notation. - start_offset ; The offset coordinate of the start position - in c. notation (intronic position). - end_main ; The main coordinate of the end position in - c. (non-star) notation. - end_offset ; The offset coordinate of the end position in - c. notation (intronic position). - start_g ; The g. notation of the start position. - end_g ; The g. notation of the end position. - type ; The mutation type. 
- + Search for an NM number in the MySQL database, if the version + number matches, get the start and end positions in a variant and + translate these positions to I{g.} notation if the variant is in I{c.} + notation and vice versa. + + - If no end position is present, the start position is assumed to be + the end position. + - If the version number is not found in the database, an error message + is generated and a suggestion for another version is given. + - If the reference sequence is not found at all, an error is returned. + - If no variant is present, an error is returned. + - If the variant is not accepted by the nomenclature parser, a parse + error will be printed. + + @arg LOVD_ver: The LOVD version + @type LOVD_ver: string + @arg build: The human genome build (hg19 or hg18) + @type build: string + @arg accNo: The NM accession number and version + @type accNo: string + @arg variant: The variant + @type variant: string + + @return: complex object: + - start_main ; The main coordinate of the start position + in I{c.} (non-star) notation. + - start_offset ; The offset coordinate of the start position + in I{c.} notation (intronic position). + - end_main ; The main coordinate of the end position in + I{c.} (non-star) notation. + - end_offset ; The offset coordinate of the end position in + I{c.} notation (intronic position). + - start_g ; The I{g.} notation of the start position. + - end_g ; The I{g.} notation of the end position. + - type ; The mutation type. + @rtype: object """ C = Config.Config() @@ -345,21 +370,22 @@ class MutalyzerService(SimpleWSGISoapApp) : @soapmethod(String, String, String, _returns = Transcript) def transcriptInfo(self, LOVD_ver, build, accNo) : """ - Search for an NM number in the MySQL database, if the version - number matches, the transcription start and end and CDS end - in c. notation is returned. - - - Arguments (all strings: - LOVD_ver ; The LOVD version. - build ; The human genome build (hg19 or hg18). 
- accNo ; The NM accession number and version. - - Returns: - complex object: - trans_start ; Transcription start in c. notation. - trans_stop ; Transcription stop in c. notation. - CDS_stop ; CDS stop in c. notation. + Search for an NM number in the MySQL database, if the version + number matches, the transcription start and end and CDS end + in I{c.} notation is returned. + + @arg LOVD_ver: The LOVD version + @type LOVD_ver: string + @arg build: The human genome build (hg19 or hg18) + @type build: string + @arg accNo: The NM accession number and version + @type accNo: string + + @return: complex object: + - trans_start ; Transcription start in I{c.} notation. + - trans_stop ; Transcription stop in I{c.} notation. + - CDS_stop ; CDS stop in I{c.} notation. + @rtype: object """ C = Config.Config() @@ -381,14 +407,15 @@ class MutalyzerService(SimpleWSGISoapApp) : @soapmethod(String, String, _returns = String) def chromAccession(self, build, name) : """ - Get the accession number of a chromosome, given a name. + Get the accession number of a chromosome, given a name. - Arguments: - build ; The human genome build. - name ; The name of a chromosome. + @arg build: The human genome build (hg19 or hg18) + @type build: string + @arg name: The name of a chromosome (e.g. chr1) + @type name: string - Returns: - string ; The accession number of a chromosome. + @return: The accession number of a chromosome + @rtype: string """ C = Config.Config() # Read the configuration file. 
D = Db.Mapping(build, C.Db) @@ -397,7 +424,7 @@ class MutalyzerService(SimpleWSGISoapApp) : L.addMessage(__file__, -1, "INFO", "Received request chromAccession(%s %s)" % (build, name)) - self.__checkBuild(build, C.Db) + self.__checkBuild(L, build, C.Db) self.__checkChrom(L, D, name) result = D.chromAcc(name) @@ -413,14 +440,15 @@ class MutalyzerService(SimpleWSGISoapApp) : @soapmethod(String, String, _returns = String) def chromosomeName(self, build, accNo) : """ - Get the name of a chromosome, given a chromosome accession number. + Get the name of a chromosome, given a chromosome accession number. - Arguments: - build ; The human genome build. - accNo ; The accession number of a chromosome (NC_...). + @arg build: The human genome build (hg19 or hg18) + @type build: string + @arg accNo: The accession number of a chromosome (NC_...) + @type accNo: string - Returns: - string ; The name of a chromosome. + @return: The name of a chromosome + @rtype: string """ C = Config.Config() # Read the configuration file. D = Db.Mapping(build, C.Db) @@ -429,7 +457,7 @@ class MutalyzerService(SimpleWSGISoapApp) : L.addMessage(__file__, -1, "INFO", "Received request chromName(%s %s)" % (build, accNo)) - self.__checkBuild(build, C.Db) + self.__checkBuild(L, build, C.Db) # self.__checkChrom(L, D, name) result = D.chromName(accNo) @@ -445,14 +473,15 @@ class MutalyzerService(SimpleWSGISoapApp) : @soapmethod(String, String, _returns = String) def getchromName(self, build, acc) : """ - Get the chromosome name, given a transcript identifier (NM number). + Get the chromosome name, given a transcript identifier (NM number). - Arguments: - build ; The human genome build. - acc ; The NM accession number (version NOT included) + @arg build: The human genome build (hg19 or hg18) + @type build: string + @arg acc: The NM accession number (version NOT included) + @type acc: string - Returns: - string ; The name of a chromosome. 
+ @return: The name of a chromosome + @rtype: string """ C = Config.Config() # Read the configuration file. D = Db.Mapping(build, C.Db) @@ -461,7 +490,7 @@ class MutalyzerService(SimpleWSGISoapApp) : L.addMessage(__file__, -1, "INFO", "Received request getchromName(%s %s)" % (build, acc)) - self.__checkBuild(build, C.Db) + self.__checkBuild(L, build, C.Db) # self.__checkChrom(L, D, name) result = D.get_chromName(acc) @@ -477,17 +506,17 @@ class MutalyzerService(SimpleWSGISoapApp) : @soapmethod(String, String, _returns = Array(String)) def numberConversion(self, build, variant) : """ - Converts c. to g. notation or vice versa - + Converts I{c.} to I{g.} notation or vice versa - Arguments (all strings: - build ; The human genome build (hg19 or hg18). - variant ; The variant in either c. or g. notation, full HGVS - notation, including NM_ or NC_ accession number. - Returns: - string; The variant in either g. or c. notation. + @arg build: The human genome build (hg19 or hg18) + @type build: string + @arg variant: The variant in either I{c.} or I{g.} notation, full HGVS + notation, including NM_ or NC_ accession number + @type variant: string + @return: The variant in either I{g.} or I{c.} notation + @rtype: string """ C = Config.Config() # Read the configuration file. @@ -513,8 +542,15 @@ class MutalyzerService(SimpleWSGISoapApp) : #numberConversion @soapmethod(String, _returns = String) - def checkSyntax(self, variant) : + def checkSyntax(self, variant): """ + Checks the syntax of a variant. + + @arg variant: the variant to check + @type variant: string + + @return: message + @rtype: string """ C = Config.Config() # Read the configuration file. L = Output.Output(__file__, C.Output)