Commit 61049722 authored by Vermaat's avatar Vermaat
Browse files

Merge refactor-mutalyzer-branch r301 through r329.

git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/branches/namechecker-pdf-branch@330 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
parents 6f3696ff 7bc44ba1
......@@ -107,7 +107,8 @@ Alternatively, if you want to have a development environment, use:
sudo python setup.py develop
The development environment uses symlinks to this source directory, so you can
develop directly from here.
develop directly from here. This command should be re-issued whenever the
version number of Mutalyzer is updated.
Setup Mutalyzer
......
......@@ -73,7 +73,7 @@ Todo list:
- Check for os.path.join vulnerabilities.
- Use web.config.debug=False on production server and perhaps put this in
the configuration file.
- Add database indices to extras/post-install.sh script.
- Solution for database schema migration on version updates.
Code style guide:
- Follow PEP 8 (code) and PEP 257 (docstrings).
......
......@@ -291,7 +291,8 @@ CREATE TABLE BatchQueue (
JobID char(20) NOT NULL,
Input char(255) NOT NULL,
Flags char(20) DEFAULT NULL,
PRIMARY KEY (QueueID)
PRIMARY KEY (QueueID),
KEY JobQueue (JobID,QueueID)
);
CREATE TABLE GBInfo (
AccNo char(20) NOT NULL DEFAULT '',
......
......@@ -435,7 +435,7 @@ class Crossmap() :
return int(s)
#main2int
def int2offset(self, t) :
def int2offset(self, t, fuzzy=False):
"""
Convert a tuple of integers to offset-notation. This adds a `+',
and `u' or `d' to the offset when appropriate. The main value is
......@@ -443,17 +443,22 @@ class Crossmap() :
@arg t: A tuple of integers: (main, offset) in __STOP notation
@type t: tuple
@kwarg fuzzy: Denotes that the coordinate is fuzzy (i.e. offset is
unknown).
@type fuzzy: bool
@return: The offset in HGVS notation
@rtype: string
"""
if t[1] > 0 : # The exon boundary is downstream.
if fuzzy: return '+?'
if t[0] >= self.__trans_end : # It is downstream of the last exon.
return "+d" + str(t[1])
return '+' + str(t[1])
#if
if t[1] < 0 : # The exon boundary is upstream.
if fuzzy: return '-?'
if t[0] <= self.__trans_start : # It is upstream of the first exon.
return "-u" + str(-t[1])
return str(t[1])
......@@ -490,32 +495,38 @@ class Crossmap() :
return int(s[1:])
#offset2int
def tuple2string(self, t) :
def tuple2string(self, t, fuzzy=False) :
"""
Convert a tuple (main, offset) in __STOP notation to I{c.} notation.
@arg t: A tuple (main, offset) in __STOP notation
@type t: tuple
@kwarg fuzzy: Denotes that the coordinate is fuzzy (i.e. offset is
unknown).
@type fuzzy: bool
@return: The position in HGVS notation
@rtype: string
"""
return str(self.int2main(t[0])) + str(self.int2offset(t))
return str(self.int2main(t[0])) + str(self.int2offset(t, fuzzy))
#tuple2string
def g2c(self, a) :
def g2c(self, a, fuzzy=False) :
"""
Uses both g2x() and tuple2string() to translate a genomic position
to __STOP notation and then to I{c.} notation.
@arg a: The genomic position that must be translated
@type a: integer
@kwarg fuzzy: Denotes that the coordinate is fuzzy (i.e. offset is
unknown).
@type fuzzy: bool
@return: The position in HGVS notation
@rtype: string
"""
return self.tuple2string(self.g2x(a))
return self.tuple2string(self.g2x(a), fuzzy)
#g2c
def info(self) :
......
......@@ -81,7 +81,7 @@ class Locus(object) :
- exon ; A position list object.
- txTable ; The translation table.
- CM ; A Crossmap object.
@arg name: identifier of the locus
@type name: string
"""
......@@ -110,21 +110,37 @@ class Locus(object) :
self.proteinProduct = None
#__init__
def addToDescription(self, rawVariant) :
def cancelDescription(self):
"""
Set the description on this locus to 'unknown'.
This can be used if at some point we give up creating a sensible
description on this locus. It also makes sure future additions to
the description are ignored and it keeps the 'unknown' value.
@note: This depends on the check for the unknown value in the
addToDescription method. This is not a beautiful solution.
"""
self.description = '?'
#cancelDescription
def addToDescription(self, rawVariant):
"""
Expands the DNA description with a new raw variant.
@arg rawVariant: description of a single mutation
@type rawVariant: string
"""
if self.description:
self.description = "%s;%s" % (self.description, rawVariant)
else :
# Don't change anything if we already have an unknown value.
if self.description != '?':
self.description = "%s;%s" % (self.description, rawVariant)
else:
self.description = rawVariant
#addToDescription
#Locus
class Gene(object) :
"""
A Gene object, to store a list of Locus objects and the orientation of
......@@ -150,7 +166,7 @@ class Gene(object) :
- longName ;
Private variables (altered):
- __locusTag ;
@arg name: gene name
@type name: string
"""
......@@ -166,7 +182,7 @@ class Gene(object) :
def newLocusTag(self) :
"""
Generates a new Locus tag.
@return: Locus tag
@rtype: integer (3 digits, if < 100 preceded by 0's)
"""
......@@ -179,10 +195,10 @@ class Gene(object) :
def findLocus(self, name) :
"""
Find a transcript, given its name.
@arg name: transcript variant number
@type name: string
@return: transcript
@rtype: object
"""
......@@ -196,7 +212,7 @@ class Gene(object) :
def listLoci(self) :
"""
Provides a list of transcript variant numbers
@return: list of transcript variant numbers
@rtype: list
"""
......@@ -210,10 +226,10 @@ class Gene(object) :
def findLink(self, protAcc) :
"""
Look in the list of transcripts for a given protein accession number.
@arg protAcc: protein accession number
@type protAcc: string
@return: transcript
@rtype: object
"""
......@@ -280,10 +296,10 @@ class Record(object) :
def findGene(self, name) :
"""
Returns a Gene object, given its name.
@arg name: Gene name
@type name: string
@return: Gene object
@rtype: object
"""
......@@ -297,10 +313,10 @@ class Record(object) :
def listGenes(self) :
"""
List the names of all genes found in this record.
@return: Genes list
@rtype: list
"""
ret = []
......@@ -312,7 +328,7 @@ class Record(object) :
def addToDescription(self, rawVariant) :
"""
Expands the DNA description with a new raw variant.
@arg rawVariant: description of a single mutation
@type rawVariant: string
"""
......@@ -326,11 +342,11 @@ class Record(object) :
def toChromPos(self, i) :
"""
Converts a g. position (relative to the start of the record) to a
chromosomal g. position
chromosomal g. position
@arg i: g. position (relative to the start of the record)
@type i: integer
@return: chromosomal g. position
@rtype: integer
"""
......@@ -366,10 +382,10 @@ class GenRecord() :
def __init__(self, output, config) :
"""
Initialise the class.
Public variable:
- record ; A record object
@arg output: an output object
@type output: object
@arg config: a config object
......@@ -384,13 +400,13 @@ class GenRecord() :
def __checkExonList(self, exonList, CDSpos) :
"""
@todo document me
@arg exonList: list of splice sites
@type exonList: list (object)
@arg CDSpos: location of the CDS
@type CDSpos: object
@return:
@return:
@rtype: boolean
"""
......@@ -414,12 +430,12 @@ class GenRecord() :
return True
return False
#__checkExonList
def __constructCDS(self, mRNA, CDSpos) :
"""
Construct a list of coordinates that contains CDS start and stop and
Construct a list of coordinates that contains CDS start and stop and
the internal splice sites.
@arg mRNA: mRNA positions/coordinates list
@type mRNA: list (integer)
@arg CDSpos: coding DNA positions/coordinates
......@@ -449,14 +465,14 @@ class GenRecord() :
"""
Return the reverse-complement of a DNA sequence if the gene is in
the reverse orientation.
@arg gene: Gene
@arg gene: Gene
@type gene: object
@arg string: DNA sequence
@type string: string
@kwarg string_reverse: DNA sequence to use (if not None) for the
reverse complement.
@return: reverse-complement (if applicable), otherwise return the
original.
@rtype: string
......@@ -472,7 +488,7 @@ class GenRecord() :
"""
Check if the record in self.record is compatible with mutalyzer.
Update the mRNA PList with the exon and CDS data.
@todo: This function should really check the record for minimal
requirements
"""
......@@ -602,7 +618,8 @@ class GenRecord() :
return None
#current_transcript
def name(self, start_g, stop_g, varType, arg1, arg2, roll, arg1_reverse=None):
def name(self, start_g, stop_g, varType, arg1, arg2, roll, arg1_reverse=None,
start_fuzzy=False, stop_fuzzy=False):
"""
Generate variant descriptions for all genes, transcripts, etc.
......@@ -620,6 +637,10 @@ class GenRecord() :
@type roll: tuple (integer, integer)
@kwarg arg1_reverse: argument 1 to be used on reverse strand
@type arg1_reverse: string
@kwarg start_fuzzy: Indicates if start position of variant is fuzzy.
@type start_fuzzy: bool
@kwarg stop_fuzzy: Indicates if stop position of variant is fuzzy.
@type stop_fuzzy: bool
"""
forwardStart = start_g
forwardStop = stop_g
......@@ -634,23 +655,64 @@ class GenRecord() :
if varType != "subst" :
if forwardStart != forwardStop :
self.record.addToDescription("%s_%s%s%s" % (forwardStart,
forwardStop, varType, arg1))
self.record.addToChromDescription("%s_%s%s%s" % (
self.record.toChromPos(forwardStart),
self.record.toChromPos(forwardStop), varType, arg1))
# Todo: Fuzzy offsets to genomic positions (see bug #38).
#
# The genomic positioning is problematic. We would like to
# have it in brackets (as fuzzy positions), like the above
# g.(34299_23232)del example.
#
# Now consider a variant c.a-?_b+18del where only the offset
# before the exon is unknown but the offset after the exon is
# exact. Now a genomic description like g.(34299)_23232del
# comes to mind, however, this notation is not allowed by the
# HGVS grammar.
#
# I think all we can do is to treat both positions as fuzzy in
# the genomic description, even if only one of them really is.
#
# Peter thinks the HGVS grammar should at some point be
# updated to allow the brackets around individual locations.
if start_fuzzy or stop_fuzzy:
self.record.addToDescription("(%s_%s)%s%s" % (
forwardStart, forwardStop, varType, arg1))
self.record.addToChromDescription("(%s_%s)%s%s" % (
self.record.toChromPos(forwardStart),
self.record.toChromPos(forwardStop), varType, arg1))
else:
self.record.addToDescription("%s_%s%s%s" % (
forwardStart, forwardStop, varType, arg1))
self.record.addToChromDescription("%s_%s%s%s" % (
self.record.toChromPos(forwardStart),
self.record.toChromPos(forwardStop), varType, arg1))
#if
else :
self.record.addToDescription("%s%s%s" % (forwardStart, varType,
arg1))
self.record.addToChromDescription("%s%s%s" % (
self.record.toChromPos(forwardStart), varType, arg1))
if start_fuzzy or stop_fuzzy:
# Todo: Current HGVS does not allow for () around single
# positions, only around ranges (see above and #38).
self.record.addToDescription("(%s)%s%s" % (
forwardStart, varType, arg1))
self.record.addToChromDescription("(%s)%s%s" % (
self.record.toChromPos(forwardStart), varType, arg1))
else:
self.record.addToDescription("%s%s%s" % (
forwardStart, varType, arg1))
self.record.addToChromDescription("%s%s%s" % (
self.record.toChromPos(forwardStart), varType, arg1))
#else
#if
else :
self.record.addToDescription("%s%c>%c" % (forwardStart, arg1, arg2))
self.record.addToChromDescription("%s%c>%c" % (
self.record.toChromPos(forwardStart), arg1, arg2))
if start_fuzzy or stop_fuzzy:
# Todo: Current HGVS does not allow for () around single
# positions, only around ranges (see above and #38).
self.record.addToDescription("(%s)%c>%c" % (
forwardStart, arg1, arg2))
self.record.addToChromDescription("(%s)%c>%c" % (
self.record.toChromPos(forwardStart), arg1, arg2))
else:
self.record.addToDescription("%s%c>%c" % (
forwardStart, arg1, arg2))
self.record.addToChromDescription("%s%c>%c" % (
self.record.toChromPos(forwardStart), arg1, arg2))
for i in self.record.geneList :
for j in i.transcriptList :
......@@ -683,24 +745,45 @@ class GenRecord() :
if varType != "subst" :
if orientedStart != orientedStop :
j.addToDescription("%s_%s%s%s" % (
j.CM.g2c(orientedStart), j.CM.g2c(orientedStop),
varType, self.__maybeInvert(i, arg1, arg1_reverse)))
self.checkIntron(i, j, orientedStart)
self.checkIntron(i, j, orientedStop)
if (start_fuzzy or stop_fuzzy) and not j.current:
# Don't generate descriptions on transcripts
# other than the current in the case of fuzzy
# positions.
j.cancelDescription()
else:
j.addToDescription("%s_%s%s%s" % (
j.CM.g2c(orientedStart, start_fuzzy),
j.CM.g2c(orientedStop, stop_fuzzy),
varType, self.__maybeInvert(i, arg1, arg1_reverse)))
self.checkIntron(i, j, orientedStart)
self.checkIntron(i, j, orientedStop)
#if
else :
j.addToDescription("%s%s%s" % (
j.CM.g2c(orientedStart), varType,
self.__maybeInvert(i, arg1, arg1_reverse)))
self.checkIntron(i, j, orientedStart)
if start_fuzzy and not j.current:
# Don't generate descriptions on transcripts
# other than the current in the case of fuzzy
# positions.
j.cancelDescription()
else:
j.addToDescription("%s%s%s" % (
j.CM.g2c(orientedStart, start_fuzzy),
varType,
self.__maybeInvert(i, arg1, arg1_reverse)))
self.checkIntron(i, j, orientedStart)
#else
#if
else :
j.addToDescription("%s%c>%c" % (j.CM.g2c(orientedStart),
self.__maybeInvert(i, arg1, arg1_reverse),
self.__maybeInvert(i, arg2)))
self.checkIntron(i, j, orientedStart)
if start_fuzzy and not j.current:
# Don't generate descriptions on transcripts
# other than the current in the case of fuzzy
# positions.
j.cancelDescription()
else:
j.addToDescription("%s%c>%c" % (
j.CM.g2c(orientedStart, start_fuzzy),
self.__maybeInvert(i, arg1, arg1_reverse),
self.__maybeInvert(i, arg2)))
self.checkIntron(i, j, orientedStart)
#else
#if
#for
......@@ -710,14 +793,14 @@ class GenRecord() :
def checkIntron(self, gene, transcript, position) :
"""
Checks if a position is on or near a splice site
@arg gene: Gene
@type gene: object
@arg transcript: transcript
@type transcript: object
@arg position: g. position
@type position: integer
@return:
@todo: Also check a range properly.
"""
......
......@@ -20,8 +20,8 @@ import os
RELEASE = False
__version_info__ = ('2', '0', 'beta-10', 'dev')
__date__ = '27 Jun 2011'
__version_info__ = ('2', '0', 'beta-11', 'dev')
__date__ = '21 Jul 2011'
__version__ = '.'.join(__version_info__)
......
......@@ -138,14 +138,37 @@ class Config():
# We don't remove these after the tests, since they might be
# useful for debugging.
if mutalyzer.is_test():
handle, filename = tempfile.mkstemp(suffix='.log',
prefix='mutalyzer-tests-')
os.close(handle)
self.Output.log = filename
dirname = tempfile.mkdtemp(suffix='.cache',
prefix='mutalyzer-tests-')
self.Retriever.cache = dirname
self.Scheduler.resultsDir = dirname
# Todo:
#
# This needs some refactoring. The problem with the temporary
# file and dir names is that they will not be used by the
# (running) batch daemon, which will thus save its results to
# the 'normal' directory.
# Furthermore, subsequent web requests from a unit test will
# use different configuration instantiations, so might not
# see results from previous requests.
#
# We need a more robust solution for different configurations,
# depending on the running user/setting (e.g. unit tests).
#
# Idea: Don't create a local instance of the website in the
# unit tests, but only use running instances of all servers
# (website, webservice, batch daemon). They will use their
# own 'normal' configuration.
# All other parts of the unit tests will use temporary test
# configuration values. We might even decorate the tests
# needing server access as such and provide the option of
# skipping these.
#handle, filename = tempfile.mkstemp(suffix='.log',
# prefix='mutalyzer-tests-')
#os.close(handle)
#self.Output.log = filename
#dirname = tempfile.mkdtemp(suffix='.cache',
# prefix='mutalyzer-tests-')
#self.Retriever.cache = dirname
#self.Scheduler.resultsDir = dirname
pass
except KeyError as e:
raise ConfigurationError('Missing configuration value: %s' % e)
......
......@@ -95,9 +95,10 @@
<b>Affected transcripts:</b><br>
<br>
<tt tal:repeat = "i descriptions">
<a tal:content = "i/0"
<a tal:condition = "i/1" tal:content = "i/0"
tal:attributes =
"href string:checkForward?mutationName=${i/1}"></a><br>
"href string:checkForward?mutationName=${i/1}"></a><tal
tal:condition = "not:i/1" tal:replace = "i/0"></tal><br>
</tt>
<br>
<br>
......
......@@ -468,7 +468,7 @@
<td colspan="2">
<a id="page_external_oldmut"
onclick="swapActive('external_oldmut');"
href="http://www.mutalyzer.nl/1.0.4_old/"
href="http://132.229.137.14/1.0.4_old/"
class="vertnavsub">Mutalyzer 1.0.4</a>
</td>
</tr>
......
......@@ -36,6 +36,7 @@ class _NotDNAError(_RawVariantError): pass
class _PositionsNotConsecutiveError(_RawVariantError): pass
class _LengthMismatchError(_RawVariantError): pass
class _ReferenceMismatchError(_RawVariantError): pass
class _RangeInsertionError(_RawVariantError): pass
class _OffsetSignError(_RawVariantError):
def __init__(self, main, offset, acceptor):
self.main = main
......@@ -136,9 +137,9 @@ def _check_argument(argument, reference, first, last, output):
@raise _LengthMismatchError: The argument is a length, but it does not
match the given range length.
@raise NotDNAError: The argument should be DNA, but it is not.
@raise ReferenceMismatchError: The argument is DNA, but it does not
match the given reference.
@raise _NotDNAError: The argument should be DNA, but it is not.
@raise _ReferenceMismatchError: The argument is DNA, but it does not
match the given reference.
"""
if not argument:
# The argument is optional, if it is not present, it is correct.
......@@ -301,7 +302,8 @@ def apply_substitution(position, original, substitute, mutator, record, O):
#apply_substitution
def apply_deletion_duplication(first, last, type, mutator, record, O):
def apply_deletion_duplication(first, last, type, mutator, record, O,
first_fuzzy=False, last_fuzzy=False):
"""
Do a semantic check for a deletion or duplication, do the actual
deletion/duplication and give it a name.
......@@ -318,6 +320,13 @@ def apply_deletion_duplication(first, last, type, mutator, record, O):
@type record: Modules.GenRecord.GenRecord
@arg O: The Output object.
@type O: Modules.Output.Output
@kwarg first_fuzzy: Denotes that the start position is fuzzy (e.g. in the
case of an unknown offset in c. notation).
@type first_fuzzy: bool
@kwarg last_fuzzy: Denotes that the end position is fuzzy (e.g. in the
case of an unknown offset in c. notation).
@type last_fuzzy: bool
"""
reverse_roll, forward_roll = util.roll(mutator.orig, first, last)
......@@ -327,7 +336,7 @@ def apply_deletion_duplication(first, last, type, mutator, record, O):
# We only have to consider the forward roll, since RNA reference
# sequences are always orientated in correspondence with the transcript.
original_forward_roll = forward_roll
if record.record.molType == 'n':
if record.record.molType != 'g':
# Todo: Do we assume .geneList[0].transcriptList[0] is the selected
# transcript here?? Why not use record.current_transcript?
splice_sites = record.record.geneList[0].transcriptList[0] \
......@@ -390,7 +399,9 @@ def apply_deletion_duplication(first, last, type, mutator, record, O):
else:
mutator.dupM(first, last)