Commit 61049722 authored by Vermaat
Browse files

Merge refactor-mutalyzer-branch r301 through r329.

git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/branches/namechecker-pdf-branch@330 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
parents 6f3696ff 7bc44ba1
...@@ -107,7 +107,8 @@ Alternatively, if you want to have a development environment, use: ...@@ -107,7 +107,8 @@ Alternatively, if you want to have a development environment, use:
sudo python setup.py develop sudo python setup.py develop
The development environment uses symlinks to this source directory, so you can The development environment uses symlinks to this source directory, so you can
develop directly from here. develop directly from here. This command should be re-issued whenever the
version number of Mutalyzer is updated.
Setup Mutalyzer Setup Mutalyzer
......
...@@ -73,7 +73,7 @@ Todo list: ...@@ -73,7 +73,7 @@ Todo list:
- Check for os.path.join vulnerabilities. - Check for os.path.join vulnerabilities.
- Use web.config.debug=False on production server and perhaps put this in - Use web.config.debug=False on production server and perhaps put this in
the configuration file. the configuration file.
- Add database indices to extras/post-install.sh script. - Solution for database schema migration on version updates.
Code style guide: Code style guide:
- Follow PEP 8 (code) and PEP 257 (docstrings). - Follow PEP 8 (code) and PEP 257 (docstrings).
......
...@@ -291,7 +291,8 @@ CREATE TABLE BatchQueue ( ...@@ -291,7 +291,8 @@ CREATE TABLE BatchQueue (
JobID char(20) NOT NULL, JobID char(20) NOT NULL,
Input char(255) NOT NULL, Input char(255) NOT NULL,
Flags char(20) DEFAULT NULL, Flags char(20) DEFAULT NULL,
PRIMARY KEY (QueueID) PRIMARY KEY (QueueID),
KEY JobQueue (JobID,QueueID)
); );
CREATE TABLE GBInfo ( CREATE TABLE GBInfo (
AccNo char(20) NOT NULL DEFAULT '', AccNo char(20) NOT NULL DEFAULT '',
......
...@@ -435,7 +435,7 @@ class Crossmap() : ...@@ -435,7 +435,7 @@ class Crossmap() :
return int(s) return int(s)
#main2int #main2int
def int2offset(self, t) : def int2offset(self, t, fuzzy=False):
""" """
Convert a tuple of integers to offset-notation. This adds a `+', Convert a tuple of integers to offset-notation. This adds a `+',
and `u' or `d' to the offset when appropriate. The main value is and `u' or `d' to the offset when appropriate. The main value is
...@@ -443,17 +443,22 @@ class Crossmap() : ...@@ -443,17 +443,22 @@ class Crossmap() :
@arg t: A tuple of integers: (main, offset) in __STOP notation @arg t: A tuple of integers: (main, offset) in __STOP notation
@type t: tuple @type t: tuple
@kwarg fuzzy: Denotes that the coordinate is fuzzy (i.e. offset is
unknown).
@type fuzzy: bool
@return: The offset in HGVS notation @return: The offset in HGVS notation
@rtype: string @rtype: string
""" """
if t[1] > 0 : # The exon boundary is downstream. if t[1] > 0 : # The exon boundary is downstream.
if fuzzy: return '+?'
if t[0] >= self.__trans_end : # It is downstream of the last exon. if t[0] >= self.__trans_end : # It is downstream of the last exon.
return "+d" + str(t[1]) return "+d" + str(t[1])
return '+' + str(t[1]) return '+' + str(t[1])
#if #if
if t[1] < 0 : # The exon boundary is upstream. if t[1] < 0 : # The exon boundary is upstream.
if fuzzy: return '-?'
if t[0] <= self.__trans_start : # It is upstream of the first exon. if t[0] <= self.__trans_start : # It is upstream of the first exon.
return "-u" + str(-t[1]) return "-u" + str(-t[1])
return str(t[1]) return str(t[1])
...@@ -490,32 +495,38 @@ class Crossmap() : ...@@ -490,32 +495,38 @@ class Crossmap() :
return int(s[1:]) return int(s[1:])
#offset2int #offset2int
def tuple2string(self, t) : def tuple2string(self, t, fuzzy=False) :
""" """
Convert a tuple (main, offset) in __STOP notation to I{c.} notation. Convert a tuple (main, offset) in __STOP notation to I{c.} notation.
@arg t: A tuple (main, offset) in __STOP notation @arg t: A tuple (main, offset) in __STOP notation
@type t: tuple @type t: tuple
@kwarg fuzzy: Denotes that the coordinate is fuzzy (i.e. offset is
unknown).
@type fuzzy: bool
@return: The position in HGVS notation @return: The position in HGVS notation
@rtype: string @rtype: string
""" """
return str(self.int2main(t[0])) + str(self.int2offset(t)) return str(self.int2main(t[0])) + str(self.int2offset(t, fuzzy))
#tuple2string #tuple2string
def g2c(self, a) : def g2c(self, a, fuzzy=False) :
""" """
Uses both g2x() and tuple2string() to translate a genomic position Uses both g2x() and tuple2string() to translate a genomic position
to __STOP notation to I{c.} notation. to __STOP notation to I{c.} notation.
@arg a: The genomic position that must be translated @arg a: The genomic position that must be translated
@type a: integer @type a: integer
@kwarg fuzzy: Denotes that the coordinate is fuzzy (i.e. offset is
unknown).
@type fuzzy: bool
@return: The position in HGVS notation @return: The position in HGVS notation
@rtype: string @rtype: string
""" """
return self.tuple2string(self.g2x(a)) return self.tuple2string(self.g2x(a), fuzzy)
#g2c #g2c
def info(self) : def info(self) :
......
...@@ -81,7 +81,7 @@ class Locus(object) : ...@@ -81,7 +81,7 @@ class Locus(object) :
- exon ; A position list object. - exon ; A position list object.
- txTable ; The translation table. - txTable ; The translation table.
- CM ; A Crossmap object. - CM ; A Crossmap object.
@arg name: identifier of the locus @arg name: identifier of the locus
@type name: string @type name: string
""" """
...@@ -110,21 +110,37 @@ class Locus(object) : ...@@ -110,21 +110,37 @@ class Locus(object) :
self.proteinProduct = None self.proteinProduct = None
#__init__ #__init__
def addToDescription(self, rawVariant) : def cancelDescription(self):
"""
Set the description on this locus to 'unknown'.
This can be used if at some point we give up creating a sensible
description on this locus. It also makes sure future additions to
the description are ignored and it keeps the 'unknown' value.
@note: This depends on the check for the unknown value in the
addToDescription method. This is not a beautiful solution.
"""
self.description = '?'
#cancelDescription
def addToDescription(self, rawVariant):
""" """
Expands the DNA description with a new raw variant. Expands the DNA description with a new raw variant.
@arg rawVariant: description of a single mutation @arg rawVariant: description of a single mutation
@type rawVariant: string @type rawVariant: string
""" """
if self.description: if self.description:
self.description = "%s;%s" % (self.description, rawVariant) # Don't change anything if we already have an unknown value.
else : if self.description != '?':
self.description = "%s;%s" % (self.description, rawVariant)
else:
self.description = rawVariant self.description = rawVariant
#addToDescription #addToDescription
#Locus #Locus
class Gene(object) : class Gene(object) :
""" """
A Gene object, to store a list of Locus objects and the orientation of A Gene object, to store a list of Locus objects and the orientation of
...@@ -150,7 +166,7 @@ class Gene(object) : ...@@ -150,7 +166,7 @@ class Gene(object) :
- longName ; - longName ;
Private variables (altered): Private variables (altered):
- __locusTag ; - __locusTag ;
@arg name: gene name @arg name: gene name
@type name: string @type name: string
""" """
...@@ -166,7 +182,7 @@ class Gene(object) : ...@@ -166,7 +182,7 @@ class Gene(object) :
def newLocusTag(self) : def newLocusTag(self) :
""" """
Generates a new Locus tag. Generates a new Locus tag.
@return: Locus tag @return: Locus tag
@rtype: integer (3 digits, if < 100 preceded with 0's) @rtype: integer (3 digits, if < 100 preceded with 0's)
""" """
...@@ -179,10 +195,10 @@ class Gene(object) : ...@@ -179,10 +195,10 @@ class Gene(object) :
def findLocus(self, name) : def findLocus(self, name) :
""" """
Find a transcript, given its name. Find a transcript, given its name.
@arg name: transcript variant number @arg name: transcript variant number
@type name: string @type name: string
@return: transcript @return: transcript
@rtype: object @rtype: object
""" """
...@@ -196,7 +212,7 @@ class Gene(object) : ...@@ -196,7 +212,7 @@ class Gene(object) :
def listLoci(self) : def listLoci(self) :
""" """
Provides a list of transcript variant numbers Provides a list of transcript variant numbers
@return: list of transcript variant numbers @return: list of transcript variant numbers
@rtype: list @rtype: list
""" """
...@@ -210,10 +226,10 @@ class Gene(object) : ...@@ -210,10 +226,10 @@ class Gene(object) :
def findLink(self, protAcc) : def findLink(self, protAcc) :
""" """
Look in the list of transcripts for a given protein accession number. Look in the list of transcripts for a given protein accession number.
@arg protAcc: protein accession number @arg protAcc: protein accession number
@type protAcc: string @type protAcc: string
@return: transcript @return: transcript
@rtype: object @rtype: object
""" """
...@@ -280,10 +296,10 @@ class Record(object) : ...@@ -280,10 +296,10 @@ class Record(object) :
def findGene(self, name) : def findGene(self, name) :
""" """
Returns a Gene object, given its name. Returns a Gene object, given its name.
@arg name: Gene name @arg name: Gene name
@type name: string @type name: string
@return: Gene object @return: Gene object
@rtype: object @rtype: object
""" """
...@@ -297,10 +313,10 @@ class Record(object) : ...@@ -297,10 +313,10 @@ class Record(object) :
def listGenes(self) : def listGenes(self) :
""" """
List the names of all genes found in this record. List the names of all genes found in this record.
@return: Genes list @return: Genes list
@rtype: list @rtype: list
""" """
ret = [] ret = []
...@@ -312,7 +328,7 @@ class Record(object) : ...@@ -312,7 +328,7 @@ class Record(object) :
def addToDescription(self, rawVariant) : def addToDescription(self, rawVariant) :
""" """
Expands the DNA description with a new raw variant. Expands the DNA description with a new raw variant.
@arg rawVariant: description of a single mutation @arg rawVariant: description of a single mutation
@type rawVariant: string @type rawVariant: string
""" """
...@@ -326,11 +342,11 @@ class Record(object) : ...@@ -326,11 +342,11 @@ class Record(object) :
def toChromPos(self, i) : def toChromPos(self, i) :
""" """
Converts a g. position (relative to the start of the record) to a Converts a g. position (relative to the start of the record) to a
chromosomal g. position chromosomal g. position
@arg i: g. position (relative to the start of the record) @arg i: g. position (relative to the start of the record)
@type i: integer @type i: integer
@return: chromosomal g. position @return: chromosomal g. position
@rtype: integer @rtype: integer
""" """
...@@ -366,10 +382,10 @@ class GenRecord() : ...@@ -366,10 +382,10 @@ class GenRecord() :
def __init__(self, output, config) : def __init__(self, output, config) :
""" """
Initialise the class. Initialise the class.
Public variable: Public variable:
- record ; A record object - record ; A record object
@arg output: an output object @arg output: an output object
@type output: object @type output: object
@arg config: a config object @arg config: a config object
...@@ -384,13 +400,13 @@ class GenRecord() : ...@@ -384,13 +400,13 @@ class GenRecord() :
def __checkExonList(self, exonList, CDSpos) : def __checkExonList(self, exonList, CDSpos) :
""" """
@todo document me @todo document me
@arg exonList: list of splice sites @arg exonList: list of splice sites
@type exonList: list (object) @type exonList: list (object)
@arg CDSpos: location of the CDS @arg CDSpos: location of the CDS
@type CDSpos: object @type CDSpos: object
@return: @return:
@rtype: boolean @rtype: boolean
""" """
...@@ -414,12 +430,12 @@ class GenRecord() : ...@@ -414,12 +430,12 @@ class GenRecord() :
return True return True
return False return False
#__checkExonList #__checkExonList
def __constructCDS(self, mRNA, CDSpos) : def __constructCDS(self, mRNA, CDSpos) :
""" """
Construct a list of coordinates that contains CDS start and stop and Construct a list of coordinates that contains CDS start and stop and
the internal splice sites. the internal splice sites.
@arg mRNA: mRNA positions/coordinates list @arg mRNA: mRNA positions/coordinates list
@type mRNA: list (integer) @type mRNA: list (integer)
@arg CDSpos: coding DNA positions/coordinates @arg CDSpos: coding DNA positions/coordinates
...@@ -449,14 +465,14 @@ class GenRecord() : ...@@ -449,14 +465,14 @@ class GenRecord() :
""" """
Return the reverse-complement of a DNA sequence if the gene is in Return the reverse-complement of a DNA sequence if the gene is in
the reverse orientation. the reverse orientation.
@arg gene: Gene @arg gene: Gene
@type gene: object @type gene: object
@arg string: DNA sequence @arg string: DNA sequence
@type string: string @type string: string
@kwarg string_reverse: DNA sequence to use (if not None) for the @kwarg string_reverse: DNA sequence to use (if not None) for the
reverse complement. reverse complement.
@return: reverse-complement (if applicable), otherwise return the @return: reverse-complement (if applicable), otherwise return the
original. original.
@rtype: string @rtype: string
...@@ -472,7 +488,7 @@ class GenRecord() : ...@@ -472,7 +488,7 @@ class GenRecord() :
""" """
Check if the record in self.record is compatible with mutalyzer. Check if the record in self.record is compatible with mutalyzer.
Update the mRNA PList with the exon and CDS data. Update the mRNA PList with the exon and CDS data.
@todo: This function should really check the record for minimal @todo: This function should really check the record for minimal
requirements requirements
""" """
...@@ -602,7 +618,8 @@ class GenRecord() : ...@@ -602,7 +618,8 @@ class GenRecord() :
return None return None
#current_transcript #current_transcript
def name(self, start_g, stop_g, varType, arg1, arg2, roll, arg1_reverse=None): def name(self, start_g, stop_g, varType, arg1, arg2, roll, arg1_reverse=None,
start_fuzzy=False, stop_fuzzy=False):
""" """
Generate variant descriptions for all genes, transcripts, etc. Generate variant descriptions for all genes, transcripts, etc.
...@@ -620,6 +637,10 @@ class GenRecord() : ...@@ -620,6 +637,10 @@ class GenRecord() :
@type roll: tuple (integer, integer) @type roll: tuple (integer, integer)
@kwarg arg1_reverse: argument 1 to be used on reverse strand @kwarg arg1_reverse: argument 1 to be used on reverse strand
@type arg1_reverse: string @type arg1_reverse: string
@kwarg start_fuzzy: Indicates if start position of variant is fuzzy.
@type start_fuzzy: bool
@kwarg stop_fuzzy: Indicates if stop position of variant is fuzzy.
@type stop_fuzzy: bool
""" """
forwardStart = start_g forwardStart = start_g
forwardStop = stop_g forwardStop = stop_g
...@@ -634,23 +655,64 @@ class GenRecord() : ...@@ -634,23 +655,64 @@ class GenRecord() :
if varType != "subst" : if varType != "subst" :
if forwardStart != forwardStop : if forwardStart != forwardStop :
self.record.addToDescription("%s_%s%s%s" % (forwardStart, # Todo: Fuzzy offsets to genomic positions (see bug #38).
forwardStop, varType, arg1)) #
self.record.addToChromDescription("%s_%s%s%s" % ( # The genomic positioning is problematic. We would like to
self.record.toChromPos(forwardStart), # have it in brackets (as fuzzy positions), like the above
self.record.toChromPos(forwardStop), varType, arg1)) # g.(34299_23232)del example.
#
# Now consider a variant c.a-?_b+18del where only the offset
# before the exon is unknown but the offset after the exon is
# exact. Now a genomic description like g.(34299)_23232del
# comes to mind, however, this notation is not allowed by the
# HGVS grammar.
#
# I think all we can do is to treat both positions as fuzzy in
# the genomic description, even if only one of them really is.
#
# Peter thinks the HGVS grammar should at some point be
# updated to allow the brackets around individual locations.
if start_fuzzy or stop_fuzzy:
self.record.addToDescription("(%s_%s)%s%s" % (
forwardStart, forwardStop, varType, arg1))
self.record.addToChromDescription("(%s_%s)%s%s" % (
self.record.toChromPos(forwardStart),
self.record.toChromPos(forwardStop), varType, arg1))
else:
self.record.addToDescription("%s_%s%s%s" % (
forwardStart, forwardStop, varType, arg1))
self.record.addToChromDescription("%s_%s%s%s" % (
self.record.toChromPos(forwardStart),
self.record.toChromPos(forwardStop), varType, arg1))
#if #if
else : else :
self.record.addToDescription("%s%s%s" % (forwardStart, varType, if start_fuzzy or stop_fuzzy:
arg1)) # Todo: Current HGVS does not allow for () around single
self.record.addToChromDescription("%s%s%s" % ( # positions, only around ranges (see above and #38).
self.record.toChromPos(forwardStart), varType, arg1)) self.record.addToDescription("(%s)%s%s" % (
forwardStart, varType, arg1))
self.record.addToChromDescription("(%s)%s%s" % (
self.record.toChromPos(forwardStart), varType, arg1))
else:
self.record.addToDescription("%s%s%s" % (
forwardStart, varType, arg1))
self.record.addToChromDescription("%s%s%s" % (
self.record.toChromPos(forwardStart), varType, arg1))
#else #else
#if #if
else : else :
self.record.addToDescription("%s%c>%c" % (forwardStart, arg1, arg2)) if start_fuzzy or stop_fuzzy:
self.record.addToChromDescription("%s%c>%c" % ( # Todo: Current HGVS does not allow for () around single
self.record.toChromPos(forwardStart), arg1, arg2)) # positions, only around ranges (see above and #38).
self.record.addToDescription("(%s)%c>%c" % (
forwardStart, arg1, arg2))
self.record.addToChromDescription("(%s)%c>%c" % (
self.record.toChromPos(forwardStart), arg1, arg2))
else:
self.record.addToDescription("%s%c>%c" % (
forwardStart, arg1, arg2))
self.record.addToChromDescription("%s%c>%c" % (
self.record.toChromPos(forwardStart), arg1, arg2))
for i in self.record.geneList : for i in self.record.geneList :
for j in i.transcriptList : for j in i.transcriptList :
...@@ -683,24 +745,45 @@ class GenRecord() : ...@@ -683,24 +745,45 @@ class GenRecord() :
if varType != "subst" : if varType != "subst" :
if orientedStart != orientedStop : if orientedStart != orientedStop :
j.addToDescription("%s_%s%s%s" % ( if (start_fuzzy or stop_fuzzy) and not j.current:
j.CM.g2c(orientedStart), j.CM.g2c(orientedStop), # Don't generate descriptions on transcripts
varType, self.__maybeInvert(i, arg1, arg1_reverse))) # other than the current in the case of fuzzy
self.checkIntron(i, j, orientedStart) # positions.
self.checkIntron(i, j, orientedStop) j.cancelDescription()
else:
j.addToDescription("%s_%s%s%s" % (
j.CM.g2c(orientedStart, start_fuzzy),
j.CM.g2c(orientedStop, stop_fuzzy),
varType, self.__maybeInvert(i, arg1, arg1_reverse)))
self.checkIntron(i, j, orientedStart)
self.checkIntron(i, j, orientedStop)
#if #if
else : else :
j.addToDescription("%s%s%s" % ( if start_fuzzy and not j.current:
j.CM.g2c(orientedStart), varType, # Don't generate descriptions on transcripts
self.__maybeInvert(i, arg1, arg1_reverse))) # other than the current in the case of fuzzy
self.checkIntron(i, j, orientedStart) # positions.
j.cancelDescription()
else:
j.addToDescription("%s%s%s" % (
j.CM.g2c(orientedStart, start_fuzzy),
varType,
self.__maybeInvert(i, arg1, arg1_reverse)))
<