Commit 2e170030 authored by Laros
Browse files

Renewed the web interface. Worked on protein descriptions. Added more error
handling for incomplete and poorly annotated GenBank files.

menu.html:
- Template for all pages. Includes a menu and such.

index.html:
- The main page.

base/:
- Pictures, javascript, stylesheet, etc.

interface.js:
- Temporary script for gbupload.html.

download.html:
- Adjusted to work with the menu.html template.

gbupload.html:
- Adjusted to work with the menu.html template.

check.html:
- Extended the template to use more variables to increase flexibility.

index.py:
- Removed the capturing of the Mutalyzer output in the check() function,
  replaced it with retrieval of data from the Output object. 
- Added a new index() function (see index.html).
- Implemented the upload() function (see gbupload.html); currently it can only
  retrieve genes and chromosome slices from the NCBI.

Web.py:
- Enabled the parsing of the menu.html template in the tal() function.

handler.py:
- Modified to work with the new tal() function of Web.py.

Output.py:
- getMessages() now returns a list of messages, instead of printing it to 
  standard output.

mail.txt:
- Template message for finished batch jobs.

Mutalyzer.py:
- __bprint() now puts the visualisation of a protein in the Output object, 
  instead of printing it to standard output.
- Made a new protein printing function that can highlight the changes; it is
  not used yet.
- Added a __cdsLen() function that is used to see if a variant leads to a
  frameshift or not.
- Added extension of a protein in the protein description function.
- Added some error handling for `?' as a position.
- Added error messages for invalid genes and transcript variants in the 
  input (suggesting possible options).
- DNA / protein descriptions are now stored in the Output object.  
- Added a legend.

GenRecord.py:
- Added functions listLoci() and listGenes() for suggestions when invalid 
  input is given.
- Added a preprocessing step in the GenBank file parsing to deal with 
  missing locus tags (not functional yet).
- Added some error handling for incomplete transcripts.



git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@52 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
parent 23af8a24
......@@ -37,3 +37,6 @@ EFILESIZE | N | The filesize is either too large or too small.
ERETR | R | Could not retrieve a GenBank record.
EARG | N | Error in the arguments (of a webservice).
ERANGE | D | Position out of range (webservice).
EINVALIDGENE | D | Invalid gene name given.
EINVALIDTRANSCRIPT | D | Invalid transcript variant given.
Dear submitter,
The batch operation you have submitted has been processed successfully.
Your results can be found here:
%s
Thanks for using Mutalyzer.
With kind regards,
Mutalyzer batch checker.
......@@ -89,6 +89,7 @@ class Locus(object) :
self.molType = 'c'
self.description = ""
self.proteinDescription = "?"
self.proteinRange = []
self.locusTag = None
#__init__
......@@ -137,10 +138,20 @@ class Gene(object) :
"""
for i in self.transcriptList :
if i.name == name :
if i.name == name or i.name == str("%03i" % int(name)):
return i
return None
#findLocus
def listLoci(self) :
    """
    Give the names of all transcript variants (loci) of this gene.

    Returns:
        list ; The name attribute of every entry in transcriptList, in
               the order in which they are stored.
    """

    return [locus.name for locus in self.transcriptList]
#listLoci
#Gene
class Record(object) :
......@@ -178,7 +189,7 @@ class Record(object) :
"""
self.geneList = []
self.molType = ''
self.molType = 'g'
self.organelle = None
self.source = Gene(None)
self.description = ""
......@@ -194,6 +205,16 @@ class Record(object) :
return None
#findGene
def listGenes(self) :
    """
    Give the names of all genes in this record.

    Returns:
        list ; The name attribute of every entry in geneList, in the
               order in which they are stored.
    """

    return [gene.name for gene in self.geneList]
#listGenes
def addToDescription(self, rawVariant) :
"""
"""
......@@ -240,6 +261,11 @@ class GenRecord() :
ret = []
if not str(location.start).isdigit() or \
not str(location.end).isdigit() :
return None
#if
ret.append(location.start.position + 1)
ret.append(location.end.position)
......@@ -260,6 +286,10 @@ class GenRecord() :
ret = []
if not str(locationList.location.start).isdigit() or \
not str(locationList.location.end).isdigit() :
return None
#if
for i in locationList.sub_features :
if i.ref : # This is a workaround for a bug in BioPython.
ret = None
......@@ -314,6 +344,23 @@ class GenRecord() :
"""
self.record = Record()
mRNAProducts = []
CDSProducts = []
for i in record.features :
if i.qualifiers :
if i.qualifiers.has_key("gene") :
if i.type == "mRNA" :
if i.qualifiers.has_key("product") :
mRNAProducts.append(i.qualifiers["product"][0])
if i.type == "CDS" :
if i.qualifiers.has_key("product") :
CDSProducts.append(i.qualifiers["product"][0])
#if
print mRNAProducts
print CDSProducts
for i in record.features :
if i.qualifiers :
if i.type == "source" :
......@@ -366,6 +413,8 @@ class GenRecord() :
#if
if i.type == "mRNA" :
#if i.qualifiers.has_key("product") :
# print i.qualifiers["product"]
PListInstance = PList()
LocusInstance.mRNA = PListInstance
......@@ -381,6 +430,8 @@ class GenRecord() :
LocusInstance.locusTag = locusTag
#if
if i.type == "CDS" :
#if i.qualifiers.has_key("product") :
# print i.qualifiers["product"]
PListInstance = PList()
LocusInstance.CDS = PListInstance
......@@ -435,12 +486,13 @@ class GenRecord() :
self.__output.addMessage(__file__, 2, "WNOCDS",
"No CDS found for gene %s, transcript " \
"variant %s in GenBank record, " \
"constructing it from genelocation." % (
"constructing it from gene location." % (
i.name, j.name))
j.CDS = GenRecord.Locus()
j.CDS.location = j.location
j.mRNA = j.CDS
j.mRNA.positionList = i.location
j.CDS = None #PList()
#j.CDS.location = i.location
j.mRNA = PList()
j.mRNA.location = i.location
#j.mRNA.positionList = i.location
j.molType = 'n'
#else
#if
......@@ -455,7 +507,7 @@ class GenRecord() :
#if
if not j.mRNA.positionList :
j.mRNA.positionList = j.mRNA.location
if j.CDS :
if j.CDS and j.CDS.positionList != None :
if not j.CDS.positionList :
self.__output.addMessage(__file__, 2, "WNOCDS",
"No CDS list found for gene %s, transcript " \
......
......@@ -241,10 +241,14 @@ class Output() :
__config ; The variable outputlevel is used.
"""
ret = []
for i in self.__messages :
if i.level > self.__config.outputlevel :
print "%s(%s): %s" % (self.__levelToName(i.level), i.origin,
i.description)
#print "%s(%s): %s" % (self.__levelToName(i.level), i.origin,
# i.description)
ret.append("%s(%s): %s" % (self.__levelToName(i.level),
i.origin, i.description))
return ret
#getMessages
def addOutput(self, name, data) :
......
......@@ -60,6 +60,50 @@ class Web() :
return reply
#run
#def tal(self, scheme, filename, args) :
# """
# Compile a TAL template to HTML or XML.
# Arguments:
# scheme ; Either "HTML" or "XML", output will be in this
# format.
# filename ; The filename of the template.
# args ; A dictionary with variables (whose name correspond
# to the ones in the template) and their values.
# Returns:
# string ; An HTML or XML file.
# """
# from simpletal import simpleTALES # context(), addGlobal()
# from simpletal import simpleTAL # compileHTMLTemplate,
# # compileXMLTemplate,
# context = simpleTALES.Context()
#
# for i in args :
# context.addGlobal(i, args[i])
# #templateFile = open("templates/menu.html", 'r')
# #macros = simpleTAL.compileHTMLTemplate(templateFile)
# #templateFile.close()
# #context.addGlobal("sitemacros", macros)
#
# templateFile = open(filename, 'r')
# if scheme == "HTML" :
# template = simpleTAL.compileHTMLTemplate(templateFile)
# else :
# template = simpleTAL.compileXMLTemplate(templateFile)
# templateFile.close()
#
# string = StringIO()
# template.expand(context, string)
#
# return string.getvalue()
##tal
def tal(self, scheme, filename, args) :
"""
Compile a TAL template to HTML or XML.
......@@ -84,19 +128,21 @@ class Web() :
for i in args :
context.addGlobal(i, args[i])
#templateFile = open("templates/menu.html", 'r')
#macros = simpleTAL.compileHTMLTemplate(templateFile)
#templateFile.close()
#context.addGlobal("sitemacros", macros)
templateFile = open(filename, 'r')
if scheme == "HTML" :
templateFile = open(filename, 'r')
macros = simpleTAL.compileHTMLTemplate(templateFile)
templateFile.close()
context.addGlobal("sitemacros", macros)
templateFile = open("templates/menu.html", 'r')
template = simpleTAL.compileHTMLTemplate(templateFile)
templateFile.close()
#if
else :
template = simpleTAL.compileXMLTemplate(templateFile)
templateFile.close()
templateFile = open(filename, 'r')
template = simpleTAL.compileHTMLTemplate(templateFile)
templateFile.close()
#else
string = StringIO()
template.expand(context, string)
......@@ -104,45 +150,6 @@ class Web() :
return string.getvalue()
#tal
#def tal2(self, filename, args) :
# """
# Compile a TAL template to HTML or XML.
# Arguments:
# scheme ; Either "HTML" or "XML", output will be in this
# format.
# filename ; The filename of the template.
# args ; A dictionary with variables (whose name correspond
# to the ones in the template) and their values.
# Returns:
# string ; An HTML or XML file.
# """
# from simpletal import simpleTALES # context(), addGlobal()
# from simpletal import simpleTAL # compileHTMLTemplate,
# # compileXMLTemplate,
# context = simpleTALES.Context()
#
# for i in args :
# context.addGlobal(i, args[i])
# templateFile = open("../templates/download.html", 'r')
# macros = simpleTAL.compileHTMLTemplate(templateFile)
# templateFile.close()
# context.addGlobal("sitemacros", macros)
#
# templateFile = open("../templates/menu.html", 'r')
# template = simpleTAL.compileHTMLTemplate(templateFile)
# templateFile.close()
#
# string = StringIO()
# template.expand(context, string)
#
# return string.getvalue()
##tal2
def read(self, path, req) :
"""
Read a file and return its content.
......
......@@ -23,6 +23,8 @@ from Modules import Mutator
from Modules import Output
from Modules import Config
from operator import itemgetter, attrgetter
#def __order(a, b) :
# """
# """
......@@ -68,7 +70,7 @@ def __palinsnoop(string) :
return -1 # Perfect palindrome.
#__palinsnoop
def __bprint(s) :
def __bprint(s, O, where) :
"""
"""
......@@ -80,16 +82,56 @@ def __bprint(s) :
m = int(math.floor(math.log(len(s), 10)) + 1)
o = 1
print "%s " % str(o).rjust(m),
output = "%s " % str(o).rjust(m)
for i in range(0, len(s), block) :
print s[i:i + block],
output += ' ' + s[i:i + block]
if not (i + block) % line and i + block < len(s) :
o += line
print "\n%s " % str(o).rjust(m),
O.addOutput(where, output)
output = "%s " % str(o).rjust(m)
#if
#for
O.addOutput(where, output)
#__bprint
def __bprint2(s, pos1, pos2) :
"""
"""
if not s :
return
block = 10
line = 6 * block
tag1 = "<tt style=\"color:#FF0080\">"
tag2 = "</tt>"
m = int(math.floor(math.log(len(s), 10)) + 1)
o = 1
newString = s[:pos1] + tag1 + s[pos1:pos2] + tag2 + s[pos2:]
print "%s " % str(o).rjust(m),
i = 0
seen = 0
while i < len(s) :
skip = 0
if i <= pos1 < i + block :
skip += len(tag1)
if i <= pos2 < i + block :
skip += len(tag2)
print newString[i:i + block + skip],
seen += block
if not (seen) % line and seen < len(s) :
o += line
print "\n%s " % str(o).rjust(m),
#if
i += block + skip
#while
#__bprint2
def __PtLoc2main(Loc) :
"""
"""
......@@ -143,7 +185,6 @@ def __nsplice(string, splice_sites, CDS, orientation) :
"""
transcript = ""
if orientation == 1 :
for i in range(0, len(splice_sites), 2) :
if CDS[0] >= splice_sites[i] and CDS[0] <= splice_sites[i + 1] :
......@@ -168,6 +209,17 @@ def __nsplice(string, splice_sites, CDS, orientation) :
return transcript
#__nsplice
def __cdsLen(splice_sites) :
    """
    Calculate the summed length of the regions described by a list of
    splice sites.

    Arguments:
        splice_sites ; A flat list of positions, read as consecutive
                       (start, end) pairs; both ends are counted.

    Returns:
        integer ; The sum of (end - start + 1) over all pairs, 0 for an
                  empty list.
    """

    # Index by pair (instead of zipping two slices), so a list of odd
    # length still raises an IndexError rather than being silently
    # truncated.
    return sum(splice_sites[i + 1] - splice_sites[i] + 1
               for i in range(0, len(splice_sites), 2))
#__cdsLen
def __checkOptArg(ref, p1, p2, arg, O) :
"""
"""
......@@ -250,8 +302,11 @@ def findInFrameDescription(str1, str2) :
str1_end = len(str1) - lcs
str2_end = len(str2) - lcs
# Insertion / Duplication.
# Insertion / Duplication / Extention.
if not str1_end - lcp :
if len(str1) == lcp :
return "p.(*%i%sext*%i)" % (len(str1) + 1, seq3(str2[len(str1)]),
abs(len(str1) - len(str2)))
inLen = str2_end - lcp
if lcp - inLen >= 0 and str1[lcp - inLen:lcp] == str2[lcp:str2_end] :
......@@ -267,8 +322,11 @@ def findInFrameDescription(str1, str2) :
seq3(str2[lcp:str2_end]))
#if
# Deletion.
# Deletion / Inframe stop.
if not str2_end - lcp :
if len(str2) == lcp :
return "p.(%s%i*)" % (seq3(str1[len(str2)]), len(str2) + 1)
if lcp + 1 == str1_end :
return "p.(%s%idel)" % (seq3(str1[lcp]), lcp + 1)
return "p.(%s%i_%s%idel)" % (seq3(str1[lcp - 1]), lcp + 1,
......@@ -277,13 +335,7 @@ def findInFrameDescription(str1, str2) :
# Substitution.
if str1_end == str2_end and str1_end == lcp + 1 :
if len(str1) > len(str2) :
return "p.(*%i%sext*%i)" % (len(str1) + 1, seq3(str2[len(str1)]),
abs(len(str1) - len(str2)))
if len(str1) > len(str2) :
return "p.(%s%i*)" % (seq3(str1[len(str2)]), len(str2) + 1)
return "p.(%s%i%s)" % (seq3(str1[lcp]), lcp + 1, seq3(str2[lcp]))
#if
# InDel.
if lcp + 1 == str1_end :
......@@ -442,14 +494,18 @@ def __rv(MUU, record, RawVar, GenRecordInstance, parts, O, transcript) :
"""
"""
# FIXME check this
# First assume that the variant is given in g. notation.
start_g = int(RawVar.StartLoc.PtLoc.MainSgn + RawVar.StartLoc.PtLoc.Main)
start_offset = __PtLoc2offset(RawVar.StartLoc.PtLoc)
#print RawVar.StartLoc.PtLoc.MainSgn + RawVar.StartLoc.PtLoc.Main
#print __PtLoc2offset(RawVar.StartLoc.PtLoc)
if not RawVar.StartLoc.PtLoc.Main.isdigit() : # For ? in a position.
return
start_g = int(RawVar.StartLoc.PtLoc.Main)
end_g = start_g
end_offset = start_offset
if RawVar.EndLoc :
if not RawVar.EndLoc.PtLoc.Main.isdigit() : # For ? in a position.
return
end_g = int(RawVar.EndLoc.PtLoc.MainSgn + RawVar.EndLoc.PtLoc.Main)
end_offset = __PtLoc2offset(RawVar.EndLoc.PtLoc)
#if
Arg1 = RawVar.Arg1
Arg2 = RawVar.Arg2
......@@ -457,8 +513,21 @@ def __rv(MUU, record, RawVar, GenRecordInstance, parts, O, transcript) :
# If it is not, convert it to g. notation.
if transcript :
start_g = transcript.CM.x2g(start_g, start_offset)
end_g = transcript.CM.x2g(end_g, end_offset)
start_main = transcript.CM.main2int(RawVar.StartLoc.PtLoc.MainSgn + \
RawVar.StartLoc.PtLoc.Main)
#if not RawVar.StartLoc.PtLoc.Offset.isdigit() :
# return
start_offset = __PtLoc2offset(RawVar.StartLoc.PtLoc)
start_g = transcript.CM.x2g(start_main, start_offset)
end_g = start_g
if RawVar.EndLoc :
end_main = transcript.CM.main2int(RawVar.EndLoc.PtLoc.MainSgn + \
RawVar.EndLoc.PtLoc.Main)
#if not RawVar.EndLoc.PtLoc.Offset.isdigit() :
# return
end_offset = __PtLoc2offset(RawVar.EndLoc.PtLoc)
end_g = transcript.CM.x2g(end_main, end_offset)
#if
if transcript.CM.orientation == -1 :
Arg1 = Bio.Seq.reverse_complement(RawVar.Arg1)
Arg2 = Bio.Seq.reverse_complement(RawVar.Arg2)
......@@ -544,10 +613,21 @@ def __ppp(MUU, record, parts, GenRecordInstance, O) :
if parts.Gene.GeneSymbol :
GS = GenRecordInstance.record.findGene(
parts.Gene.GeneSymbol)
if not GS :
O.addMessage(__file__, 3, "EINVALIDGENE",
"Gene %s not found. Please choose from: %s" % (
parts.Gene.GeneSymbol,
GenRecordInstance.record.listGenes()))
return
else :
GS = GenRecordInstance.record.geneList[0]
if parts.Gene.TransVar :
W = GS.findLocus(parts.Gene.TransVar)
if not W :
O.addMessage(__file__, 3, "ENOTRANSCRIPT",
"Transcript %s not found for gene %s. Please " \
"choose from: %s" %(parts.Gene.TransVar, GS.name,
GS.listLoci()))
else :
W = GS.transcriptList[0]
#if
......@@ -556,6 +636,8 @@ def __ppp(MUU, record, parts, GenRecordInstance, O) :
#if
else :
W = None
if W and not W.location :
W = None
if parts.SingleAlleleVarSet :
for i in parts.SingleAlleleVarSet :
......@@ -589,6 +671,7 @@ def __ppp(MUU, record, parts, GenRecordInstance, O) :
#if
orig = cds.translate(table = W.txTable, to_stop = True)
O.addOutput("oldprotein", orig + '*')
__bprint(orig + '*', O, "oldProteinFancy")
trans = cdsm.translate(table = W.txTable, to_stop = True)
if not trans or trans[0] != 'M' :
......@@ -596,12 +679,19 @@ def __ppp(MUU, record, parts, GenRecordInstance, O) :
Bio.Data.CodonTable.unambiguous_dna_by_id[
W.txTable].start_codons :
O.addOutput("newprotein", '?')
__bprint('?', O, "newProteinFancy")
O.addOutput("altstart", str(cdsm[0:3]))
O.addOutput("altprotein", 'M' + trans[1:] + '*')
__bprint('M' + trans[1:] + '*', O, "altProteinFancy")
#if
else :
O.addOutput("newprotein", '?')
__bprint('?', O, "newProteinFancy")
#else
else :
O.addOutput("newprotein", trans + '*')
__bprint(trans + '*', O, "newProteinFancy")
#else
#if not parts.SingleAlleleVarSet :
# #O.addOutput("proteindescription", "p.?")
......@@ -633,7 +723,6 @@ def process(cmd, C, O) :
if ParseObj.LrgAcc:
filetype = "LRG"
RetrieveRecord = ParseObj.LrgAcc
print RetrieveRecord
else:
filetype = "GB"
record = retriever.loadrecord(RetrieveRecord, filetype)
......@@ -659,44 +748,74 @@ def process(cmd, C, O) :
# PROTEIN
for i in GenRecordInstance.record.geneList :
for j in i.transcriptList :
if not ';' in j.description and j.CDS :
print j.CDS.positionList
print j.mRNA.positionList
cds = Seq(str(__splice(MUU.orig, j.CDS.positionList)),
IUPAC.unambiguous_dna)
cdsm = Seq(str(__nsplice(MUU.mutated,
MUU.newSplice(j.mRNA.positionList),
MUU.newSplice(j.CDS.location),
j.CM.orientation)),
IUPAC.unambiguous_dna)
cdsStop = 1
if j.CM.orientation == -1 :
cds = Bio.Seq.reverse_complement(cds)
cdsm = Bio.Seq.reverse_complement(cdsm)
cdsStop = 0
#if
#if '*' in cds.translate()[:-1] :
# O.addMessage(__file__, 3, "ESTOP",
# "In frame stop codon found.")
# return
##if
orig = cds.translate(table = j.txTable, cds = True,
to_stop = True)
#O.addOutput("oldprotein", orig + '*')
trans = cdsm.translate(table = j.txTable, to_stop = True)
#print i.name, j.name
#print j.CDS.location
j.proteinDescription = __toProtDescr(
j.CM.g2x(MUU.newSplice(j.CDS.location)[cdsStop])[0],
orig, trans)
#print j.proteinDescription
if i.location :
for j in i.transcriptList :
if not ';' in j.description and j.CDS :
#print i.name, j.name, j.CDS.positionList, j.CDS.location
cds = Seq(str(__splice(MUU.orig, j.CDS.positionList)),
IUPAC.unambiguous_dna)
cdsm = Seq(str(__nsplice(MUU.mutated,
MUU.newSplice(j.mRNA.positionList),
MUU.newSplice(j.CDS.location),
j.CM.orientation)),
IUPAC.unambiguous_dna)
if j.CM.orientation == -1 :
cds = Bio.Seq.reverse_complement(cds)
cdsm = Bio.Seq.reverse_complement(cdsm)
#if
#if '*' in cds.translate()[:-1] :
# O.addMessage(__file__, 3, "ESTOP",