Commit 220f9806 authored by Alisa Muraveva
Browse files

Warnings about ignored sequences, geneNames in EMBL format

git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/branches/mobile-2013@731 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
parent d9ca56b0
......@@ -281,6 +281,7 @@ class Record(object) :
"""
self.geneList = []
self.locusDict = {}
self.molType = 'g'
self.seq = ""
self.mapping = []
......
......@@ -774,7 +774,7 @@ class GenBankRetriever(Retriever):
# Now we have the file, so we can parse it.
GenBankParser = genbank.GBparser()
record = GenBankParser.create_record(filename)
record = GenBankParser.create_record(filename, self._output)
record.id = name
# Todo: This will change once we support protein references
......
......@@ -562,6 +562,7 @@ class EMBLparser():
if not geneDict.has_key(geneName) :
myGene = Gene(geneName)
record.geneList.append(myGene)
record.locusDict[i.qualifiers["locus_tag"][0]] = geneName
if i.strand :
myGene.orientation = i.strand
myGene.location = self.__location2pos(i.location)
......
......@@ -243,7 +243,7 @@ class GBparser():
setattr(locus, key, "")
#__tagByDict
def __tagLocus(self, locusList):
def __tagLocus(self, locusList, output):
"""
Enrich a list of locus objects (mRNA or CDS) with information used
for linking (locus_tag, proteinLink and productTag). Also
......@@ -284,6 +284,7 @@ class GBparser():
i.usable = True
else :
i.usable = False
#for
if productList :
......@@ -374,7 +375,7 @@ class GBparser():
return 1 # Everything matches, but there is little information.
#__matchByRange
def link(self, rnaList, cdsList):
def link(self, rnaList, cdsList, output):
"""
Link mRNA loci to CDS loci (all belonging to one gene).
......@@ -402,8 +403,9 @@ class GBparser():
"""
# Enrich the lists with as much information we can find.
self.__tagLocus(rnaList)
self.__tagLocus(cdsList)
self.__tagLocus(rnaList, output)
self.__tagLocus(cdsList, output)
# Prune the tags based upon uniqueness.
self.__checkTags(rnaList, "locus_tag")
......@@ -466,7 +468,7 @@ class GBparser():
#for
#link
def create_record(self, filename):
def create_record(self, filename, output):
"""
Create a GenRecord.Record from a GenBank file
......@@ -565,7 +567,7 @@ class GBparser():
if record.molType in ['g', 'm'] :
for j in geneDict.keys() :
myGene = geneDict[j]
self.link(myGene.rnaList, myGene.cdsList)
self.link(myGene.rnaList, myGene.cdsList, output)
for i in myGene.rnaList :
if i.usable :
......@@ -616,6 +618,9 @@ class GBparser():
#if
myRealGene.transcriptList.append(myTranscript)
#if
else:
output.addMessage(__file__, 2, 'WPOSITION',
"The gene's %s coordinates extend beyound transcript" % i.gene)
#for
for i in myGene.cdsList :
if not i.linked and \
......
......@@ -1499,12 +1499,15 @@ def process_variant(mutator, description, record, output):
else:
# Not an LRG, find our gene manually.
genes = record.record.listGenes()
genes = record.record.listGenes()
transcript_id = transcript_id and "%.3i" % int(transcript_id)
locus = list(record.record.locusDict)
if gene_symbol in genes:
# We found our gene.
gene = record.record.findGene(gene_symbol)
elif record.record.locusDict.has_key(gene_symbol):
# We found our gene by locus_id
gene = record.record.findGene(record.record.locusDict[gene_symbol])
elif not gene_symbol:
if len(genes) == 1:
# No gene given and there is only one gene in the record.
......@@ -1512,11 +1515,12 @@ def process_variant(mutator, description, record, output):
gene = record.record.geneList[0]
else:
output.addMessage(__file__, 4, "EINVALIDGENE",
"No gene specified. Please choose from: %s" % ", ".join(genes))
"No gene specified. Please choose from: %s" % ", ".join(locus))
else:
output.addMessage(__file__, 4, "EINVALIDGENE",
"Gene %s not found. Please choose from: %s" % (
gene_symbol, ", ".join(genes)))
gene_symbol, ", ".join(locus)))
if gene:
# Find transcript.
......@@ -1669,8 +1673,8 @@ def check_variant(description, output):
elif parsed_description.EnsemblId:
filetype = 'EMBL'
if parsed_description.Gene:
gene_symbol = parsed_description.Gene.GeneSymbol or ''
transcript_id = parsed_description.Gene.TransVar or ''
gene_symbol = parsed_description.Gene[0] or ''
transcript_id = parsed_description.Gene[1] or ''
if parsed_description.Gene.ProtIso:
output.addMessage(__file__, 4, 'EPROT',
'Indexing by protein isoform is not supported.')
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment