Skip to content
Snippets Groups Projects
Commit 38f90d91 authored by Vermaat's avatar Vermaat
Browse files

Do not crash on non-numeric locus tag end, fixes #81


git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@452 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
parent dba5b193
No related branches found
No related tags found
No related merge requests found
......@@ -4,6 +4,7 @@ mutalyzer GenRecord. Record populated with data from a GenBank file.
"""
import re
import bz2
from itertools import izip_longest
......@@ -15,6 +16,10 @@ from mutalyzer import Db
from mutalyzer.GenRecord import PList, Locus, Gene, Record, GenRecord
# Regular expression used to find the version number in a locus tag: the
# run of one to three digits at the very end of the tag. Written as a raw
# string so that `\d` reaches the regex engine verbatim instead of being
# parsed as an (invalid) string escape sequence.
LOCUS_TAG_VERSION = re.compile(r'\d{1,3}$')
class tempGene():
"""
Container class for a given gene name.
......@@ -564,9 +569,15 @@ class GBparser():
# Normally, locus_tag ends with three digits, but
# for some (e.g. mobA on NC_011228, a plasmid) it
# ends with two digits prepended with an
# underscore. We really want a number, so 'fix'
# this by substituting a zero.
myTranscript = Locus(i.locus_tag[-3:].replace('_', '0'))
# underscore, or with a letter. We really want a
# number, so 'fix' this by only looking at the
# trailing numeric part.
try:
version = LOCUS_TAG_VERSION.findall(
i.locus_tag)[0].zfill(3)
except IndexError:
version = '000'
myTranscript = Locus(version)
else :
myTranscript = Locus(myRealGene.newLocusTag())
myTranscript.mRNA = PList()
......@@ -603,9 +614,15 @@ class GBparser():
# Normally, locus_tag ends with three digits, but
# for some (e.g. mobA on NC_011228, a plasmid) it
# ends with two digits prepended with an
# underscore. We really want a number, so 'fix'
# this by substituting a zero.
myTranscript = Locus(i.locus_tag[-3:].replace('_', '0'))
# underscore, or with a letter. We really want a
# number, so 'fix' this by only looking at the
# trailing numeric part.
try:
version = LOCUS_TAG_VERSION.findall(
i.locus_tag)[0].zfill(3)
except IndexError:
version = '000'
myTranscript = Locus(version)
else :
myTranscript = Locus(myRealGene.newLocusTag())
myTranscript.CDS = PList()
......
......@@ -453,3 +453,10 @@ class TestVariantchecker():
assert_equal(error_count, 0)
assert_equal(self.output.getIndexedOutput('genomicDescription', 0),
'LRG_1:g.6855G>T')
def test_non_numeric_locus_tag_ending(self):
"""
Regression test: a non-numeric locus tag ending must not cause a crash.

A locus tag in NC_002128 does not end in an underscore and three digits.
This test only runs the check on 'NC_002128(tagA):c.3del' and makes no
assertions about the output, so it passes as long as the call returns
without raising.
"""
check_variant('NC_002128(tagA):c.3del', self.output)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment