diff --git a/mutalyzer/Db.py b/mutalyzer/Db.py index e56de692c36dde9f7a88f0211e41a7e5b5222555..23cf48fb847634ce373c44b24aa6edd0430b3dd7 100644 --- a/mutalyzer/Db.py +++ b/mutalyzer/Db.py @@ -129,49 +129,6 @@ class Db(): #query #Db -class UCSC(Db) : - """ - Database functions for querying the UCSC database. - """ - def __init__(self, build) : - """ - Initialise the Db parent class. Use the local database for a certain - build. - - @arg build: The version of the mapping database - @type build: string - """ - Db.__init__(self, build, 'genome', 'genome-mysql.cse.ucsc.edu') - #__init__ - - def transcripts_by_gene(self, gene): - """ - Get transcript mappings for given gene name. - """ - statement = """ - SELECT DISTINCT - acc, version, txStart, txEnd, cdsStart, cdsEnd, exonStarts, - exonEnds, name2 AS geneName, chrom, strand, protAcc - FROM gbStatus, refGene, refLink - WHERE type = "mRNA" - AND refGene.name = acc - AND acc = mrnaAcc - AND name2 = %s - """, gene - - transcripts = [] - for (acc, version, txStart, txEnd, cdsStart, cdsEnd, exonStarts, exonEnds, - geneName, chrom, strand, protAcc) in self.query(statement): - transcripts.append( - (geneName, acc, version, chrom, strand, - txStart + 1, txEnd, - cdsStart + 1, cdsEnd, - [int(i) + 1 for i in exonStarts.split(',') if i], - [int(i) for i in exonEnds.split(',') if i], - protAcc)) - - return transcripts - #transcripts_by_gene class Mapping(Db) : """ diff --git a/mutalyzer/mapping.py b/mutalyzer/mapping.py index 810d7d19628a5dd6dd53f3fc313f1446e04583de..98bfe3260dfc683085f0ea8acfc16a4d02753fd1 100644 --- a/mutalyzer/mapping.py +++ b/mutalyzer/mapping.py @@ -10,10 +10,12 @@ update the database with this information. 
""" -from Bio.Seq import reverse_complement from collections import defaultdict from operator import attrgetter +from Bio.Seq import reverse_complement +import MySQLdb + from mutalyzer.grammar import Grammar from mutalyzer import Db from mutalyzer import Crossmap @@ -1052,7 +1054,10 @@ class UCSCUpdater(Updater): 'hg19'. @type build: string """ - self.ucsc = Db.UCSC(build) + self._ucsc_connection = MySQLdb.connect( + user='genome', + host='genome-mysql.cse.ucsc.edu', + db=build) super(UCSCUpdater, self).__init__(build) #__init__ @@ -1070,7 +1075,34 @@ class UCSCUpdater(Updater): (default: False). @type overwrite: bool """ - transcripts = self.ucsc.transcripts_by_gene(gene) + query = """ + SELECT DISTINCT + acc, version, txStart, txEnd, cdsStart, cdsEnd, exonStarts, + exonEnds, name2 AS geneName, chrom, strand, protAcc + FROM gbStatus, refGene, refLink + WHERE type = "mRNA" + AND refGene.name = acc + AND acc = mrnaAcc + AND name2 = %s + """ + parameters = (gene,) + + cursor = self._ucsc_connection.cursor() + cursor.execute(query, parameters) + result = cursor.fetchall() + cursor.close() + + transcripts = [] + for (acc, version, txStart, txEnd, cdsStart, cdsEnd, exonStarts, exonEnds, + geneName, chrom, strand, protAcc) in result: + transcripts.append( + (geneName, acc, version, chrom, strand, + txStart + 1, txEnd, + cdsStart + 1, cdsEnd, + [int(i) + 1 for i in exonStarts.split(',') if i], + [int(i) for i in exonEnds.split(',') if i], + protAcc)) + self.db.ucsc_load_mapping(transcripts, overwrite=overwrite) #load #UCSCUpdater