diff --git a/mutalyzer/grammar.py b/mutalyzer/grammar.py index 2facd917bc8592ceab6225cb3f60dc6e4c5ba08f..9c107855795078f27da4fab5816ab2895aba0e97 100644 --- a/mutalyzer/grammar.py +++ b/mutalyzer/grammar.py @@ -214,7 +214,7 @@ class Grammar(): Suppress(')') # We use originalTextFrom to retain the original (unparsed) raw variant - # descriptions. It can be retrieved as element[0]. + # descriptions. It can be retrieved as element[-1]. # See: # http://packages.python.org/pyparsing/pyparsing.pyparsing-module.html#originalTextFor diff --git a/mutalyzer/mapping.py b/mutalyzer/mapping.py index d772f4b24b05762ba4b9a74668dc749750f037f0..a9ba1b60df59f31e647c7ed43d254b716a18f5dc 100644 --- a/mutalyzer/mapping.py +++ b/mutalyzer/mapping.py @@ -443,6 +443,50 @@ class Converter(object) : return "%s:%s" % (chromAcc, var_in_g) #c2chrom + def chromosomal_positions(self, positions, reference, version=None): + """ + Convert c. positions to chromosomal positions. + + @arg positions: Positions in c. notation to convert. + @type positions: list + @arg reference: Transcript reference. + @type reference: string + @kwarg version: Transcript reference version. If omitted, '0' is + assumed. + @type version: string + + @return: Chromosome name, orientation (+ or -), and converted + positions. + @rtype: tuple(string, string, list) + + This only works for positions on transcript references in c. notation. + """ + if not version: + version = 0 + version = int(version) + + versions = self.__database.get_NM_version(reference) + + if version not in versions: + return None + + values = self.__database.getAllFields(reference, version) + self._FieldsFromValues(values) + + mapper = self.makeCrossmap() + if not mapper: + return None + + chromosomal_positions = [] + + for position in positions: + main = mapper.main2int(position.MainSgn + position.Main) + offset = mapper.offset2int(position.OffSgn + position.Offset) + chromosomal_positions.append(mapper.x2g(main, offset)) + + return self.dbFields['chromosome'], self.dbFields['orientation'], chromosomal_positions + #chromosomal_positions + def correctChrVariant(self, variant) : """ @arg variant: diff --git a/mutalyzer/parsers/genbank.py b/mutalyzer/parsers/genbank.py index 8f640f618c72b37bd16c1f513cc626943c8e6651..8ecc643ceeeee678e9b613d6385c84869c5bfe7d 100644 --- a/mutalyzer/parsers/genbank.py +++ b/mutalyzer/parsers/genbank.py @@ -449,6 +449,8 @@ class GBparser(): record.seq = biorecord.seq record.version = biorecord.id.split('.')[1] + # Todo: also set organism in the LRG parser + record.organism = biorecord.annotations['organism'] # Todo: This will change once we support protein references if isinstance(biorecord.seq.alphabet, ProteinAlphabet): diff --git a/mutalyzer/templates/check.html b/mutalyzer/templates/check.html index ca1fd558341c2202182036351688ad0a207221c9..0aaa563809273c671ca3504384d97e88ae224448 100644 --- a/mutalyzer/templates/check.html +++ b/mutalyzer/templates/check.html @@ -48,6 +48,7 @@ </div> </div> </div> <!-- not:visualisation --> + <a tal:condition = "browserLink" tal:attributes = "href browserLink"><br>View original variant in UCSC Genome Browser</a> </div> <!-- form area --> <br> <div tal:condition = "lastpost"> diff --git a/mutalyzer/variantchecker.py b/mutalyzer/variantchecker.py index 5e272e22553858a9b9de8f98f48adc208ac71fab..d438e6c7a76b2a494c2cc5331e33b0c9f2056bbe 100644 --- a/mutalyzer/variantchecker.py +++ b/mutalyzer/variantchecker.py @@ -22,6 +22,7 @@ from Bio.Alphabet import IUPAC from mutalyzer import util from mutalyzer.grammar import Grammar from mutalyzer.mutator import Mutator +from mutalyzer.mapping import Converter from mutalyzer import Retriever from mutalyzer import GenRecord from mutalyzer import Db @@ -911,6 +912,8 @@ def process_raw_variant(mutator, variant, record, transcript, output): @raise _RawVariantError: Cannot process this raw variant. @raise _VariantError: Cannot further process the entire variant. """ + variant, original_description = variant.RawVar, variant[-1] + # {argument} may be a number, or a subsequence of the reference. # {sequence} is the variant subsequence. argument = variant.Arg1 @@ -990,6 +993,8 @@ def process_raw_variant(mutator, variant, record, transcript, output): # Coding positioning. first, last = _coding_to_genomic(first_location, last_location, transcript, output) + output.addOutput('rawVariantsCoding', + (original_description, first_location, last_location)) else: # Genomic positioning. first, last = _genomic_to_genomic(first_location, last_location) @@ -1424,17 +1429,17 @@ def process_variant(mutator, description, record, output): if description.SingleAlleleVarSet: for var in description.SingleAlleleVarSet: try: - process_raw_variant(mutator, var.RawVar, record, transcript, + process_raw_variant(mutator, var, record, transcript, output) except _RawVariantError: #output.addMessage(__file__, 2, 'WSKIPRAWVARIANT', # 'Ignoring raw variant "%s".' % var[0]) output.addMessage(__file__, 1, 'IRAWVARIANTERROR', 'Aborted variant check due to error in ' \ - 'raw variant "%s".' % var[0]) + 'raw variant "%s".' % var[-1]) raise else: - process_raw_variant(mutator, description.RawVar, record, + process_raw_variant(mutator, description, record, transcript, output) # Add transcript-specific variant information. @@ -1508,6 +1513,8 @@ def check_variant(description, output): # Add recordType to output for output formatting. output.addOutput('recordType', filetype) + output.addOutput('organism', retrieved_record.organism) + output.addOutput('reference', record_id) # Note: geneSymbol[0] is used as a filter for batch runs. @@ -1555,6 +1562,28 @@ def check_variant(description, output): output.addOutput('original', str(mutator.orig)) output.addOutput('mutated', str(mutator.mutated)) + # Chromosomal region (only for GenBank human transcript references). + # This is still quite ugly code, and should be cleaned up once we have + # a refactored mapping module. + if retrieved_record.organism == 'Homo sapiens' \ + and parsed_description.RefSeqAcc \ + and parsed_description.RefType == 'c': + raw_variants = output.getOutput('rawVariantsCoding') + # Example value: [('29+4T>C', 29+4, 29+4), ('230_233del', 230, 233)] + if raw_variants: + locations = [pos + for descr, first, last in raw_variants + for pos in (first, last)] + converter = Converter('hg19', output) + chromosomal_positions = converter.chromosomal_positions( + locations, parsed_description.RefSeqAcc, parsed_description.Version) + if chromosomal_positions: + output.addOutput('rawVariantsChromosomal', + (chromosomal_positions[0], chromosomal_positions[1], + zip([descr for descr, first, last in raw_variants], + util.grouper(chromosomal_positions[2])))) + # Example value: ('chr12', [('29+4T>C', (2323, 2323)), ('230_233del', (5342, 5345))]) + # Protein. for gene in record.record.geneList: for transcript in gene.transcriptList: diff --git a/mutalyzer/website.py b/mutalyzer/website.py index bd3328a01236949ddf6b0692404876d96c0a3248..74428d6340538e99eb8399f2787ce1915c535559 100644 --- a/mutalyzer/website.py +++ b/mutalyzer/website.py @@ -5,6 +5,7 @@ General Mutalyzer website interface. WEBSERVICE_LOCATION = '/services' WSDL_VIEWER = 'templates/wsdl-viewer.xsl' +GENOME_BROWSER_URL = 'http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position={chromosome}:{start}-{stop}&hgt.customText={bed_file}' # WSGI applications should never print anything to stdout. We redirect to @@ -64,6 +65,7 @@ urls = ( '/Variant_info', 'VariantInfo', '/getGS', 'GetGS', '/check', 'Check', + '/bed', 'Bed', '/syntaxCheck', 'SyntaxCheck', '/checkForward', 'CheckForward', '/batch([a-zA-Z]+)?', 'BatchChecker', @@ -719,6 +721,19 @@ class Check: link = urllib.quote(description) return description, link + # Create a link to the UCSC Genome Browser + browser_link = None + raw_variants = output.getIndexedOutput('rawVariantsChromosomal', 0) + if raw_variants: + positions = [pos + for descr, (first, last) in raw_variants[2] + for pos in (first, last)] + bed_url = web.ctx.homedomain + web.ctx.homepath + '/bed?variant=' + urllib.quote(name) + browser_link = GENOME_BROWSER_URL.format(chromosome=raw_variants[0], + start=min(positions) - 10, + stop=max(positions) + 10, + bed_file=urllib.quote(bed_url)) + # Todo: Generate the fancy HTML views for the proteins here instead # of in mutalyzer/variantchecker.py. args = { @@ -746,13 +761,62 @@ class Check: 'cdsStop_c' : output.getIndexedOutput('cdsStop_c', 0), 'restrictionSites' : output.getOutput('restrictionSites'), 'legends' : output.getOutput('legends'), - 'reference' : reference + 'reference' : reference, + 'browserLink' : browser_link } return render.check(args, standalone=not interactive) #Check +class Bed: + """ + Create BED track. + """ + def GET(self): + """ + Create a BED track for the given variant, listing the positioning of + its raw variants. E.g. for use in the UCSC Genome Browser. + + Parameters: + - mutationName: Variant to create BED track for. + + This basically just runs the variant checker and extracts the raw + variants with positions. + """ + web.header('Content-Type', 'text/plain') + + i = web.input(variant=None) + variant = i.variant + + if not variant: + web.ctx.status = '404 Not Found' + return 'Sorry, we have not BED track for this variant.' + + output = Output(__file__) + + variantchecker.check_variant(str(variant), output) + + raw_variants = output.getIndexedOutput('rawVariantsChromosomal', 0) + if not raw_variants: + web.ctx.status = '404 Not Found' + return 'Sorry, we have not BED track for this variant.' + + fields = {'name': 'Mutalyzer', + 'description': 'Mutalyzer track for ' + variant, + 'visibility': 'pack', + 'db': 'hg19', + 'url': web.ctx.homedomain + web.ctx.homepath + '/checkForward?mutationName=' + urllib.quote(variant), + 'color': '255,0,0'} + bed = ' '.join(['track'] + ['%s="%s"' % field for field in fields.items()]) + '\n' + for description, positions in raw_variants[2]: + bed += '\t'.join([raw_variants[0], + str(min(positions) - 1), str(max(positions)), + description, '', raw_variants[1]]) + '\n' + return bed +#Bed + + class CheckForward: """ Set the given variant in the cookie and redirect to the name checker. diff --git a/tests/test_variantchecker.py b/tests/test_variantchecker.py index 2d38fcc4aa7fb7818f066b02d83dc1c163bbcc3c..76e0d0e65bd30bc5fec24b6bbb56cf6a70bc933e 100644 --- a/tests/test_variantchecker.py +++ b/tests/test_variantchecker.py @@ -426,3 +426,12 @@ class TestVariantchecker(): """ check_variant('g.244355733del', self.output) assert_equal(len(self.output.getMessagesWithErrorCode('ENOREF')), 1) + + def test_chromosomal_positions(self): + """ + Variants on transcripts in c. notation should have chromosomal positions + defined. + """ + check_variant('NM_003002.2:c.274G>T', self.output) + assert_equal(self.output.getIndexedOutput('rawVariantsChromosomal', 0), + ('chr11', '+', [('274G>T', (111959695, 111959695))])) diff --git a/tests/test_website.py b/tests/test_website.py index 438b9e71a9096638422df1a72d91c3c59167a99b..09a30ebb9114f4a5129f751d6a2abc92fc718be8 100644 --- a/tests/test_website.py +++ b/tests/test_website.py @@ -20,6 +20,7 @@ import web from nose.tools import * from webtest import TestApp import logging +import urllib import mutalyzer @@ -202,6 +203,18 @@ class TestWSGI(): '<html>', '</html>') + def test_check_browser_link(self): + """ + Submit the name checker form with a coding variant on a transcript. + Should include link to UCSC Genome Browser. + """ + r = self.app.get('/check') + form = r.forms[0] + form['mutationName'] = 'NM_003002.2:c.274G>T' + r = form.submit() + bed_track = urllib.quote(r.environ['wsgi.url_scheme'] + '://' + r.environ['HTTP_HOST'] + '/bed?variant=' + urllib.quote('NM_003002.2:c.274G>T')) + r.mustcontain('<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position=chr11:111959685-111959705&hgt.customText=%s"><br>View original variant in UCSC Genome Browser</a>' % bed_track) + def test_checkforward(self): """ A checkForward request should set the given variant in the session and @@ -707,3 +720,20 @@ facilisi.""" Test if non-existing reference files gives a 404 on a HEAD request. """ r = self.app.head('/Reference/AB026906.78.gb', status=404) + + def test_bed(self): + """ + BED track for variant. + """ + r = self.app.get('/bed?variant=NM_003002.2%3Ac.274G%3ET') + assert_equal(r.content_type, 'text/plain') + r.mustcontain('\t'.join(['chr11', '111959694', '111959695', '274G>T', '', '+'])) + + def test_bed_reverse(self): + """ + BED track for variant on reverse strand. + """ + r = self.app.get('/bed?variant=NM_000132.3%3Ac.%5B4374A%3ET%3B4380_4381del%5D') + assert_equal(r.content_type, 'text/plain') + r.mustcontain('\t'.join(['chrX', '154157690', '154157691', '4374A>T', '', '-'])) + r.mustcontain('\t'.join(['chrX', '154157683', '154157685', '4380_4381del', '', '-']))