From 11eb9eece964f8030fff696f8aebfe26117923a9 Mon Sep 17 00:00:00 2001 From: Martijn Vermaat <martijn@vermaat.name> Date: Wed, 24 Aug 2011 12:01:22 +0000 Subject: [PATCH] Get only primary assembly mappings in position converter (naive fix for #58). git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/branches/refactor-mutalyzer-branch@338 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1 --- mutalyzer/Db.py | 17 ++++++++---- mutalyzer/Mapper.py | 2 ++ tests/test_converter.py | 54 ++++++++++++++++++++++++++++++++++++ tests/test_variantchecker.py | 5 ---- 4 files changed, 68 insertions(+), 10 deletions(-) create mode 100644 tests/test_converter.py diff --git a/mutalyzer/Db.py b/mutalyzer/Db.py index b608cffb..18e8237e 100644 --- a/mutalyzer/Db.py +++ b/mutalyzer/Db.py @@ -242,27 +242,34 @@ class Mapping(Db) : @return: The version number @rtype: integer - """ + @todo: The 'order by chrom asc' is a quick hack to make sure we first + get a primary assembly mapping instead of some haplotype mapping + for genes in the HLA cluster. + A better fix is to return the entire list of mappings, and/or + remove all secondary mappings for the HLA cluster. + See also test_converter.test_hla_cluster and bug #58. + """ q = """ select acc, txStart, txEnd, cdsStart, cdsEnd, exonStarts, exonEnds, geneName, chrom, - strand, protAcc, - MAX(version) + strand, protAcc from map """ if version is None: q += """ - where acc = %s; + where acc = %s + order by version desc, chrom asc; """ statement = (q, mrnaAcc) else: q += """ where acc = %s and - version = %s; + version = %s + order by chrom asc; """ statement = q, (mrnaAcc, version) diff --git a/mutalyzer/Mapper.py b/mutalyzer/Mapper.py index 0138e7e2..ef09e108 100644 --- a/mutalyzer/Mapper.py +++ b/mutalyzer/Mapper.py @@ -24,6 +24,8 @@ positions to I{g.} notation if the variant is in I{c.} notation or vice versa. 
@requires: Modules.Serializers.Transcript @requires: Bio.Seq.reverse_complement @requires: collections.defaultdict + +@todo: Rename Mapper to converter? """ import sys # argv diff --git a/tests/test_converter.py b/tests/test_converter.py new file mode 100644 index 00000000..17d4c5c5 --- /dev/null +++ b/tests/test_converter.py @@ -0,0 +1,54 @@ +""" +Tests for the converter (Mapper) module. +""" + + +#import logging; logging.basicConfig() +from nose.tools import * + +from mutalyzer.config import Config +from mutalyzer.output import Output +from mutalyzer.Mapper import Converter + + +class TestConverter(): + """ + Test the converter (Mapper) module. + """ + def setUp(self): + """ + Initialize test converter module. + """ + self.config = Config() + self.output = Output(__file__, self.config.Output) + + def _converter(self, build): + """ + Create a Converter instance for a given build. + """ + return Converter(build, self.config, self.output) + + def test_converter(self): + """ + Simple test. + """ + converter = self._converter('hg19') + genomic = converter.c2chrom('NM_003002.2:c.274G>T') + assert_equal(genomic, 'NC_000011.9:g.111959695G>T') + coding = converter.chrom2c(genomic, 'list') + assert 'NM_003002.2:c.274G>T' in coding + + def test_hla_cluster(self): + """ + Convert to primary assembly. + + Transcript NM_000500.5 is mapped to different chromosome locations, + but we like to just see the primary assembly mapping to chromosome 6. + + See also bug #58. + """ + converter = self._converter('hg19') + genomic = converter.c2chrom('NM_000500.5:c.92C>T') + assert_equal(genomic, 'NC_000006.11:g.32006291C>T') + coding = converter.chrom2c(genomic, 'list') + assert 'NM_000500.5:c.92C>T' in coding diff --git a/tests/test_variantchecker.py b/tests/test_variantchecker.py index 69f9a552..74bbae73 100644 --- a/tests/test_variantchecker.py +++ b/tests/test_variantchecker.py @@ -4,13 +4,8 @@ Tests for the variantchecker module. 
#import logging; logging.basicConfig() -import re -import os -import random from nose.tools import * -from Bio.Seq import Seq -import mutalyzer from mutalyzer.config import Config from mutalyzer.output import Output from mutalyzer.variantchecker import check_variant -- GitLab