From 11eb9eece964f8030fff696f8aebfe26117923a9 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Wed, 24 Aug 2011 12:01:22 +0000
Subject: [PATCH] Get only primary assembly mappings in position converter
 (naive fix for #58).

git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/branches/refactor-mutalyzer-branch@338 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1
---
 mutalyzer/Db.py              | 17 ++++++++----
 mutalyzer/Mapper.py          |  2 ++
 tests/test_converter.py      | 54 ++++++++++++++++++++++++++++++++++++
 tests/test_variantchecker.py |  5 ----
 4 files changed, 68 insertions(+), 10 deletions(-)
 create mode 100644 tests/test_converter.py

diff --git a/mutalyzer/Db.py b/mutalyzer/Db.py
index b608cffb..18e8237e 100644
--- a/mutalyzer/Db.py
+++ b/mutalyzer/Db.py
@@ -242,27 +242,34 @@ class Mapping(Db) :
 
         @return: The version number
         @rtype: integer
-        """
 
+        @todo: The 'order by chrom asc' is a quick hack to make sure we first
+            get a primary assembly mapping instead of some haplotype mapping
+            for genes in the HLA cluster.
+            A better fix is to return the entire list of mappings, and/or
+            remove all secondary mappings for the HLA cluster.
+            See also test_converter.test_hla_cluster and bug #58.
+        """
         q = """
                 select  acc,
                         txStart, txEnd,
                         cdsStart, cdsEnd,
                         exonStarts, exonEnds,
                         geneName, chrom,
-                        strand, protAcc,
-                        MAX(version)
+                        strand, protAcc
                 from map
         """
         if version is None:
             q += """
-                where acc = %s;
+                where acc = %s
+                order by version desc, chrom asc;
                 """
             statement = (q, mrnaAcc)
         else:
             q += """
                 where acc = %s and
-                      version = %s;
+                      version = %s
+                order by chrom asc;
                 """
             statement = q, (mrnaAcc, version)
 
diff --git a/mutalyzer/Mapper.py b/mutalyzer/Mapper.py
index 0138e7e2..ef09e108 100644
--- a/mutalyzer/Mapper.py
+++ b/mutalyzer/Mapper.py
@@ -24,6 +24,8 @@ positions to I{g.} notation if the variant is in I{c.} notation or vice versa.
 @requires: Modules.Serializers.Transcript
 @requires: Bio.Seq.reverse_complement
 @requires: collections.defaultdict
+
+@todo: Rename Mapper to converter?
 """
 
 import sys                     # argv
diff --git a/tests/test_converter.py b/tests/test_converter.py
new file mode 100644
index 00000000..17d4c5c5
--- /dev/null
+++ b/tests/test_converter.py
@@ -0,0 +1,54 @@
+"""
+Tests for the converter (Mapper) module.
+"""
+
+
+#import logging; logging.basicConfig()
+from nose.tools import *
+
+from mutalyzer.config import Config
+from mutalyzer.output import Output
+from mutalyzer.Mapper import Converter
+
+
+class TestConverter():
+    """
+    Tests for the Converter class of the Mapper module, on build hg19.
+    """
+    def setUp(self):
+        """
+        Create the Config and Output objects shared by every test.
+        """
+        self.config = Config()
+        self.output = Output(__file__, self.config.Output)
+
+    def _converter(self, build):
+        """
+        Return a Converter for `build` using this fixture's config and output.
+        """
+        return Converter(build, self.config, self.output)
+
+    def test_converter(self):
+        """
+        Round trip: c. notation to chromosomal g. notation and back again.
+        """
+        converter = self._converter('hg19')
+        genomic = converter.c2chrom('NM_003002.2:c.274G>T')
+        assert_equal(genomic, 'NC_000011.9:g.111959695G>T')
+        coding = converter.chrom2c(genomic, 'list')
+        assert 'NM_003002.2:c.274G>T' in coding
+
+    def test_hla_cluster(self):
+        """
+        Convert to primary assembly.
+
+        Transcript NM_000500.5 is mapped to different chromosome locations,
+        but we only want to see the primary assembly mapping to chromosome 6.
+
+        See also bug #58.
+        """
+        converter = self._converter('hg19')
+        genomic = converter.c2chrom('NM_000500.5:c.92C>T')
+        assert_equal(genomic, 'NC_000006.11:g.32006291C>T')
+        coding = converter.chrom2c(genomic, 'list')
+        assert 'NM_000500.5:c.92C>T' in coding
diff --git a/tests/test_variantchecker.py b/tests/test_variantchecker.py
index 69f9a552..74bbae73 100644
--- a/tests/test_variantchecker.py
+++ b/tests/test_variantchecker.py
@@ -4,13 +4,8 @@ Tests for the variantchecker module.
 
 
 #import logging; logging.basicConfig()
-import re
-import os
-import random
 from nose.tools import *
-from Bio.Seq import Seq
 
-import mutalyzer
 from mutalyzer.config import Config
 from mutalyzer.output import Output
 from mutalyzer.variantchecker import check_variant
-- 
GitLab