From d2987465f7a6e72446b1350c68d87aa9a24bac60 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Thu, 11 Feb 2016 19:45:16 +0100
Subject: [PATCH] Fix transcript naming in mapping webservices

The following three webservice methods return a list of transcript
identifiers for some query:

- getTranscriptsRange
- getTranscripts
- getTranscriptsByGeneName

Previously they didn't work correctly for LRG transcripts (a bogus
version was included and no transcript was selected) and refseq
transcripts on mtDNA (no transcript was selected).

Additionally, the getTranscriptsRange method now optionally
includes version numbers with the boolean versions argument
(default false).
---
 mutalyzer/services/rpc.py   | 73 ++++++++++++++++++++++++++++++++-----
 tests/test_services_soap.py | 68 ++++++++++++++++++++++++++++++++++
 2 files changed, 131 insertions(+), 10 deletions(-)

diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py
index e0137d93..ce2287a8 100644
--- a/mutalyzer/services/rpc.py
+++ b/mutalyzer/services/rpc.py
@@ -264,11 +264,24 @@ class MutalyzerService(ServiceBase):
                      "Finished processing getTranscripts(%s %s %s %s)"
                      % (build, chrom, pos, versions))
 
-        #filter out the accNo
-        if versions:
-            return ['%s.%s' % (m.accession, m.version) for m in mappings]
-        else:
-            return [m.accession for m in mappings]
+        transcripts = []
+        for mapping in mappings:
+            if versions and mapping.version:
+                accession = '%s.%i' % (mapping.accession, mapping.version)
+            else:
+                accession = mapping.accession
+            if mapping.select_transcript:
+                if mapping.reference_type == 'lrg':
+                    selector = 't%d' % mapping.transcript
+                elif mapping.transcript:
+                    selector = '(%s_v%.3i)' % (mapping.gene, mapping.transcript)
+                else:
+                    selector = '(%s)' % mapping.gene
+            else:
+                selector = ''
+            transcripts.append('%s%s' % (accession, selector))
+
+        return transcripts
     #getTranscripts
 
     @srpc(Mandatory.Unicode, Mandatory.Unicode, _returns=Array(Mandatory.Unicode))
@@ -296,12 +309,30 @@ class MutalyzerService(ServiceBase):
         L.addMessage(__file__, -1, "INFO",
             "Finished processing getTranscriptsByGene(%s %s)" % (build, name))
 
-        return ['%s.%s' % (m.accession, m.version) for m in mappings]
-    #getTranscriptsByGene
+        transcripts = []
+        for mapping in mappings:
+            if mapping.version:
+                accession = '%s.%i' % (mapping.accession, mapping.version)
+            else:
+                accession = mapping.accession
+            if mapping.select_transcript:
+                if mapping.reference_type == 'lrg':
+                    selector = 't%d' % mapping.transcript
+                elif mapping.transcript:
+                    selector = '(%s_v%.3i)' % (mapping.gene, mapping.transcript)
+                else:
+                    selector = '(%s)' % mapping.gene
+            else:
+                selector = ''
+            transcripts.append('%s%s' % (accession, selector))
+
+        return transcripts
+    #getTranscriptsByGeneName
 
     @srpc(Mandatory.Unicode, Mandatory.Unicode, Mandatory.Integer,
-        Mandatory.Integer, Mandatory.Integer, _returns=Array(Mandatory.Unicode))
-    def getTranscriptsRange(build, chrom, pos1, pos2, method) :
+          Mandatory.Integer, Mandatory.Integer, Boolean,
+          _returns=Array(Mandatory.Unicode))
+    def getTranscriptsRange(build, chrom, pos1, pos2, method, versions=False):
         """
         Get all the transcripts that overlap with a range on a chromosome.
 
@@ -319,6 +350,8 @@ class MutalyzerService(ServiceBase):
             - 0 ; Return only the transcripts that completely fall in the range
                   [pos1, pos2].
             - 1 ; Return all hit transcripts.
+        @kwarg versions: If set to True, also include transcript versions.
+        @type versions: bool
 
         @return: A list of transcripts.
         @rtype: list
@@ -381,7 +414,24 @@ class MutalyzerService(ServiceBase):
             "Finished processing getTranscriptsRange(%s %s %s %s %s)" % (
             build, chrom, pos1, pos2, method))
 
-        return [m.accession for m in mappings]
+        transcripts = []
+        for mapping in mappings:
+            if versions and mapping.version:
+                accession = '%s.%i' % (mapping.accession, mapping.version)
+            else:
+                accession = mapping.accession
+            if mapping.select_transcript:
+                if mapping.reference_type == 'lrg':
+                    selector = 't%d' % mapping.transcript
+                elif mapping.transcript:
+                    selector = '(%s_v%.3i)' % (mapping.gene, mapping.transcript)
+                else:
+                    selector = '(%s)' % mapping.gene
+            else:
+                selector = ''
+            transcripts.append('%s%s' % (accession, selector))
+
+        return transcripts
     #getTranscriptsRange
 
     @srpc(Mandatory.Unicode, Mandatory.Unicode, Mandatory.Integer,
@@ -476,6 +526,9 @@ class MutalyzerService(ServiceBase):
 
         for mapping in mappings:
             t = TranscriptMappingInfo()
+            # TODO: This doesn't work so well for mappings with select_transcript
+            # set, for example LRG and mtDNA mappings, but it's not so easy to
+            # fix in a backwards compatible way.
             t.name = mapping.accession
             t.version = mapping.version
             t.gene = mapping.gene
diff --git a/tests/test_services_soap.py b/tests/test_services_soap.py
index 37a5799f..a02146da 100644
--- a/tests/test_services_soap.py
+++ b/tests/test_services_soap.py
@@ -166,6 +166,52 @@ def test_numberconversion_gtoc_required_gene(api):
     assert 'XM_001715131.2:c.*19483A>G' in r.string
 
 
+@pytest.mark.usefixtures('hg19_transcript_mappings')
+def test_gettranscripts_lrg(api):
+    """
+    Running getTranscripts should give us overlapping transcripts.
+    list of transcripts including LRG transcripts.
+    """
+    r = api('getTranscripts', build='hg19', chrom='chr1',
+            pos=207646118)
+    assert type(r.string) == list
+    assert 'LRG_348t1' in r.string
+
+
+@pytest.mark.usefixtures('hg19_transcript_mappings')
+def test_gettranscripts_mtdna(api):
+    """
+    Running getTranscripts should give us overlapping transcripts.
+    list of transcripts, also on chrM.
+    """
+    r = api('getTranscripts', build='hg19', chrom='chrM',
+            pos=10765)
+    assert type(r.string) == list
+    assert 'NC_012920(ND4_v001)' in r.string
+
+
+@pytest.mark.usefixtures('hg19_transcript_mappings')
+def test_gettranscriptsrange_lrg(api):
+    """
+    Running getTranscriptsRange should give us overlapping transcripts.
+    list of transcripts including LRG transcripts.
+    """
+    r = api('getTranscriptsRange', 'hg19', 'chr1', 207646118, 207646118, 1)
+    assert type(r.string) == list
+    assert 'LRG_348t1' in r.string
+
+
+@pytest.mark.usefixtures('hg19_transcript_mappings')
+def test_gettranscriptsrange_mtdna(api):
+    """
+    Running getTranscripts should give us overlapping transcripts.
+    list of transcripts, also on chrM.
+    """
+    r = api('getTranscriptsRange', 'hg19', 'chrM', 10765, 10765, 1)
+    assert type(r.string) == list
+    assert 'NC_012920(ND4_v001)' in r.string
+
+
 @pytest.mark.usefixtures('hg19_transcript_mappings')
 def test_gettranscriptsbygenename_valid(api):
     """
@@ -180,6 +226,28 @@ def test_gettranscriptsbygenename_valid(api):
         assert t in r.string
 
 
+@pytest.mark.usefixtures('hg19_transcript_mappings')
+def test_gettranscriptsbygenename_valid_lrg(api):
+    """
+    Running getTranscriptsByGeneName with valid gene name should give a
+    list of transcripts including LRG transcripts.
+    """
+    r = api('getTranscriptsByGeneName', build='hg19', name='CR2')
+    assert type(r.string) == list
+    assert 'LRG_348t1' in r.string
+
+
+@pytest.mark.usefixtures('hg19_transcript_mappings')
+def test_gettranscriptsbygenename_valid_mtdna(api):
+    """
+    Running getTranscriptsByGeneName with valid gene name should give a
+    list of transcripts also on chrM.
+    """
+    r = api('getTranscriptsByGeneName', build='hg19', name='ND4')
+    assert type(r.string) == list
+    assert 'NC_012920.1(ND4_v001)' in r.string
+
+
 @pytest.mark.usefixtures('hg19_transcript_mappings')
 def test_gettranscriptsbygenename_invalid(api):
     """
-- 
GitLab