Fix transcript naming in mapping webservices

The following three webservice methods return a list of transcript identifiers for some query:

- getTranscriptsRange
- getTranscripts
- getTranscriptsByGeneName

Previously they didn't work correctly for LRG transcripts (a bogus version was included and no transcript was selected) and RefSeq transcripts on mtDNA (no transcript was selected).

Additionally, the getTranscriptsRange method now optionally includes version numbers with the boolean versions argument (default false).

Fix transcript naming in mapping webservices
d2987465 · Vermaat · d9335656 · d2987465 · d2987465
Commit d2987465 authored 9 years ago by Vermaat
--- a/mutalyzer/services/rpc.py
+++ b/mutalyzer/services/rpc.py
@@ -264,11 +264,24 @@ class MutalyzerService(ServiceBase):
                     "Finished processing getTranscripts(%s %s %s %s)"
                     % (build, chrom, pos, versions))

-        #filter out the accNo
-        if versions:
-            return ['%s.%s' % (m.accession, m.version) for m in mappings]
-        else:
-            return [m.accession for m in mappings]
+        transcripts = []
+        for mapping in mappings:
+            if versions and mapping.version:
+                accession = '%s.%i' % (mapping.accession, mapping.version)
+            else:
+                accession = mapping.accession
+            if mapping.select_transcript:
+                if mapping.reference_type == 'lrg':
+                    selector = 't%d' % mapping.transcript
+                elif mapping.transcript:
+                    selector = '(%s_v%.3i)' % (mapping.gene, mapping.transcript)
+                else:
+                    selector = '(%s)' % mapping.gene
+            else:
+                selector = ''
+            transcripts.append('%s%s' % (accession, selector))
+
+        return transcripts
    #getTranscripts

    @srpc(Mandatory.Unicode, Mandatory.Unicode, _returns=Array(Mandatory.Unicode))
@@ -296,12 +309,30 @@ class MutalyzerService(ServiceBase):
        L.addMessage(__file__, -1, "INFO",
            "Finished processing getTranscriptsByGene(%s %s)" % (build, name))

-        return ['%s.%s' % (m.accession, m.version) for m in mappings]
-    #getTranscriptsByGene
+        transcripts = []
+        for mapping in mappings:
+            if mapping.version:
+                accession = '%s.%i' % (mapping.accession, mapping.version)
+            else:
+                accession = mapping.accession
+            if mapping.select_transcript:
+                if mapping.reference_type == 'lrg':
+                    selector = 't%d' % mapping.transcript
+                elif mapping.transcript:
+                    selector = '(%s_v%.3i)' % (mapping.gene, mapping.transcript)
+                else:
+                    selector = '(%s)' % mapping.gene
+            else:
+                selector = ''
+            transcripts.append('%s%s' % (accession, selector))
+
+        return transcripts
+    #getTranscriptsByGeneName

    @srpc(Mandatory.Unicode, Mandatory.Unicode, Mandatory.Integer,
-        Mandatory.Integer, Mandatory.Integer, _returns=Array(Mandatory.Unicode))
-    def getTranscriptsRange(build, chrom, pos1, pos2, method) :
+          Mandatory.Integer, Mandatory.Integer, Boolean,
+          _returns=Array(Mandatory.Unicode))
+    def getTranscriptsRange(build, chrom, pos1, pos2, method, versions=False):
        """
        Get all the transcripts that overlap with a range on a chromosome.

@@ -319,6 +350,8 @@ class MutalyzerService(ServiceBase):
            - 0 ; Return only the transcripts that completely fall in the range
                  [pos1, pos2].
            - 1 ; Return all hit transcripts.
+        @kwarg versions: If set to True, also include transcript versions.
+        @type versions: bool

        @return: A list of transcripts.
        @rtype: list
@@ -381,7 +414,24 @@ class MutalyzerService(ServiceBase):
            "Finished processing getTranscriptsRange(%s %s %s %s %s)" % (
            build, chrom, pos1, pos2, method))

-        return [m.accession for m in mappings]
+        transcripts = []
+        for mapping in mappings:
+            if versions and mapping.version:
+                accession = '%s.%i' % (mapping.accession, mapping.version)
+            else:
+                accession = mapping.accession
+            if mapping.select_transcript:
+                if mapping.reference_type == 'lrg':
+                    selector = 't%d' % mapping.transcript
+                elif mapping.transcript:
+                    selector = '(%s_v%.3i)' % (mapping.gene, mapping.transcript)
+                else:
+                    selector = '(%s)' % mapping.gene
+            else:
+                selector = ''
+            transcripts.append('%s%s' % (accession, selector))
+
+        return transcripts
    #getTranscriptsRange

    @srpc(Mandatory.Unicode, Mandatory.Unicode, Mandatory.Integer,
@@ -476,6 +526,9 @@ class MutalyzerService(ServiceBase):

        for mapping in mappings:
            t = TranscriptMappingInfo()
+            # TODO: This doesn't work so well for mappings with select_transcript
+            # set, for example LRG and mtDNA mappings, but it's not so easy to
+            # fix in a backwards compatible way.
            t.name = mapping.accession
            t.version = mapping.version
            t.gene = mapping.gene

--- a/tests/test_services_soap.py
+++ b/tests/test_services_soap.py
@@ -166,6 +166,52 @@ def test_numberconversion_gtoc_required_gene(api):
    assert 'XM_001715131.2:c.*19483A>G' in r.string


+@pytest.mark.usefixtures('hg19_transcript_mappings')
+def test_gettranscripts_lrg(api):
+    """
+    Running getTranscripts should give us a list of overlapping
+    transcripts, including LRG transcripts.
+    """
+    r = api('getTranscripts', build='hg19', chrom='chr1',
+            pos=207646118)
+    assert type(r.string) == list
+    assert 'LRG_348t1' in r.string
+
+
+@pytest.mark.usefixtures('hg19_transcript_mappings')
+def test_gettranscripts_mtdna(api):
+    """
+    Running getTranscripts should give us a list of overlapping
+    transcripts, also on chrM.
+    """
+    r = api('getTranscripts', build='hg19', chrom='chrM',
+            pos=10765)
+    assert type(r.string) == list
+    assert 'NC_012920(ND4_v001)' in r.string
+
+
+@pytest.mark.usefixtures('hg19_transcript_mappings')
+def test_gettranscriptsrange_lrg(api):
+    """
+    Running getTranscriptsRange should give us a list of overlapping
+    transcripts, including LRG transcripts.
+    """
+    r = api('getTranscriptsRange', 'hg19', 'chr1', 207646118, 207646118, 1)
+    assert type(r.string) == list
+    assert 'LRG_348t1' in r.string
+
+
+@pytest.mark.usefixtures('hg19_transcript_mappings')
+def test_gettranscriptsrange_mtdna(api):
+    """
+    Running getTranscriptsRange should give us a list of overlapping
+    transcripts, also on chrM.
+    """
+    r = api('getTranscriptsRange', 'hg19', 'chrM', 10765, 10765, 1)
+    assert type(r.string) == list
+    assert 'NC_012920(ND4_v001)' in r.string
+
+
 @pytest.mark.usefixtures('hg19_transcript_mappings')
 def test_gettranscriptsbygenename_valid(api):
    """
@@ -180,6 +226,28 @@ def test_gettranscriptsbygenename_valid(api):
        assert t in r.string


+@pytest.mark.usefixtures('hg19_transcript_mappings')
+def test_gettranscriptsbygenename_valid_lrg(api):
+    """
+    Running getTranscriptsByGeneName with valid gene name should give a
+    list of transcripts including LRG transcripts.
+    """
+    r = api('getTranscriptsByGeneName', build='hg19', name='CR2')
+    assert type(r.string) == list
+    assert 'LRG_348t1' in r.string
+
+
+@pytest.mark.usefixtures('hg19_transcript_mappings')
+def test_gettranscriptsbygenename_valid_mtdna(api):
+    """
+    Running getTranscriptsByGeneName with valid gene name should give a
+    list of transcripts also on chrM.
+    """
+    r = api('getTranscriptsByGeneName', build='hg19', name='ND4')
+    assert type(r.string) == list
+    assert 'NC_012920.1(ND4_v001)' in r.string
+
+
 @pytest.mark.usefixtures('hg19_transcript_mappings')
 def test_gettranscriptsbygenename_invalid(api):
    """