From a89fcf463979747843829bbcd581818dc25248a3 Mon Sep 17 00:00:00 2001
From: Martijn Vermaat <martijn@vermaat.name>
Date: Wed, 26 Mar 2014 14:27:47 +0100
Subject: [PATCH] In LRG, only support one CDS per transcript (ignore all
 others)

The LRG model allows for more than one CDS per transcript (and each CDS
can be coupled with more than one transcript). The Mutalyzer gene model
does not allow that.

Previously, multiple CDSs per transcript were treated incorrectly (I
think), so now we just ignore any CDS except the first per transcript.

Unfortunately, it is a bit more involved to give good warnings for this,
so that is not implemented.
---
 mutalyzer/parsers/lrg.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/mutalyzer/parsers/lrg.py b/mutalyzer/parsers/lrg.py
index 9b5d0895..d8f742c8 100644
--- a/mutalyzer/parsers/lrg.py
+++ b/mutalyzer/parsers/lrg.py
@@ -192,7 +192,16 @@ def create_record(data):
         # ending position, keep the possibility in mind that multiple CDS
         # regions are given
         CDSPList = GenRecord.PList()
-        for CDS in tData.getElementsByTagName("coding_region"):
+        for cds_id, CDS in enumerate(tData.getElementsByTagName("coding_region")):
+            if cds_id > 0:
+                # Todo: For now, we only support one CDS per transcript and
+                #   ignore all others.
+                #   By the way, I don't think the loop and sorting of CDS
+                #   positions makes any sense here, but I leave it in place
+                #   and just ignore everything except the first iteration.
+                #translationName = CDS.getElementsByTagName("translation")[0].getAttribute("name").encode("utf8")[1:]
+                #print 'Ignoring transcript %s translation %s' % (transcriptName, translationName)
+                continue
             coordinates = _get_coordinates(CDS, lrg_id)
             CDSPList.positionList.extend(\
             [int(coordinates["start"]), int(coordinates["end"])])
-- 
GitLab