GenRecord.py 29.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
"""
Module to convert a GenBank record to a nested dictionary consisting of
a list of genes, which itself consists of a list of loci. This structure
makes it possible to iterate over genes and transcripts without having to
search for them each time.

@requires: Crossmap
@requires: Bio
@requires: Db
"""
# Public classes:
#     - PList     ; Store a general location and a list of splice sites.
#     - Locus     ; Store data about the mRNA and CDS splice sites.
#     - Gene      ; Store a list of Locus objects and the orientation.
#     - Record    ; Store a geneList and other additional information.
#     - GenRecord ; Convert a GenBank record to a nested dictionary.


19
import Bio
20

Vermaat's avatar
Vermaat committed
21
from mutalyzer import util
22
from mutalyzer import config
23
24
from mutalyzer import Crossmap
from mutalyzer import Db
25
26
27


class PList(object) :
Laros's avatar
Added:  
Laros committed
28
    """
29
30
    A position list object, to store a general location and a list of
    specific splice sites (if available).
Laros's avatar
Added:  
Laros committed
31

32
33
34
35
    These objects are used to describe either a list of mRNA splice sites
    or a list of CDS splice sites. These splice sites are stored in the
    list element. The location element is a fallback in case the splice
    sites are not available.
Laros's avatar
Added:  
Laros committed
36

37
38
    Special methods:
        - __init__() ; Initialise the class.
Laros's avatar
Added:  
Laros committed
39

40
41
42
    Public variables:
        - location ; A tuple of integers between which the object resides.
        - list     ; A list (with an even amount of entries) of splice sites.
Laros's avatar
Added:  
Laros committed
43
44
45
46
    """

    def __init__(self) :
        """
47
        Initialise the class.
Laros's avatar
Added:  
Laros committed
48

49
50
51
52
53
        Public variables (altered):
            - location     ; A tuple of integers between which the object
                             resides.
            - POSITIONlist ; A list (with an even amount of entries) of splice
                             sites.
Laros's avatar
Added:  
Laros committed
54
55
56
        """

        self.location = []
57
        self.positionList = []
Laros's avatar
Added:  
Laros committed
58
    #__init__
59
#PList
Laros's avatar
Added:  
Laros committed
60
61
62

class Locus(object) :
    """
63
    A Locus object, to store data about the mRNA and CDS splice sites.
Laros's avatar
Added:  
Laros committed
64

65
66
    Special methods:
        - __init__() ; Initialise the class.
Laros's avatar
Added:  
Laros committed
67

68
69
70
71
    Public variables:
        - mRNA ; A position list object.
        - CDS  ; A position list object.
        - exon ; A position list object.
Laros's avatar
Added:  
Laros committed
72
73
    """

74
    def __init__(self, name) :
Laros's avatar
Added:  
Laros committed
75
        """
76
        Initialise the class.
Laros's avatar
Added:  
Laros committed
77

78
79
80
81
82
83
84
        Public variables (altered):
            - mRNA     ; A position list object.
            - CDS      ; A position list object.
            - location ;
            - exon     ; A position list object.
            - txTable  ; The translation table.
            - CM       ; A Crossmap object.
85

86
87
        @arg name: identifier of the locus
        @type name: string
Laros's avatar
Added:  
Laros committed
88
89
        """

90
        self.name = name
Vermaat's avatar
Vermaat committed
91
        self.current = False
Laros's avatar
Added:  
Laros committed
92
93
        self.mRNA = None
        self.CDS = None
Laros's avatar
Laros committed
94
        self.location = []
95
        self.exon = None
Laros's avatar
Laros committed
96
        self.txTable = 1
Laros's avatar
Added:    
Laros committed
97
        self.CM = None
98
99
        self.transcriptID = None
        self.proteinID = None
Gerben Stouten's avatar
Gerben Stouten committed
100
        self.genomicID = None
101
102
        self.molType = 'c'
        self.description = ""
103
        self.proteinDescription = "?"
104
        self.proteinRange = []
105
        self.locusTag = None
106
        self.link = None
107
108
        self.transcribe = False
        self.translate = False
109
110
111
        self.linkMethod = None
        self.transcriptProduct = None
        self.proteinProduct = None
Laros's avatar
Added:  
Laros committed
112
    #__init__
113

114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
    def cancelDescription(self):
        """
        Set the description on this locus to 'unknown'.

        This can be used if at some point we give up creating a sensible
        description on this locus. It also makes sure future additions to
        the description are ignored and it keeps the 'unknown' value.

        @note: This depends on the check for the unknown value in the
            addToDescription method. This is a not a beatiful solution.
        """
        self.description = '?'
    #cancelDescription

    def addToDescription(self, rawVariant):
129
        """
130
        Expands the DNA description with a new raw variant.
131

132
133
        @arg rawVariant: description of a single mutation
        @type rawVariant: string
134
        """
Vermaat's avatar
Vermaat committed
135
        if self.description:
136
137
138
139
            # Don't change anything if we already have an unknown value.
            if self.description != '?':
                self.description = "%s;%s" % (self.description, rawVariant)
        else:
140
141
142
            self.description = rawVariant
    #addToDescription
#Locus
Laros's avatar
Added:  
Laros committed
143

144

Laros's avatar
Added:  
Laros committed
145
146
class Gene(object) :
    """
147
148
    A Gene object, to store a list of Locus objects and the orientation of
    the gene.
149

150
151
    Special methods:
        - __init__() ; Initialise the class.
Laros's avatar
Added:  
Laros committed
152

153
154
    Public variables:
        - orientation; The orientation of the gene: 1 = forward, -1 = reverse.
155
        - transcriptslist; A list of Locus objects.
Laros's avatar
Added:  
Laros committed
156
157
    """

158
    def __init__(self, name) :
Laros's avatar
Added:  
Laros committed
159
        """
160
        Initialise the class.
Laros's avatar
Added:  
Laros committed
161

162
163
164
165
166
167
168
169
        Public variables (altered):
            - name
            - orientation    ; The orientation of the gene.
            - transcriptList ; A list of transcripts
            - location ;
            - longName ;
        Private variables (altered):
            - __locusTag ;
170

171
172
        @arg name: gene name
        @type name: string
Laros's avatar
Added:  
Laros committed
173
174
        """

175
176
177
        self.name = name
        self.orientation = 1
        self.transcriptList = []
178
        self.location = []
179
        self.longName = ""
180
        self.__locusTag = "000"
Laros's avatar
Added:  
Laros committed
181
182
    #__init__

183
184
    def newLocusTag(self) :
        """
185
        Generates a new Locus tag.
186

187
188
        @return: Locus tag
        @rtype: integer (3 digits, if < 100 preceeded with 0's)
189
190
191
192
193
194
195
        """

        self.__locusTag = "%03i" % (int(self.__locusTag) + 1)

        return self.__locusTag
    #newLocusTag

196
197
    def findLocus(self, name) :
        """
198
        Find a transcript, given its name.
199

200
201
        @arg name: transcript variant number
        @type name: string
202

203
204
        @return: transcript
        @rtype: object
205
206
207
        """

        for i in self.transcriptList :
208
            if i.name == name or i.name == str("%03i" % int(name)):
209
210
211
                return i
        return None
    #findLocus
212
213
214

    def listLoci(self) :
        """
215
        Provides a list of transcript variant numbers
216

217
218
        @return: list of transcript variant numbers
        @rtype: list
219
220
221
222
223
224
225
        """

        ret = []
        for i in self.transcriptList :
            ret.append(i.name)
        return ret
    #listLoci
226
227
228

    def findLink(self, protAcc) :
        """
229
        Look in the list of transcripts for a given protein accession number.
230

231
232
        @arg protAcc: protein accession number
        @type protAcc: string
233

234
235
        @return: transcript
        @rtype: object
236
237
238
239
240
241
        """

        for i in self.transcriptList :
            if i.link == protAcc :
                return i
        return None
242
    #findLink
243
244
245
#Gene

class Record(object) :
246
    """
247
248
249
250
251
252
253
254
255
256
257
258
259
260
    A Record object, to store a geneList and other additional
    information.

    Special methods:
        - __init__() ; Initialise the class.

    Public variables:
        - geneList  ; List of Gene objects.
        - mol_type  ; Variable to indicate the sequence type (DNA, RNA, ...)
        - organelle ; Variable to indicate whether the sequence is from the
                      nucleus or from an organelle (if so, also from which
                      one).
        - source    ; A fake gene that can be used when no gene information
                      is present.
261
    """
Laros's avatar
Laros committed
262
263

    def __init__(self) :
264
        """
265
        Initialise the class.
266
267


268
269
270
271
272
273
274
275
276
277
278
279
        Public variables (altered):
            - geneList  ; List of Gene objects.
            - molType   ; Variable to indicate the sequence type (DNA, RNA,
                          ...)
            - seq       ; The reference sequence
            - mapping   ; The mapping of the reference sequence to the genome
                          include a list of differences between the sequences
            - organelle ; Variable to indicate whether the sequence is from
                          the nucleus or from an organelle (if so, also from
                          which one).
            - source    ; A fake gene that can be used when no gene
                          information is present.
280
        """
Laros's avatar
Laros committed
281

282
        self.geneList = []
283
        self.molType = 'g'
284
285
        self.seq = ""
        self.mapping = []
Laros's avatar
Laros committed
286
        self.organelle = None
287
        self.source = Gene(None)
288
        self.description = ""
289
        self._sourcetype = None           #LRG or GB
290
        self.version = None
291
292
293
294
        self.chromOffset = 0
        self.chromDescription = ""
        self.orientation = 1
        self.recordId = None
Laros's avatar
Laros committed
295
    #__init__
296
297
298

    def findGene(self, name) :
        """
299
        Returns a Gene object, given its name.
300

301
302
        @arg name: Gene name
        @type name: string
303

304
305
        @return: Gene object
        @rtype: object
306
307
308
309
310
311
312
        """

        for i in self.geneList :
            if i.name == name :
                return i
        return None
    #findGene
313

314
315
    def listGenes(self) :
        """
316
        List the names of all genes found in this record.
317

318
319
        @return: Genes list
        @rtype: list
320

321
322
323
324
325
326
327
328
        """

        ret = []
        for i in self.geneList :
            ret.append(i.name)
        return ret
    #listGenes

329
330
    def addToDescription(self, rawVariant) :
        """
331
        Expands the DNA description with a new raw variant.
332

333
334
        @arg rawVariant: description of a single mutation
        @type rawVariant: string
335
336
337
338
339
340
341
        """

        if self.description :
            self.description = "%s;%s" % (self.description, rawVariant)
        else :
            self.description = rawVariant
    #addToDescription
342
343
344

    def toChromPos(self, i) :
        """
345
        Converts a g. position (relative to the start of the record) to a
346
347
        chromosomal g. position

348
349
        @arg i: g. position (relative to the start of the record)
        @type i: integer
350

351
352
        @return: chromosomal g. position
        @rtype: integer
353
        """
354
355
        if not self.chromOffset:
            return None
356
357
358
359
360
361
362
363

        if self.orientation == 1 :
            return self.chromOffset + i - 1
        return self.chromOffset - i + 1
    #toChromPos

    def addToChromDescription(self, rawVariant) :
        """
364
        @todo document me
365
366
367
368
369
        """

        if not self.chromOffset :
            return
        if self.chromDescription :
370
            self.chromDescription = "%s;%s" % (self.chromDescription,
371
372
373
374
                rawVariant)
        else :
            self.chromDescription = rawVariant
    #addToChromDescription
375
#Record
Laros's avatar
Laros committed
376

Laros's avatar
Added:  
Laros committed
377
378
class GenRecord() :
    """
379
    Convert a GenBank record to a nested dictionary.
Laros's avatar
Added:  
Laros committed
380

381
382
    Public methods:
        - checkRecord()   ;   Check and repair self.record.
Laros's avatar
Added:  
Laros committed
383
384
    """

385
    def __init__(self, output) :
386
        """
387
        Initialise the class.
388

389
390
        Public variable:
            - record    ; A record object
391

392
393
        @arg output: an output object
        @type output: object
394
395
396
397
398
        """
        self.__output = output
        self.record = None
    #__init__

399
400
    def __checkExonList(self, exonList, CDSpos) :
        """
401
        @todo document me
402

403
404
405
406
        @arg exonList: list of splice sites
        @type exonList: list (object)
        @arg CDSpos: location of the CDS
        @type CDSpos: object
407
408

        @return:
409
        @rtype: boolean
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
        """

        if not exonList :
            return False
        if not CDSpos :
            return True

        e = exonList.positionList
        c = CDSpos.location

        seen = 0
        for i in range(0, len(e), 2) :
            if e[i] <= c[0] and e[i + 1] >= c[0] :
                seen += 1
            if e[i] <= c[1] and e[i + 1] >= c[1] :
                seen += 1
        #for

        if seen == 2 :
            return True
        return False
    #__checkExonList
432

433
434
    def __constructCDS(self, mRNA, CDSpos) :
        """
435
        Construct a list of coordinates that contains CDS start and stop and
436
        the internal splice sites.
437

438
439
440
441
442
443
444
        @arg mRNA: mRNA positions/coordinates list
        @type mRNA: list (integer)
        @arg CDSpos: coding DNA positions/coordinates
        @type CDSpos: list (integer)

        @return: CDS positions plus internal splice sites
        @rtype: list (integer)
445
        """
446

447
448
        i = 1
        ret = [CDSpos[0]]
449

450
451
        while CDSpos[0] > mRNA[i] :
            i += 2
452

453
454
455
        j = i
        while CDSpos[1] > mRNA[j] :
            j += 2
456

457
458
        ret.extend(mRNA[i:j])
        ret.append(CDSpos[1])
459

460
461
462
        return ret
    #__constructCDS

Vermaat's avatar
Vermaat committed
463
    def __maybeInvert(self, gene, string, string_reverse=None) :
464
        """
465
466
        Return the reverse-complement of a DNA sequence if the gene is in
        the reverse orientation.
467
468

        @arg gene: Gene
469
470
471
        @type gene: object
        @arg string: DNA sequence
        @type string: string
Vermaat's avatar
Vermaat committed
472
473
        @kwarg string_reverse: DNA sequence to use (if not None) for the
            reverse complement.
474

475
476
477
        @return: reverse-complement (if applicable), otherwise return the
            original.
        @rtype: string
478
        """
Vermaat's avatar
Vermaat committed
479
480
481
        if gene.orientation == -1:
            if string_reverse:
                string = string_reverse
482
483
484
485
            return Bio.Seq.reverse_complement(string)
        return string
    #__maybeInvert

486
    def checkRecord(self) :
487
        """
488
489
        Check if the record in self.record is compatible with mutalyzer.
        Update the mRNA PList with the exon and CDS data.
490

491
492
        @todo: This function should really check the record for minimal
        requirements
493
494
        """

495
        #TODO:  This function should really check
496
        #       the record for minimal requirements.
497
        for i in self.record.geneList :
498
499
500
501
502
503
504
505
506
507
508
509
510
            """
            if len(i.transcriptList) == 2 :
                if i.transcriptList[0].CDS and not i.transcriptList[1].CDS and \
                   i.transcriptList[1].mRNA and not i.transcriptList[0].mRNA :
                    i.transcriptList[0].mRNA = i.transcriptList[1].mRNA
                if i.transcriptList[1].CDS and not i.transcriptList[0].CDS and \
                   i.transcriptList[0].mRNA and not i.transcriptList[1].mRNA :
                    i.transcriptList[0].CDS = i.transcriptList[1].CDS
                i.transcriptList = [i.transcriptList[0]]
                i.transcriptList[0].transcribe = True
                i.transcriptList[0].translate = True
            #if
            """
511
512
            for j in i.transcriptList :
                if not j.mRNA :
513
514
                    usableExonList = self.__checkExonList(j.exon, j.CDS)
                    if not j.exon or not usableExonList :
515
516
517
518
                        if self.record.molType == 'g' :
                            self.__output.addMessage(__file__, 2, "WNOMRNA",
                                "No mRNA field found for gene %s, transcript " \
                                "variant %s in record, constructing " \
519
520
521
                                "it from CDS. Please note that descriptions "\
                                "exceeding CDS boundaries are invalid." % (
                                i.name, j.name))
522
523
524
525
526
527
528
529
                        if j.exon and j.exon.positionList and \
                           not usableExonList :
                            self.__output.addMessage(__file__, 2, "WNOMRNA",
                                "Exons were found for gene %s, transcript " \
                                "variant %s but were not usable. " \
                                "Please note that descriptions "\
                                "exceeding CDS boundaries are invalid." % (
                                i.name, j.name))
530
531
                        if j.CDS :
                            if not j.CDS.positionList :
532
533
534
535
536
                                #self.__output.addMessage(__file__, 2,
                                #    "WNOCDSLIST", "No CDS list found for " \
                                #    "gene %s, transcript variant %s in " \
                                #    "record, constructing it from " \
                                #    "CDS location." % (i.name, j.name))
537
538
539
540
541
                                j.mRNA = j.CDS
                                j.mRNA.positionList = j.CDS.location
                            #if
                            else :
                                j.mRNA = j.CDS
Laros's avatar
Laros committed
542
                            j.linkMethod = "construction"
543
544
                            j.transcribe = True
                            j.translate = True
545
546
547
548
                        #if
                        else :
                            self.__output.addMessage(__file__, 2, "WNOCDS",
                                "No CDS found for gene %s, transcript " \
549
                                "variant %s in record, " \
550
                                "constructing it from gene location." % (
551
                                i.name, j.name))
552
553
554
555
556
                            j.CDS = None #PList()
                            #j.CDS.location = i.location
                            j.mRNA = PList()
                            j.mRNA.location = i.location
                            #j.mRNA.positionList = i.location
557
558
                            j.molType = 'n'
                        #else
Laros's avatar
Added:  
Laros committed
559
560
                    #if
                    else :
561
562
563
564
565
                        #self.__output.addMessage(__file__, 2, "WNOMRNA",
                        #    "No mRNA field found for gene %s, transcript " \
                        #    "variant %s in record, constructing " \
                        #    "it from gathered exon information." % (
                        #    i.name, j.name))
566
567
568
                        j.mRNA = j.exon
                    #else
                #if
569
570
571
                #else :
                #    j.transcribe = True

572
573
                if not j.mRNA.positionList :
                    j.mRNA.positionList = j.mRNA.location
574
                if j.mRNA.positionList and j.CDS and j.CDS.positionList != None :
575
                    if not j.CDS.positionList :
576
577
578
                        #self.__output.addMessage(__file__, 2, "WNOCDS",
                        #    "No CDS list found for gene %s, transcript " \
                        #    "variant %s in record, constructing " \
579
                        #    "it from mRNA list and CDS location." % (i.name,
580
                        #    j.name))
581
582
583
584
585
586
                        if j.mRNA.positionList :
                            j.CDS.positionList = self.__constructCDS(
                                j.mRNA.positionList, j.CDS.location)
                        else :
                            j.CDS.positionList = self.__constructCDS(
                                j.mRNA.location, j.CDS.location)
587
588
                        j.transcribe = True
                        j.translate = True
589
                    #if
590
                    j.CM = Crossmap.Crossmap(j.mRNA.positionList,
591
                                             j.CDS.location, i.orientation)
Laros's avatar
Added:  
Laros committed
592
                #if
593
594
595
                else :
                    j.molType = 'n'
                    if j.mRNA.positionList :
596
                        j.CM = Crossmap.Crossmap(j.mRNA.positionList,
597
                                                 [], i.orientation)
Laros's avatar
Laros committed
598
                        j.transcribe = True
599
600
                    else :
                        j.description = '?'
601
                #else
602
603
            #for
        #for
604
    #checkRecord
605

606
607
608
609
610
611
612
613
614
615
616
617
618
619
    def current_transcript(self):
        """
        Return the current transcript.

        @return: Current transcript if there is one, None otherwise.
        @rtype: GenRecord.Locus
        """
        for i in self.record.geneList:
            for j in i.transcriptList:
                if j.current:
                    return j
        return None
    #current_transcript

620
621
    def name(self, start_g, stop_g, varType, arg1, arg2, roll, arg1_reverse=None,
             start_fuzzy=False, stop_fuzzy=False):
622
        """
623
        Generate variant descriptions for all genes, transcripts, etc.
Vermaat's avatar
Vermaat committed
624

625
626
627
628
629
630
631
632
633
634
635
636
        @arg start_g: start position
        @type start_g: integer
        @arg stop_g: stop position
        @type stop_g: integer
        @arg varType: variant type
        @type varType: string
        @arg arg1: argument 1 of a raw variant
        @type arg1: string
        @arg arg2: argument 2 of a raw variant
        @type arg2: string
        @arg roll: ???
        @type roll: tuple (integer, integer)
Vermaat's avatar
Vermaat committed
637
638
        @kwarg arg1_reverse: argument 1 to be used on reverse strand
        @type arg1_reverse: string
639
640
641
642
        @kwarg start_fuzzy: Indicates if start position of variant is fuzzy.
        @type start_fuzzy: bool
        @kwarg stop_fuzzy: Indicates if stop position of variant is fuzzy.
        @type stop_fuzzy: bool
643
        """
644
645
646
647
        forwardStart = start_g
        forwardStop = stop_g
        reverseStart = stop_g
        reverseStop = start_g
Vermaat's avatar
Vermaat committed
648
649
650
651
652
653
654
655
656
657
658
659
660

        if self.record.orientation == 1:
            chromStart = self.record.toChromPos(start_g)
            chromStop = self.record.toChromPos(stop_g)
            chromArg1 = arg1
            chromArg2 = arg2
        else:
            chromStart = self.record.toChromPos(stop_g)
            chromStop = self.record.toChromPos(start_g)
            chromArg1 = Bio.Seq.reverse_complement(arg1)
            chromArg2 = Bio.Seq.reverse_complement(arg2)
            # Todo: Should we use arg1_reverse here?

661
662
663
664
665
        if roll :
            forwardStart += roll[1]
            forwardStop += roll[1]
            reverseStart -= roll[0]
            reverseStop -= roll[0]
Vermaat's avatar
Vermaat committed
666
667
668
669
670
671
672
            if chromStart is not None:
                if self.record.orientation == 1:
                    chromStart += roll[1]
                    chromStop += roll[1]
                else:
                    chromStart += roll[0]
                    chromStop += roll[0]
673
674
675
676
        #if

        if varType != "subst" :
            if forwardStart != forwardStop :
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
                # Todo: Fuzzy offsets to genomic positions (see bug #38).
                #
                # The genomic positioning is problematic. We would like to
                # have it in brackets (as fuzzy positions), like the above
                # g.(34299_23232)del example.
                #
                # Now consider a variant c.a-?_b+18del where only the offset
                # before the exon is unknown but the offset after the exon is
                # exact. Now a genomic description like g.(34299)_23232del
                # comes to mind, however, this notation is not allowed by the
                # HGVS grammar.
                #
                # I think all we can do is to treat both positions as fuzzy in
                # the genomic description, even if only one of them really is.
                #
                # Peter thinks the HGVS grammar should at some point be
                # updated to allow the brackets around individual locations.
                if start_fuzzy or stop_fuzzy:
                    self.record.addToDescription("(%s_%s)%s%s" % (
                        forwardStart, forwardStop, varType, arg1))
                    self.record.addToChromDescription("(%s_%s)%s%s" % (
Vermaat's avatar
Vermaat committed
698
                        chromStart, chromStop, varType, chromArg1))
699
700
701
702
                else:
                    self.record.addToDescription("%s_%s%s%s" % (
                        forwardStart, forwardStop, varType, arg1))
                    self.record.addToChromDescription("%s_%s%s%s" % (
Vermaat's avatar
Vermaat committed
703
                        chromStart, chromStop, varType, chromArg1))
704
            #if
705
            else :
706
707
708
709
710
711
                if start_fuzzy or stop_fuzzy:
                    # Todo: Current HGVS does not allow for () around single
                    # positions, only around ranges (see above and #38).
                    self.record.addToDescription("(%s)%s%s" % (
                        forwardStart, varType, arg1))
                    self.record.addToChromDescription("(%s)%s%s" % (
Vermaat's avatar
Vermaat committed
712
                        chromStart, varType, chromArg1))
713
714
715
716
                else:
                    self.record.addToDescription("%s%s%s" % (
                        forwardStart, varType, arg1))
                    self.record.addToChromDescription("%s%s%s" % (
Vermaat's avatar
Vermaat committed
717
                        chromStart, varType, chromArg1))
718
            #else
719
720
        #if
        else :
721
722
723
724
725
726
            if start_fuzzy or stop_fuzzy:
                # Todo: Current HGVS does not allow for () around single
                # positions, only around ranges (see above and #38).
                self.record.addToDescription("(%s)%c>%c" % (
                    forwardStart, arg1, arg2))
                self.record.addToChromDescription("(%s)%c>%c" % (
Vermaat's avatar
Vermaat committed
727
                    chromStart, chromArg1, chromArg2))
728
729
730
731
            else:
                self.record.addToDescription("%s%c>%c" % (
                    forwardStart, arg1, arg2))
                self.record.addToChromDescription("%s%c>%c" % (
Vermaat's avatar
Vermaat committed
732
                    chromStart, chromArg1, chromArg2))
733

734
735
736
        for i in self.record.geneList :
            for j in i.transcriptList :
                if j.CM :
737
738
739
740
741
742
743
                    orientedStart = forwardStart
                    orientedStop = forwardStop
                    if i.orientation == -1 :
                        orientedStart = reverseStart
                        orientedStop = reverseStop
                    #if

Vermaat's avatar
Vermaat committed
744
745
746
747
748
749
750
751
752
                    # Turn of translation to protein if we hit splice sites.
                    # For the current transcript, this is handled with more
                    # care in variantchecker.py.
                    if not j.current and \
                           util.over_splice_site(orientedStart, orientedStop,
                                                 j.CM.RNA):
                        j.translate = False

                    # And check whether the variant hits CDS start.
753
754
                    if j.molType == 'c' and forwardStop >= j.CM.x2g(1, 0) \
                       and forwardStart <= j.CM.x2g(3, 0) :
755
                        self.__output.addMessage(__file__, 2, "WSTART",
756
757
                            "Mutation in start codon of gene %s transcript " \
                            "%s." % (i.name, j.name))
Vermaat's avatar
Vermaat committed
758
759
                        if not j.current:
                            j.translate = False
760
761
762

                    # FIXME Check whether the variant hits a splice site.

763
                    if varType != "subst" :
764
                        if orientedStart != orientedStop :
765
766
767
768
769
770
771
772
773
774
775
776
                            if (start_fuzzy or stop_fuzzy) and not j.current:
                                # Don't generate descriptions on transcripts
                                # other than the current in the case of fuzzy
                                # positions.
                                j.cancelDescription()
                            else:
                                j.addToDescription("%s_%s%s%s" % (
                                    j.CM.g2c(orientedStart, start_fuzzy),
                                    j.CM.g2c(orientedStop, stop_fuzzy),
                                    varType, self.__maybeInvert(i, arg1, arg1_reverse)))
                                self.checkIntron(i, j, orientedStart)
                                self.checkIntron(i, j, orientedStop)
777
                        #if
778
                        else :
779
780
781
782
783
784
785
786
787
788
789
                            if start_fuzzy and not j.current:
                                # Don't generate descriptions on transcripts
                                # other than the current in the case of fuzzy
                                # positions.
                                j.cancelDescription()
                            else:
                                j.addToDescription("%s%s%s" % (
                                    j.CM.g2c(orientedStart, start_fuzzy),
                                    varType,
                                    self.__maybeInvert(i, arg1, arg1_reverse)))
                                self.checkIntron(i, j, orientedStart)
790
                        #else
Laros's avatar
Added:  
Laros committed
791
792
                    #if
                    else :
793
794
795
796
797
798
799
800
801
802
803
                        if start_fuzzy and not j.current:
                            # Don't generate descriptions on transcripts
                            # other than the current in the case of fuzzy
                            # positions.
                            j.cancelDescription()
                        else:
                            j.addToDescription("%s%c>%c" % (
                                j.CM.g2c(orientedStart, start_fuzzy),
                                self.__maybeInvert(i, arg1, arg1_reverse),
                                self.__maybeInvert(i, arg2)))
                            self.checkIntron(i, j, orientedStart)
804
                    #else
Laros's avatar
Added:  
Laros committed
805
806
807
                #if
            #for
        #for
808
    #name
809

810
    def checkIntron(self, gene, transcript, position):
811
812
        """
        Checks if a position is on or near a splice site
813

814
815
816
817
818
819
820
        @arg gene: Gene
        @type gene: object
        @arg transcript: transcript
        @type transcript: object
        @arg position: g. position
        @type position: integer
        """
821
        intronPos = abs(transcript.CM.g2x(position)[1])
822

823
        if intronPos :
824
825
826
            # It should be easy for SOAP clients to filter out all warnings
            # related to other transcripts, so we use two codes here.
            if transcript.current:
Vermaat's avatar
Vermaat committed
827
                warning = 'WSPLICE'
828
829
                str_transcript = 'transcript %s (selected)' % transcript.name
            else:
Vermaat's avatar
Vermaat committed
830
                warning = 'WSPLICE_OTHER'
831
832
                str_transcript = 'transcript %s' % transcript.name

833
            if intronPos <= config.get('spliceAlarm'):
834
835
836
837
838
839
840
                self.__output.addMessage(__file__, 2, warning,
                    "Mutation on splice site in gene %s %s." % (
                    gene.name, str_transcript))
            elif intronPos <= config.get('spliceWarn'):
                self.__output.addMessage(__file__, 2, warning,
                    "Mutation near splice site in gene %s %s." % (
                    gene.name, str_transcript))
841
    #checkIntron
Laros's avatar
Added:  
Laros committed
842
#GenRecord