GenRecord.py 30.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
"""
Module to convert a GenBank record to a nested dictionary consisting of
a list of genes, which itself consists of a list of loci. This structure
makes it possible to iterate over genes and transcripts without having to
search for them each time.

@requires: Crossmap
@requires: Bio
@requires: Db
"""
# Public classes:
#     - PList     ; Store a general location and a list of splice sites.
#     - Locus     ; Store data about the mRNA and CDS splice sites.
#     - Gene      ; Store a list of Locus objects and the orientation.
#     - Record    ; Store a geneList and other additional information.
#     - GenRecord ; Convert a GenBank record to a nested dictionary.


19
import Bio
20

Vermaat's avatar
Vermaat committed
21
from mutalyzer import util
22
from mutalyzer import config
23
24
from mutalyzer import Crossmap
from mutalyzer import Db
25
26
27


class PList(object) :
    """
    A position list object, to store a general location and a list of
    specific splice sites (if available).

    These objects are used to describe either a list of mRNA splice sites
    or a list of CDS splice sites. These splice sites are stored in the
    positionList element. The location element is a fallback in case the
    splice sites are not available.

    Special methods:
        - __init__() ; Initialise the class.

    Public variables:
        - location     ; A pair of integers between which the object
                         resides.
        - positionList ; A list (with an even amount of entries) of splice
                         sites.
    """

    def __init__(self) :
        """
        Initialise the class.

        Public variables (altered):
            - location     ; A pair of integers between which the object
                             resides. Starts out as an empty list.
            - positionList ; A list (with an even amount of entries) of
                             splice sites. Starts out as an empty list.
        """

        # Both containers are created per instance, so they are never shared
        # between PList objects.
        self.location = []
        self.positionList = []
    #__init__
#PList
Laros's avatar
Added:  
Laros committed
60
61
62

class Locus(object) :
    """
    A Locus object, to store data about the mRNA and CDS splice sites.

    Special methods:
        - __init__() ; Initialise the class.

    Public methods:
        - cancelDescription() ; Give up on describing this locus.
        - addToDescription()  ; Append a raw variant to the description.

    Public variables:
        - mRNA ; A position list object.
        - CDS  ; A position list object.
        - exon ; A position list object.
    """

    def __init__(self, name) :
        """
        Initialise the class.

        Public variables (altered):
            - name        ; Identifier of the locus.
            - current     ; Flag, initially False.
            - mRNA        ; A position list object (initially None).
            - CDS         ; A position list object (initially None).
            - location    ; Initially an empty list.
            - exon        ; A position list object (initially None).
            - txTable     ; The translation table (initially 1).
            - CM          ; A Crossmap object (initially None).
            - description ; The variant description built up by
                            addToDescription(); '?' means unknown.

        The remaining attributes (identifiers, products, flags) are only
        given their initial values here; they are filled in elsewhere.

        @arg name: identifier of the locus
        @type name: string
        """

        self.name = name
        self.current = False
        self.mRNA = None
        self.CDS = None
        self.location = []
        self.exon = None
        self.txTable = 1
        self.transl_except = []
        self.CM = None
        self.transcriptID = None
        self.proteinID = None
        self.genomicID = None
        # 'c' by default; GenRecord.checkRecord() switches this to 'n' when
        # no usable CDS is available for the locus.
        self.molType = 'c'
        self.description = ""
        self.proteinDescription = "?"
        self.proteinRange = []
        self.locusTag = None
        self.link = None
        self.transcribe = False
        self.translate = False
        self.linkMethod = None
        self.transcriptProduct = None
        self.proteinProduct = None
    #__init__

    def cancelDescription(self):
        """
        Set the description on this locus to 'unknown'.

        This can be used if at some point we give up creating a sensible
        description on this locus. It also makes sure future additions to
        the description are ignored and it keeps the 'unknown' value.

        @note: This depends on the check for the unknown value in the
            addToDescription method. This is not a beautiful solution.
        """
        self.description = '?'
    #cancelDescription

    def addToDescription(self, rawVariant):
        """
        Expands the DNA description with a new raw variant.

        Raw variants are joined with ';'. If the description was cancelled
        (it equals '?'), the call is ignored.

        @arg rawVariant: description of a single mutation
        @type rawVariant: string
        """
        if not self.description:
            # First variant: no separator needed.
            self.description = rawVariant
        elif self.description != '?':
            # Don't change anything if we already have an unknown value.
            self.description = "%s;%s" % (self.description, rawVariant)
    #addToDescription
#Locus
Laros's avatar
Added:  
Laros committed
144

145

Laros's avatar
Added:  
Laros committed
146
147
class Gene(object) :
    """
    A Gene object, to store a list of Locus objects and the orientation of
    the gene.

    Special methods:
        - __init__() ; Initialise the class.

    Public methods:
        - newLocusTag() ; Generate the next locus tag.
        - findLocus()   ; Find a transcript by name.
        - listLoci()    ; List the names of all transcripts.
        - findLink()    ; Find a transcript by protein accession number.

    Public variables:
        - orientation    ; The orientation of the gene: 1 = forward,
                           -1 = reverse.
        - transcriptList ; A list of Locus objects.
    """

    def __init__(self, name) :
        """
        Initialise the class.

        Public variables (altered):
            - name           ; The gene name.
            - orientation    ; The orientation of the gene (initially 1).
            - transcriptList ; A list of transcripts (initially empty).
            - location       ; Initially an empty list.
            - longName       ; Initially an empty string.
        Private variables (altered):
            - __locusTag     ; Last locus tag handed out, as a zero padded
                               three digit string (initially "000").

        @arg name: gene name
        @type name: string
        """

        self.name = name
        self.orientation = 1
        self.transcriptList = []
        self.location = []
        self.longName = ""
        self.__locusTag = "000"
    #__init__

    def newLocusTag(self) :
        """
        Generates a new Locus tag.

        The internal counter is incremented by one on every call.

        @return: Locus tag (3 digits, if < 100 preceded with 0's)
        @rtype: string
        """

        self.__locusTag = "%03i" % (int(self.__locusTag) + 1)

        return self.__locusTag
    #newLocusTag

    def findLocus(self, name) :
        """
        Find a transcript, given its name.

        A transcript matches when its name equals the given name, or when
        its name equals the given name converted to a zero padded three
        digit number (so "1" finds the transcript named "001").

        Names that cannot be converted to a number are only compared
        literally; they no longer abort the search with an exception.

        @arg name: transcript variant number
        @type name: string

        @return: transcript, or None if no transcript matches
        @rtype: object
        """

        # Work out the padded form once, instead of once per transcript.
        try :
            padded = "%03i" % int(name)
        except (TypeError, ValueError, OverflowError) :
            padded = None

        for i in self.transcriptList :
            if i.name == name or (padded is not None and i.name == padded) :
                return i
        return None
    #findLocus

    def listLoci(self) :
        """
        Provides a list of transcript variant numbers

        @return: list of transcript variant numbers, in transcriptList order
        @rtype: list
        """

        return [i.name for i in self.transcriptList]
    #listLoci

    def findLink(self, protAcc) :
        """
        Look in the list of transcripts for a given protein accession number.

        The accession number is compared with the link attribute of each
        transcript; the first match is returned.

        @arg protAcc: protein accession number
        @type protAcc: string

        @return: transcript, or None if no transcript matches
        @rtype: object
        """

        for i in self.transcriptList :
            if i.link == protAcc :
                return i
        return None
    #findLink
#Gene

class Record(object) :
    """
    A Record object, to store a geneList and other additional
    information.

    Special methods:
        - __init__() ; Initialise the class.

    Public methods:
        - findGene()              ; Find a gene by name.
        - listGenes()             ; List the names of all genes.
        - addToDescription()      ; Append a raw variant to the description.
        - toChromPos()            ; Convert a record position to a
                                    chromosomal position.
        - addToChromDescription() ; Append a raw variant to the chromosomal
                                    description.

    Public variables:
        - geneList  ; List of Gene objects.
        - molType   ; Variable to indicate the sequence type (DNA, RNA, ...)
        - organelle ; Variable to indicate whether the sequence is from the
                      nucleus or from an organelle (if so, also from which
                      one).
        - source    ; A fake gene that can be used when no gene information
                      is present.
    """

    def __init__(self) :
        """
        Initialise the class.

        Public variables (altered):
            - geneList         ; List of Gene objects.
            - molType          ; Variable to indicate the sequence type
                                 (DNA, RNA, ...). Initially 'g'.
            - seq              ; The reference sequence
            - mapping          ; The mapping of the reference sequence to
                                 the genome include a list of differences
                                 between the sequences
            - organelle        ; Variable to indicate whether the sequence
                                 is from the nucleus or from an organelle
                                 (if so, also from which one).
            - source           ; A fake gene that can be used when no gene
                                 information is present.
            - description      ; Variant description relative to the record.
            - chromOffset      ; Offset used by toChromPos(); 0 means that
                                 no chromosomal positions are available.
            - chromDescription ; Variant description in chromosomal
                                 positions.
            - orientation      ; 1 or -1, see toChromPos().
        """

        self.geneList = []
        self.locusDict = {}
        self.molType = 'g'
        self.seq = ""
        self.mapping = []
        self.organelle = None
        self.source = Gene(None)
        self.description = ""
        self._sourcetype = None           #LRG or GB
        self.version = None
        self.chromOffset = 0
        self.chromDescription = ""
        self.orientation = 1
        self.recordId = None
    #__init__

    def findGene(self, name) :
        """
        Returns a Gene object, given its name.

        @arg name: Gene name
        @type name: string

        @return: Gene object, or None if there is no gene with this name
        @rtype: object
        """

        for i in self.geneList :
            if i.name == name :
                return i
        return None
    #findGene

    def listGenes(self) :
        """
        List the names of all genes found in this record.

        @return: Genes list, in geneList order
        @rtype: list
        """

        return [i.name for i in self.geneList]
    #listGenes

    def addToDescription(self, rawVariant) :
        """
        Expands the DNA description with a new raw variant.

        Raw variants are joined with ';'.

        @arg rawVariant: description of a single mutation
        @type rawVariant: string
        """

        if self.description :
            self.description = "%s;%s" % (self.description, rawVariant)
        else :
            self.description = rawVariant
    #addToDescription

    def toChromPos(self, i) :
        """
        Converts a g. position (relative to the start of the record) to a
        chromosomal g. position

        @arg i: g. position (relative to the start of the record)
        @type i: integer

        @return: chromosomal g. position, or None if chromOffset is not set
        @rtype: integer
        """

        # A chromOffset of 0 (the default) means there is nothing to map to.
        if not self.chromOffset :
            return None

        # Positions are 1-based: position 1 maps onto chromOffset itself and
        # the orientation decides in which direction we count from there.
        if self.orientation == 1 :
            return self.chromOffset + i - 1
        return self.chromOffset - i + 1
    #toChromPos

    def addToChromDescription(self, rawVariant) :
        """
        Expands the chromosomal DNA description with a new raw variant.

        Nothing is recorded if chromOffset is not set. Otherwise raw
        variants are joined with ';', like in addToDescription().

        @arg rawVariant: description of a single mutation
        @type rawVariant: string
        """

        if not self.chromOffset :
            return
        if self.chromDescription :
            self.chromDescription = "%s;%s" % (self.chromDescription,
                rawVariant)
        else :
            self.chromDescription = rawVariant
    #addToChromDescription
#Record
Laros's avatar
Laros committed
378

Laros's avatar
Added:  
Laros committed
379
380
class GenRecord() :
    """
381
    Convert a GenBank record to a nested dictionary.
Laros's avatar
Added:  
Laros committed
382

383
384
    Public methods:
        - checkRecord()   ;   Check and repair self.record.
Laros's avatar
Added:  
Laros committed
385
386
    """

387
    def __init__(self, output) :
388
        """
389
        Initialise the class.
390

391
392
        Public variable:
            - record    ; A record object
393

394
395
        @arg output: an output object
        @type output: object
396
397
398
399
400
        """
        self.__output = output
        self.record = None
    #__init__

401
402
    def __checkExonList(self, exonList, CDSpos) :
        """
403
        @todo document me
404

405
406
407
408
        @arg exonList: list of splice sites
        @type exonList: list (object)
        @arg CDSpos: location of the CDS
        @type CDSpos: object
409
410

        @return:
411
        @rtype: boolean
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
        """

        if not exonList :
            return False
        if not CDSpos :
            return True

        e = exonList.positionList
        c = CDSpos.location

        seen = 0
        for i in range(0, len(e), 2) :
            if e[i] <= c[0] and e[i + 1] >= c[0] :
                seen += 1
            if e[i] <= c[1] and e[i + 1] >= c[1] :
                seen += 1
        #for

        if seen == 2 :
            return True
        return False
    #__checkExonList
434

435
436
    def __constructCDS(self, mRNA, CDSpos) :
        """
437
        Construct a list of coordinates that contains CDS start and stop and
438
        the internal splice sites.
439

440
441
442
443
444
445
446
        @arg mRNA: mRNA positions/coordinates list
        @type mRNA: list (integer)
        @arg CDSpos: coding DNA positions/coordinates
        @type CDSpos: list (integer)

        @return: CDS positions plus internal splice sites
        @rtype: list (integer)
447
        """
448

449
450
        i = 1
        ret = [CDSpos[0]]
451

452
453
        while CDSpos[0] > mRNA[i] :
            i += 2
454

455
456
457
        j = i
        while CDSpos[1] > mRNA[j] :
            j += 2
458

459
460
        ret.extend(mRNA[i:j])
        ret.append(CDSpos[1])
461

462
463
464
        return ret
    #__constructCDS

Vermaat's avatar
Vermaat committed
465
    def __maybeInvert(self, gene, string, string_reverse=None) :
466
        """
467
468
        Return the reverse-complement of a DNA sequence if the gene is in
        the reverse orientation.
469
470

        @arg gene: Gene
471
472
473
        @type gene: object
        @arg string: DNA sequence
        @type string: string
Vermaat's avatar
Vermaat committed
474
475
        @kwarg string_reverse: DNA sequence to use (if not None) for the
            reverse complement.
476

477
478
479
        @return: reverse-complement (if applicable), otherwise return the
            original.
        @rtype: string
480
        """
Vermaat's avatar
Vermaat committed
481
482
483
        if gene.orientation == -1:
            if string_reverse:
                string = string_reverse
484
485
486
487
            return Bio.Seq.reverse_complement(string)
        return string
    #__maybeInvert

488
    def checkRecord(self) :
489
        """
490
491
        Check if the record in self.record is compatible with mutalyzer.
        Update the mRNA PList with the exon and CDS data.
492

493
494
        @todo: This function should really check the record for minimal
        requirements
495
496
        """

497
        #TODO:  This function should really check
498
        #       the record for minimal requirements.
499
        for i in self.record.geneList :
500
501
502
503
504
505
506
507
508
509
510
511
512
            """
            if len(i.transcriptList) == 2 :
                if i.transcriptList[0].CDS and not i.transcriptList[1].CDS and \
                   i.transcriptList[1].mRNA and not i.transcriptList[0].mRNA :
                    i.transcriptList[0].mRNA = i.transcriptList[1].mRNA
                if i.transcriptList[1].CDS and not i.transcriptList[0].CDS and \
                   i.transcriptList[0].mRNA and not i.transcriptList[1].mRNA :
                    i.transcriptList[0].CDS = i.transcriptList[1].CDS
                i.transcriptList = [i.transcriptList[0]]
                i.transcriptList[0].transcribe = True
                i.transcriptList[0].translate = True
            #if
            """
513
514
            for j in i.transcriptList :
                if not j.mRNA :
515
                    usableExonList = self.__checkExonList(j.exon, j.CDS)
516
517
518
519
520
521
522
523
524
                    if self.record.molType == 'n' and j.exon:
                        if not all(p1 + 1 == p2 for p1, p2 in
                                   util.grouper(j.exon.positionList[1:-1])):
                            code = 'WEXON_ANNOTATION' if j.current else 'WEXON_ANNOTATION_OTHER'
                            self.__output.addMessage(__file__, 2, code,
                                "Exons for gene %s, transcript variant %s were "
                                "found not to be adjacent. This signifies a "
                                "possible problem in the annotation of the "
                                "reference sequence." % (i.name, j.name))
525
                    if not j.exon or not usableExonList :
526
                        if self.record.molType == 'g' :
527
528
                            code = 'WNOMRNA' if j.current else 'WNOMRNA_OTHER'
                            self.__output.addMessage(__file__, 2, code,
529
530
                                "No mRNA field found for gene %s, transcript " \
                                "variant %s in record, constructing " \
531
532
533
                                "it from CDS. Please note that descriptions "\
                                "exceeding CDS boundaries are invalid." % (
                                i.name, j.name))
534
535
                        if j.exon and j.exon.positionList and \
                           not usableExonList :
536
537
                            code = 'WNOMRNA' if j.current else 'WNOMRNA_OTHER'
                            self.__output.addMessage(__file__, 2, code,
538
539
540
541
542
                                "Exons were found for gene %s, transcript " \
                                "variant %s but were not usable. " \
                                "Please note that descriptions "\
                                "exceeding CDS boundaries are invalid." % (
                                i.name, j.name))
543
544
                        if j.CDS :
                            if not j.CDS.positionList :
545
546
547
548
549
                                #self.__output.addMessage(__file__, 2,
                                #    "WNOCDSLIST", "No CDS list found for " \
                                #    "gene %s, transcript variant %s in " \
                                #    "record, constructing it from " \
                                #    "CDS location." % (i.name, j.name))
550
551
552
553
554
                                j.mRNA = j.CDS
                                j.mRNA.positionList = j.CDS.location
                            #if
                            else :
                                j.mRNA = j.CDS
Laros's avatar
Laros committed
555
                            j.linkMethod = "construction"
556
557
                            j.transcribe = True
                            j.translate = True
558
559
560
561
                        #if
                        else :
                            self.__output.addMessage(__file__, 2, "WNOCDS",
                                "No CDS found for gene %s, transcript " \
562
                                "variant %s in record, " \
563
                                "constructing it from gene location." % (
564
                                i.name, j.name))
565
566
567
568
569
                            j.CDS = None #PList()
                            #j.CDS.location = i.location
                            j.mRNA = PList()
                            j.mRNA.location = i.location
                            #j.mRNA.positionList = i.location
570
571
                            j.molType = 'n'
                        #else
Laros's avatar
Added:  
Laros committed
572
573
                    #if
                    else :
574
575
576
577
578
                        #self.__output.addMessage(__file__, 2, "WNOMRNA",
                        #    "No mRNA field found for gene %s, transcript " \
                        #    "variant %s in record, constructing " \
                        #    "it from gathered exon information." % (
                        #    i.name, j.name))
579
580
581
                        j.mRNA = j.exon
                    #else
                #if
582
583
584
                #else :
                #    j.transcribe = True

585
586
                if not j.mRNA.positionList :
                    j.mRNA.positionList = j.mRNA.location
587
                if j.mRNA.positionList and j.CDS and j.CDS.positionList != None :
588
                    if not j.CDS.positionList :
589
590
591
                        #self.__output.addMessage(__file__, 2, "WNOCDS",
                        #    "No CDS list found for gene %s, transcript " \
                        #    "variant %s in record, constructing " \
592
                        #    "it from mRNA list and CDS location." % (i.name,
593
                        #    j.name))
594
595
596
597
598
599
                        if j.mRNA.positionList :
                            j.CDS.positionList = self.__constructCDS(
                                j.mRNA.positionList, j.CDS.location)
                        else :
                            j.CDS.positionList = self.__constructCDS(
                                j.mRNA.location, j.CDS.location)
600
601
                        j.transcribe = True
                        j.translate = True
602
                    #if
603
                    j.CM = Crossmap.Crossmap(j.mRNA.positionList,
604
                                             j.CDS.location, i.orientation)
Laros's avatar
Added:  
Laros committed
605
                #if
606
607
608
                else :
                    j.molType = 'n'
                    if j.mRNA.positionList :
609
                        j.CM = Crossmap.Crossmap(j.mRNA.positionList,
610
                                                 [], i.orientation)
Laros's avatar
Laros committed
611
                        j.transcribe = True
612
613
                    else :
                        j.description = '?'
614
                #else
615
616
            #for
        #for
617
    #checkRecord
618

619
620
621
622
623
624
625
626
627
628
629
630
631
632
    def current_transcript(self):
        """
        Return the current transcript.

        @return: Current transcript if there is one, None otherwise.
        @rtype: GenRecord.Locus
        """
        for i in self.record.geneList:
            for j in i.transcriptList:
                if j.current:
                    return j
        return None
    #current_transcript

633
634
    def name(self, start_g, stop_g, varType, arg1, arg2, roll, arg1_reverse=None,
             start_fuzzy=False, stop_fuzzy=False):
635
        """
636
        Generate variant descriptions for all genes, transcripts, etc.
Vermaat's avatar
Vermaat committed
637

638
639
640
641
642
643
644
645
646
647
648
649
        @arg start_g: start position
        @type start_g: integer
        @arg stop_g: stop position
        @type stop_g: integer
        @arg varType: variant type
        @type varType: string
        @arg arg1: argument 1 of a raw variant
        @type arg1: string
        @arg arg2: argument 2 of a raw variant
        @type arg2: string
        @arg roll: ???
        @type roll: tuple (integer, integer)
Vermaat's avatar
Vermaat committed
650
651
        @kwarg arg1_reverse: argument 1 to be used on reverse strand
        @type arg1_reverse: string
652
653
654
655
        @kwarg start_fuzzy: Indicates if start position of variant is fuzzy.
        @type start_fuzzy: bool
        @kwarg stop_fuzzy: Indicates if stop position of variant is fuzzy.
        @type stop_fuzzy: bool
656
        """
657
658
659
660
        forwardStart = start_g
        forwardStop = stop_g
        reverseStart = stop_g
        reverseStop = start_g
Vermaat's avatar
Vermaat committed
661
662
663
664
665
666
667
668
669
670
671
672
673

        if self.record.orientation == 1:
            chromStart = self.record.toChromPos(start_g)
            chromStop = self.record.toChromPos(stop_g)
            chromArg1 = arg1
            chromArg2 = arg2
        else:
            chromStart = self.record.toChromPos(stop_g)
            chromStop = self.record.toChromPos(start_g)
            chromArg1 = Bio.Seq.reverse_complement(arg1)
            chromArg2 = Bio.Seq.reverse_complement(arg2)
            # Todo: Should we use arg1_reverse here?

674
675
676
677
678
        if roll :
            forwardStart += roll[1]
            forwardStop += roll[1]
            reverseStart -= roll[0]
            reverseStop -= roll[0]
Vermaat's avatar
Vermaat committed
679
680
681
682
683
684
685
            if chromStart is not None:
                if self.record.orientation == 1:
                    chromStart += roll[1]
                    chromStop += roll[1]
                else:
                    chromStart += roll[0]
                    chromStop += roll[0]
686
687
688
689
        #if

        if varType != "subst" :
            if forwardStart != forwardStop :
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
                # Todo: Fuzzy offsets to genomic positions (see bug #38).
                #
                # The genomic positioning is problematic. We would like to
                # have it in brackets (as fuzzy positions), like the above
                # g.(34299_23232)del example.
                #
                # Now consider a variant c.a-?_b+18del where only the offset
                # before the exon is unknown but the offset after the exon is
                # exact. Now a genomic description like g.(34299)_23232del
                # comes to mind, however, this notation is not allowed by the
                # HGVS grammar.
                #
                # I think all we can do is to treat both positions as fuzzy in
                # the genomic description, even if only one of them really is.
                #
                # Peter thinks the HGVS grammar should at some point be
                # updated to allow the brackets around individual locations.
                if start_fuzzy or stop_fuzzy:
                    self.record.addToDescription("(%s_%s)%s%s" % (
                        forwardStart, forwardStop, varType, arg1))
                    self.record.addToChromDescription("(%s_%s)%s%s" % (
Vermaat's avatar
Vermaat committed
711
                        chromStart, chromStop, varType, chromArg1))
712
713
714
715
                else:
                    self.record.addToDescription("%s_%s%s%s" % (
                        forwardStart, forwardStop, varType, arg1))
                    self.record.addToChromDescription("%s_%s%s%s" % (
Vermaat's avatar
Vermaat committed
716
                        chromStart, chromStop, varType, chromArg1))
717
            #if
718
            else :
719
720
721
722
723
724
                if start_fuzzy or stop_fuzzy:
                    # Todo: Current HGVS does not allow for () around single
                    # positions, only around ranges (see above and #38).
                    self.record.addToDescription("(%s)%s%s" % (
                        forwardStart, varType, arg1))
                    self.record.addToChromDescription("(%s)%s%s" % (
Vermaat's avatar
Vermaat committed
725
                        chromStart, varType, chromArg1))
726
727
728
729
                else:
                    self.record.addToDescription("%s%s%s" % (
                        forwardStart, varType, arg1))
                    self.record.addToChromDescription("%s%s%s" % (
Vermaat's avatar
Vermaat committed
730
                        chromStart, varType, chromArg1))
731
            #else
732
733
        #if
        else :
734
735
736
737
738
739
            if start_fuzzy or stop_fuzzy:
                # Todo: Current HGVS does not allow for () around single
                # positions, only around ranges (see above and #38).
                self.record.addToDescription("(%s)%c>%c" % (
                    forwardStart, arg1, arg2))
                self.record.addToChromDescription("(%s)%c>%c" % (
Vermaat's avatar
Vermaat committed
740
                    chromStart, chromArg1, chromArg2))
741
742
743
744
            else:
                self.record.addToDescription("%s%c>%c" % (
                    forwardStart, arg1, arg2))
                self.record.addToChromDescription("%s%c>%c" % (
Vermaat's avatar
Vermaat committed
745
                    chromStart, chromArg1, chromArg2))
746

747
748
749
        for i in self.record.geneList :
            for j in i.transcriptList :
                if j.CM :
750
751
752
753
754
755
756
                    orientedStart = forwardStart
                    orientedStop = forwardStop
                    if i.orientation == -1 :
                        orientedStart = reverseStart
                        orientedStop = reverseStop
                    #if

Vermaat's avatar
Vermaat committed
757
758
759
760
761
762
763
764
765
                    # Turn of translation to protein if we hit splice sites.
                    # For the current transcript, this is handled with more
                    # care in variantchecker.py.
                    if not j.current and \
                           util.over_splice_site(orientedStart, orientedStop,
                                                 j.CM.RNA):
                        j.translate = False

                    # And check whether the variant hits CDS start.
766
767
                    if j.molType == 'c' and forwardStop >= j.CM.x2g(1, 0) \
                       and forwardStart <= j.CM.x2g(3, 0) :
768
                        self.__output.addMessage(__file__, 2, "WSTART",
769
770
                            "Mutation in start codon of gene %s transcript " \
                            "%s." % (i.name, j.name))
Vermaat's avatar
Vermaat committed
771
772
                        if not j.current:
                            j.translate = False
773
774
775

                    # FIXME Check whether the variant hits a splice site.

776
                    if varType != "subst" :
777
                        if orientedStart != orientedStop :
778
779
780
781
782
783
784
785
786
787
788
789
                            if (start_fuzzy or stop_fuzzy) and not j.current:
                                # Don't generate descriptions on transcripts
                                # other than the current in the case of fuzzy
                                # positions.
                                j.cancelDescription()
                            else:
                                j.addToDescription("%s_%s%s%s" % (
                                    j.CM.g2c(orientedStart, start_fuzzy),
                                    j.CM.g2c(orientedStop, stop_fuzzy),
                                    varType, self.__maybeInvert(i, arg1, arg1_reverse)))
                                self.checkIntron(i, j, orientedStart)
                                self.checkIntron(i, j, orientedStop)
790
                        #if
791
                        else :
792
793
794
795
796
797
798
799
800
801
802
                            if start_fuzzy and not j.current:
                                # Don't generate descriptions on transcripts
                                # other than the current in the case of fuzzy
                                # positions.
                                j.cancelDescription()
                            else:
                                j.addToDescription("%s%s%s" % (
                                    j.CM.g2c(orientedStart, start_fuzzy),
                                    varType,
                                    self.__maybeInvert(i, arg1, arg1_reverse)))
                                self.checkIntron(i, j, orientedStart)
803
                        #else
Laros's avatar
Added:  
Laros committed
804
805
                    #if
                    else :
806
807
808
809
810
811
812
813
814
815
816
                        if start_fuzzy and not j.current:
                            # Don't generate descriptions on transcripts
                            # other than the current in the case of fuzzy
                            # positions.
                            j.cancelDescription()
                        else:
                            j.addToDescription("%s%c>%c" % (
                                j.CM.g2c(orientedStart, start_fuzzy),
                                self.__maybeInvert(i, arg1, arg1_reverse),
                                self.__maybeInvert(i, arg2)))
                            self.checkIntron(i, j, orientedStart)
817
                    #else
Laros's avatar
Added:  
Laros committed
818
819
820
                #if
            #for
        #for
821
    #name
822

823
    def checkIntron(self, gene, transcript, position):
        """
        Emit a warning if a position lies on or near a splice site.

        The second element of the transcript crossmapper's g2x() result
        for the position is taken in absolute value (presumably the
        intronic offset -- confirm against Crossmap). A zero value, or a
        value larger than the 'spliceWarn' configuration setting, produces
        no message. A value up to 'spliceAlarm' is reported as "on" a
        splice site; a value up to 'spliceWarn' is reported as "near" one.

        The warning code is WSPLICE if the transcript is the current one
        and WSPLICE_OTHER otherwise.

        @arg gene: Gene; its name is used in the message
        @type gene: object
        @arg transcript: Transcript; its CM, current and name are used
        @type transcript: object
        @arg position: g. position
        @type position: integer
        """
        offset = abs(transcript.CM.g2x(position)[1])

        # A zero offset means there is nothing to report.
        if not offset:
            return

        # It should be easy for SOAP clients to filter out all warnings
        # related to other transcripts, so we use two codes here.
        if transcript.current:
            code = 'WSPLICE'
            label = 'transcript %s (selected)' % transcript.name
        else:
            code = 'WSPLICE_OTHER'
            label = 'transcript %s' % transcript.name

        # Pick the message by proximity; the wider 'spliceWarn' threshold is
        # only consulted if the 'spliceAlarm' check did not match.
        if offset <= config.get('spliceAlarm'):
            template = "Mutation on splice site in gene %s %s."
        elif offset <= config.get('spliceWarn'):
            template = "Mutation near splice site in gene %s %s."
        else:
            return

        self.__output.addMessage(__file__, 2, code,
                                 template % (gene.name, label))
    #checkIntron
Laros's avatar
Added:  
Laros committed
855
#GenRecord