GenRecord.py 29.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
"""
Module to convert a GenBank record to a nested dictionary consisting of
a list of genes, which itself consists of a list of loci. This structure
makes it possible to iterate over genes and transcripts without having to
search for them each time.

@requires: Crossmap
@requires: Bio
@requires: Db
"""
# Public classes:
#     - PList     ; Store a general location and a list of splice sites.
#     - Locus     ; Store data about the mRNA and CDS splice sites.
#     - Gene      ; Store a list of Locus objects and the orientation.
#     - Record    ; Store a geneList and other additional information.
#     - GenRecord ; Convert a GenBank record to a nested dictionary.


19
import Bio
20

Vermaat's avatar
Vermaat committed
21
from mutalyzer import util
22
from mutalyzer import config
23
24
from mutalyzer import Crossmap
from mutalyzer import Db
25
26
27


class PList(object) :
Laros's avatar
Added:  
Laros committed
28
    """
29
30
    A position list object, to store a general location and a list of
    specific splice sites (if available).
Laros's avatar
Added:  
Laros committed
31

32
33
34
35
    These objects are used to describe either a list of mRNA splice sites
    or a list of CDS splice sites. These splice sites are stored in the
    list element. The location element is a fallback in case the splice
    sites are not available.
Laros's avatar
Added:  
Laros committed
36

37
38
    Special methods:
        - __init__() ; Initialise the class.
Laros's avatar
Added:  
Laros committed
39

40
41
42
    Public variables:
        - location ; A tuple of integers between which the object resides.
        - list     ; A list (with an even amount of entries) of splice sites.
Laros's avatar
Added:  
Laros committed
43
44
45
46
    """

    def __init__(self) :
        """
47
        Initialise the class.
Laros's avatar
Added:  
Laros committed
48

49
50
51
52
53
        Public variables (altered):
            - location     ; A tuple of integers between which the object
                             resides.
            - POSITIONlist ; A list (with an even amount of entries) of splice
                             sites.
Laros's avatar
Added:  
Laros committed
54
55
56
        """

        self.location = []
57
        self.positionList = []
Laros's avatar
Added:  
Laros committed
58
    #__init__
59
#PList
Laros's avatar
Added:  
Laros committed
60
61
62

class Locus(object) :
    """
63
    A Locus object, to store data about the mRNA and CDS splice sites.
Laros's avatar
Added:  
Laros committed
64

65
66
    Special methods:
        - __init__() ; Initialise the class.
Laros's avatar
Added:  
Laros committed
67

68
69
70
71
    Public variables:
        - mRNA ; A position list object.
        - CDS  ; A position list object.
        - exon ; A position list object.
Laros's avatar
Added:  
Laros committed
72
73
    """

74
    def __init__(self, name) :
Laros's avatar
Added:  
Laros committed
75
        """
76
        Initialise the class.
Laros's avatar
Added:  
Laros committed
77

78
79
80
81
82
83
84
        Public variables (altered):
            - mRNA     ; A position list object.
            - CDS      ; A position list object.
            - location ;
            - exon     ; A position list object.
            - txTable  ; The translation table.
            - CM       ; A Crossmap object.
85

86
87
        @arg name: identifier of the locus
        @type name: string
Laros's avatar
Added:  
Laros committed
88
89
        """

90
        self.name = name
Vermaat's avatar
Vermaat committed
91
        self.current = False
Laros's avatar
Added:  
Laros committed
92
93
        self.mRNA = None
        self.CDS = None
Laros's avatar
Laros committed
94
        self.location = []
95
        self.exon = None
Laros's avatar
Laros committed
96
        self.txTable = 1
Laros's avatar
Added:    
Laros committed
97
        self.CM = None
98
99
        self.transcriptID = None
        self.proteinID = None
Gerben Stouten's avatar
Gerben Stouten committed
100
        self.genomicID = None
101
102
        self.molType = 'c'
        self.description = ""
103
        self.proteinDescription = "?"
104
        self.proteinRange = []
105
        self.locusTag = None
106
        self.link = None
107
108
        self.transcribe = False
        self.translate = False
109
110
111
        self.linkMethod = None
        self.transcriptProduct = None
        self.proteinProduct = None
Laros's avatar
Added:  
Laros committed
112
    #__init__
113

114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
    def cancelDescription(self):
        """
        Set the description on this locus to 'unknown'.

        This can be used if at some point we give up creating a sensible
        description on this locus. It also makes sure future additions to
        the description are ignored and it keeps the 'unknown' value.

        @note: This depends on the check for the unknown value in the
            addToDescription method. This is a not a beatiful solution.
        """
        self.description = '?'
    #cancelDescription

    def addToDescription(self, rawVariant):
129
        """
130
        Expands the DNA description with a new raw variant.
131

132
133
        @arg rawVariant: description of a single mutation
        @type rawVariant: string
134
        """
Vermaat's avatar
Vermaat committed
135
        if self.description:
136
137
138
139
            # Don't change anything if we already have an unknown value.
            if self.description != '?':
                self.description = "%s;%s" % (self.description, rawVariant)
        else:
140
141
142
            self.description = rawVariant
    #addToDescription
#Locus
Laros's avatar
Added:  
Laros committed
143

144

Laros's avatar
Added:  
Laros committed
145
146
class Gene(object) :
    """
147
148
    A Gene object, to store a list of Locus objects and the orientation of
    the gene.
149

150
151
    Special methods:
        - __init__() ; Initialise the class.
Laros's avatar
Added:  
Laros committed
152

153
154
    Public variables:
        - orientation; The orientation of the gene: 1 = forward, -1 = reverse.
155
        - transcriptslist; A list of Locus objects.
Laros's avatar
Added:  
Laros committed
156
157
    """

158
    def __init__(self, name) :
Laros's avatar
Added:  
Laros committed
159
        """
160
        Initialise the class.
Laros's avatar
Added:  
Laros committed
161

162
163
164
165
166
167
168
169
        Public variables (altered):
            - name
            - orientation    ; The orientation of the gene.
            - transcriptList ; A list of transcripts
            - location ;
            - longName ;
        Private variables (altered):
            - __locusTag ;
170

171
172
        @arg name: gene name
        @type name: string
Laros's avatar
Added:  
Laros committed
173
174
        """

175
176
177
        self.name = name
        self.orientation = 1
        self.transcriptList = []
178
        self.location = []
179
        self.longName = ""
180
        self.__locusTag = "000"
Laros's avatar
Added:  
Laros committed
181
182
    #__init__

183
184
    def newLocusTag(self) :
        """
185
        Generates a new Locus tag.
186

187
188
        @return: Locus tag
        @rtype: integer (3 digits, if < 100 preceeded with 0's)
189
190
191
192
193
194
195
        """

        self.__locusTag = "%03i" % (int(self.__locusTag) + 1)

        return self.__locusTag
    #newLocusTag

196
197
    def findLocus(self, name) :
        """
198
        Find a transcript, given its name.
199

200
201
        @arg name: transcript variant number
        @type name: string
202

203
204
        @return: transcript
        @rtype: object
205
206
207
        """

        for i in self.transcriptList :
208
            if i.name == name or i.name == str("%03i" % int(name)):
209
210
211
                return i
        return None
    #findLocus
212
213
214

    def listLoci(self) :
        """
215
        Provides a list of transcript variant numbers
216

217
218
        @return: list of transcript variant numbers
        @rtype: list
219
220
221
222
223
224
225
        """

        ret = []
        for i in self.transcriptList :
            ret.append(i.name)
        return ret
    #listLoci
226
227
228

    def findLink(self, protAcc) :
        """
229
        Look in the list of transcripts for a given protein accession number.
230

231
232
        @arg protAcc: protein accession number
        @type protAcc: string
233

234
235
        @return: transcript
        @rtype: object
236
237
238
239
240
241
        """

        for i in self.transcriptList :
            if i.link == protAcc :
                return i
        return None
242
    #findLink
243
244
245
#Gene

class Record(object) :
246
    """
247
248
249
250
251
252
253
254
255
256
257
258
259
260
    A Record object, to store a geneList and other additional
    information.

    Special methods:
        - __init__() ; Initialise the class.

    Public variables:
        - geneList  ; List of Gene objects.
        - mol_type  ; Variable to indicate the sequence type (DNA, RNA, ...)
        - organelle ; Variable to indicate whether the sequence is from the
                      nucleus or from an organelle (if so, also from which
                      one).
        - source    ; A fake gene that can be used when no gene information
                      is present.
261
    """
Laros's avatar
Laros committed
262
263

    def __init__(self) :
264
        """
265
        Initialise the class.
266
267


268
269
270
271
272
273
274
275
276
277
278
279
        Public variables (altered):
            - geneList  ; List of Gene objects.
            - molType   ; Variable to indicate the sequence type (DNA, RNA,
                          ...)
            - seq       ; The reference sequence
            - mapping   ; The mapping of the reference sequence to the genome
                          include a list of differences between the sequences
            - organelle ; Variable to indicate whether the sequence is from
                          the nucleus or from an organelle (if so, also from
                          which one).
            - source    ; A fake gene that can be used when no gene
                          information is present.
280
        """
Laros's avatar
Laros committed
281

282
        self.geneList = []
283
        self.molType = 'g'
284
285
        self.seq = ""
        self.mapping = []
Laros's avatar
Laros committed
286
        self.organelle = None
287
        self.source = Gene(None)
288
        self.description = ""
289
        self._sourcetype = None           #LRG or GB
290
        self.version = None
291
292
293
294
        self.chromOffset = 0
        self.chromDescription = ""
        self.orientation = 1
        self.recordId = None
Laros's avatar
Laros committed
295
    #__init__
296
297
298

    def findGene(self, name) :
        """
299
        Returns a Gene object, given its name.
300

301
302
        @arg name: Gene name
        @type name: string
303

304
305
        @return: Gene object
        @rtype: object
306
307
308
309
310
311
312
        """

        for i in self.geneList :
            if i.name == name :
                return i
        return None
    #findGene
313

314
315
    def listGenes(self) :
        """
316
        List the names of all genes found in this record.
317

318
319
        @return: Genes list
        @rtype: list
320

321
322
323
324
325
326
327
328
        """

        ret = []
        for i in self.geneList :
            ret.append(i.name)
        return ret
    #listGenes

329
330
    def addToDescription(self, rawVariant) :
        """
331
        Expands the DNA description with a new raw variant.
332

333
334
        @arg rawVariant: description of a single mutation
        @type rawVariant: string
335
336
337
338
339
340
341
        """

        if self.description :
            self.description = "%s;%s" % (self.description, rawVariant)
        else :
            self.description = rawVariant
    #addToDescription
342
343
344

    def toChromPos(self, i) :
        """
345
        Converts a g. position (relative to the start of the record) to a
346
347
        chromosomal g. position

348
349
        @arg i: g. position (relative to the start of the record)
        @type i: integer
350

351
352
        @return: chromosomal g. position
        @rtype: integer
353
        """
354
355
        if not self.chromOffset:
            return None
356
357
358
359
360
361
362
363

        if self.orientation == 1 :
            return self.chromOffset + i - 1
        return self.chromOffset - i + 1
    #toChromPos

    def addToChromDescription(self, rawVariant) :
        """
364
        @todo document me
365
366
367
368
369
        """

        if not self.chromOffset :
            return
        if self.chromDescription :
370
            self.chromDescription = "%s;%s" % (self.chromDescription,
371
372
373
374
                rawVariant)
        else :
            self.chromDescription = rawVariant
    #addToChromDescription
375
#Record
Laros's avatar
Laros committed
376

Laros's avatar
Added:  
Laros committed
377
378
class GenRecord() :
    """
379
    Convert a GenBank record to a nested dictionary.
Laros's avatar
Added:  
Laros committed
380

381
382
    Public methods:
        - checkRecord()   ;   Check and repair self.record.
Laros's avatar
Added:  
Laros committed
383
384
    """

385
    def __init__(self, output) :
386
        """
387
        Initialise the class.
388

389
390
        Public variable:
            - record    ; A record object
391

392
393
        @arg output: an output object
        @type output: object
394
395
396
397
398
        """
        self.__output = output
        self.record = None
    #__init__

399
400
    def __checkExonList(self, exonList, CDSpos) :
        """
401
        @todo document me
402

403
404
405
406
        @arg exonList: list of splice sites
        @type exonList: list (object)
        @arg CDSpos: location of the CDS
        @type CDSpos: object
407
408

        @return:
409
        @rtype: boolean
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
        """

        if not exonList :
            return False
        if not CDSpos :
            return True

        e = exonList.positionList
        c = CDSpos.location

        seen = 0
        for i in range(0, len(e), 2) :
            if e[i] <= c[0] and e[i + 1] >= c[0] :
                seen += 1
            if e[i] <= c[1] and e[i + 1] >= c[1] :
                seen += 1
        #for

        if seen == 2 :
            return True
        return False
    #__checkExonList
432

433
434
    def __constructCDS(self, mRNA, CDSpos) :
        """
435
        Construct a list of coordinates that contains CDS start and stop and
436
        the internal splice sites.
437

438
439
440
441
442
443
444
        @arg mRNA: mRNA positions/coordinates list
        @type mRNA: list (integer)
        @arg CDSpos: coding DNA positions/coordinates
        @type CDSpos: list (integer)

        @return: CDS positions plus internal splice sites
        @rtype: list (integer)
445
        """
446

447
448
        i = 1
        ret = [CDSpos[0]]
449

450
451
        while CDSpos[0] > mRNA[i] :
            i += 2
452

453
454
455
        j = i
        while CDSpos[1] > mRNA[j] :
            j += 2
456

457
458
        ret.extend(mRNA[i:j])
        ret.append(CDSpos[1])
459

460
461
462
        return ret
    #__constructCDS

Vermaat's avatar
Vermaat committed
463
    def __maybeInvert(self, gene, string, string_reverse=None) :
464
        """
465
466
        Return the reverse-complement of a DNA sequence if the gene is in
        the reverse orientation.
467
468

        @arg gene: Gene
469
470
471
        @type gene: object
        @arg string: DNA sequence
        @type string: string
Vermaat's avatar
Vermaat committed
472
473
        @kwarg string_reverse: DNA sequence to use (if not None) for the
            reverse complement.
474

475
476
477
        @return: reverse-complement (if applicable), otherwise return the
            original.
        @rtype: string
478
        """
Vermaat's avatar
Vermaat committed
479
480
481
        if gene.orientation == -1:
            if string_reverse:
                string = string_reverse
482
483
484
485
            return Bio.Seq.reverse_complement(string)
        return string
    #__maybeInvert

486
    def checkRecord(self) :
487
        """
488
489
        Check if the record in self.record is compatible with mutalyzer.
        Update the mRNA PList with the exon and CDS data.
490

491
492
        @todo: This function should really check the record for minimal
        requirements
493
494
        """

495
        #TODO:  This function should really check
496
        #       the record for minimal requirements.
497
        for i in self.record.geneList :
498
499
500
501
502
503
504
505
506
507
508
509
510
            """
            if len(i.transcriptList) == 2 :
                if i.transcriptList[0].CDS and not i.transcriptList[1].CDS and \
                   i.transcriptList[1].mRNA and not i.transcriptList[0].mRNA :
                    i.transcriptList[0].mRNA = i.transcriptList[1].mRNA
                if i.transcriptList[1].CDS and not i.transcriptList[0].CDS and \
                   i.transcriptList[0].mRNA and not i.transcriptList[1].mRNA :
                    i.transcriptList[0].CDS = i.transcriptList[1].CDS
                i.transcriptList = [i.transcriptList[0]]
                i.transcriptList[0].transcribe = True
                i.transcriptList[0].translate = True
            #if
            """
511
512
            for j in i.transcriptList :
                if not j.mRNA :
513
514
                    usableExonList = self.__checkExonList(j.exon, j.CDS)
                    if not j.exon or not usableExonList :
515
                        if self.record.molType == 'g' :
516
517
                            code = 'WNOMRNA' if j.current else 'WNOMRNA_OTHER'
                            self.__output.addMessage(__file__, 2, code,
518
519
                                "No mRNA field found for gene %s, transcript " \
                                "variant %s in record, constructing " \
520
521
522
                                "it from CDS. Please note that descriptions "\
                                "exceeding CDS boundaries are invalid." % (
                                i.name, j.name))
523
524
                        if j.exon and j.exon.positionList and \
                           not usableExonList :
525
526
                            code = 'WNOMRNA' if j.current else 'WNOMRNA_OTHER'
                            self.__output.addMessage(__file__, 2, code,
527
528
529
530
531
                                "Exons were found for gene %s, transcript " \
                                "variant %s but were not usable. " \
                                "Please note that descriptions "\
                                "exceeding CDS boundaries are invalid." % (
                                i.name, j.name))
532
533
                        if j.CDS :
                            if not j.CDS.positionList :
534
535
536
537
538
                                #self.__output.addMessage(__file__, 2,
                                #    "WNOCDSLIST", "No CDS list found for " \
                                #    "gene %s, transcript variant %s in " \
                                #    "record, constructing it from " \
                                #    "CDS location." % (i.name, j.name))
539
540
541
542
543
                                j.mRNA = j.CDS
                                j.mRNA.positionList = j.CDS.location
                            #if
                            else :
                                j.mRNA = j.CDS
Laros's avatar
Laros committed
544
                            j.linkMethod = "construction"
545
546
                            j.transcribe = True
                            j.translate = True
547
548
549
550
                        #if
                        else :
                            self.__output.addMessage(__file__, 2, "WNOCDS",
                                "No CDS found for gene %s, transcript " \
551
                                "variant %s in record, " \
552
                                "constructing it from gene location." % (
553
                                i.name, j.name))
554
555
556
557
558
                            j.CDS = None #PList()
                            #j.CDS.location = i.location
                            j.mRNA = PList()
                            j.mRNA.location = i.location
                            #j.mRNA.positionList = i.location
559
560
                            j.molType = 'n'
                        #else
Laros's avatar
Added:  
Laros committed
561
562
                    #if
                    else :
563
564
565
566
567
                        #self.__output.addMessage(__file__, 2, "WNOMRNA",
                        #    "No mRNA field found for gene %s, transcript " \
                        #    "variant %s in record, constructing " \
                        #    "it from gathered exon information." % (
                        #    i.name, j.name))
568
569
570
                        j.mRNA = j.exon
                    #else
                #if
571
572
573
                #else :
                #    j.transcribe = True

574
575
                if not j.mRNA.positionList :
                    j.mRNA.positionList = j.mRNA.location
576
                if j.mRNA.positionList and j.CDS and j.CDS.positionList != None :
577
                    if not j.CDS.positionList :
578
579
580
                        #self.__output.addMessage(__file__, 2, "WNOCDS",
                        #    "No CDS list found for gene %s, transcript " \
                        #    "variant %s in record, constructing " \
581
                        #    "it from mRNA list and CDS location." % (i.name,
582
                        #    j.name))
583
584
585
586
587
588
                        if j.mRNA.positionList :
                            j.CDS.positionList = self.__constructCDS(
                                j.mRNA.positionList, j.CDS.location)
                        else :
                            j.CDS.positionList = self.__constructCDS(
                                j.mRNA.location, j.CDS.location)
589
590
                        j.transcribe = True
                        j.translate = True
591
                    #if
592
                    j.CM = Crossmap.Crossmap(j.mRNA.positionList,
593
                                             j.CDS.location, i.orientation)
Laros's avatar
Added:  
Laros committed
594
                #if
595
596
597
                else :
                    j.molType = 'n'
                    if j.mRNA.positionList :
598
                        j.CM = Crossmap.Crossmap(j.mRNA.positionList,
599
                                                 [], i.orientation)
Laros's avatar
Laros committed
600
                        j.transcribe = True
601
602
                    else :
                        j.description = '?'
603
                #else
604
605
            #for
        #for
606
    #checkRecord
607

608
609
610
611
612
613
614
615
616
617
618
619
620
621
    def current_transcript(self):
        """
        Return the current transcript.

        @return: Current transcript if there is one, None otherwise.
        @rtype: GenRecord.Locus
        """
        for i in self.record.geneList:
            for j in i.transcriptList:
                if j.current:
                    return j
        return None
    #current_transcript

622
623
    def name(self, start_g, stop_g, varType, arg1, arg2, roll, arg1_reverse=None,
             start_fuzzy=False, stop_fuzzy=False):
624
        """
625
        Generate variant descriptions for all genes, transcripts, etc.
Vermaat's avatar
Vermaat committed
626

627
628
629
630
631
632
633
634
635
636
637
638
        @arg start_g: start position
        @type start_g: integer
        @arg stop_g: stop position
        @type stop_g: integer
        @arg varType: variant type
        @type varType: string
        @arg arg1: argument 1 of a raw variant
        @type arg1: string
        @arg arg2: argument 2 of a raw variant
        @type arg2: string
        @arg roll: ???
        @type roll: tuple (integer, integer)
Vermaat's avatar
Vermaat committed
639
640
        @kwarg arg1_reverse: argument 1 to be used on reverse strand
        @type arg1_reverse: string
641
642
643
644
        @kwarg start_fuzzy: Indicates if start position of variant is fuzzy.
        @type start_fuzzy: bool
        @kwarg stop_fuzzy: Indicates if stop position of variant is fuzzy.
        @type stop_fuzzy: bool
645
        """
646
647
648
649
        forwardStart = start_g
        forwardStop = stop_g
        reverseStart = stop_g
        reverseStop = start_g
Vermaat's avatar
Vermaat committed
650
651
652
653
654
655
656
657
658
659
660
661
662

        if self.record.orientation == 1:
            chromStart = self.record.toChromPos(start_g)
            chromStop = self.record.toChromPos(stop_g)
            chromArg1 = arg1
            chromArg2 = arg2
        else:
            chromStart = self.record.toChromPos(stop_g)
            chromStop = self.record.toChromPos(start_g)
            chromArg1 = Bio.Seq.reverse_complement(arg1)
            chromArg2 = Bio.Seq.reverse_complement(arg2)
            # Todo: Should we use arg1_reverse here?

663
664
665
666
667
        if roll :
            forwardStart += roll[1]
            forwardStop += roll[1]
            reverseStart -= roll[0]
            reverseStop -= roll[0]
Vermaat's avatar
Vermaat committed
668
669
670
671
672
673
674
            if chromStart is not None:
                if self.record.orientation == 1:
                    chromStart += roll[1]
                    chromStop += roll[1]
                else:
                    chromStart += roll[0]
                    chromStop += roll[0]
675
676
677
678
        #if

        if varType != "subst" :
            if forwardStart != forwardStop :
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
                # Todo: Fuzzy offsets to genomic positions (see bug #38).
                #
                # The genomic positioning is problematic. We would like to
                # have it in brackets (as fuzzy positions), like the above
                # g.(34299_23232)del example.
                #
                # Now consider a variant c.a-?_b+18del where only the offset
                # before the exon is unknown but the offset after the exon is
                # exact. Now a genomic description like g.(34299)_23232del
                # comes to mind, however, this notation is not allowed by the
                # HGVS grammar.
                #
                # I think all we can do is to treat both positions as fuzzy in
                # the genomic description, even if only one of them really is.
                #
                # Peter thinks the HGVS grammar should at some point be
                # updated to allow the brackets around individual locations.
                if start_fuzzy or stop_fuzzy:
                    self.record.addToDescription("(%s_%s)%s%s" % (
                        forwardStart, forwardStop, varType, arg1))
                    self.record.addToChromDescription("(%s_%s)%s%s" % (
Vermaat's avatar
Vermaat committed
700
                        chromStart, chromStop, varType, chromArg1))
701
702
703
704
                else:
                    self.record.addToDescription("%s_%s%s%s" % (
                        forwardStart, forwardStop, varType, arg1))
                    self.record.addToChromDescription("%s_%s%s%s" % (
Vermaat's avatar
Vermaat committed
705
                        chromStart, chromStop, varType, chromArg1))
706
            #if
707
            else :
708
709
710
711
712
713
                if start_fuzzy or stop_fuzzy:
                    # Todo: Current HGVS does not allow for () around single
                    # positions, only around ranges (see above and #38).
                    self.record.addToDescription("(%s)%s%s" % (
                        forwardStart, varType, arg1))
                    self.record.addToChromDescription("(%s)%s%s" % (
Vermaat's avatar
Vermaat committed
714
                        chromStart, varType, chromArg1))
715
716
717
718
                else:
                    self.record.addToDescription("%s%s%s" % (
                        forwardStart, varType, arg1))
                    self.record.addToChromDescription("%s%s%s" % (
Vermaat's avatar
Vermaat committed
719
                        chromStart, varType, chromArg1))
720
            #else
721
722
        #if
        else :
723
724
725
726
727
728
            if start_fuzzy or stop_fuzzy:
                # Todo: Current HGVS does not allow for () around single
                # positions, only around ranges (see above and #38).
                self.record.addToDescription("(%s)%c>%c" % (
                    forwardStart, arg1, arg2))
                self.record.addToChromDescription("(%s)%c>%c" % (
Vermaat's avatar
Vermaat committed
729
                    chromStart, chromArg1, chromArg2))
730
731
732
733
            else:
                self.record.addToDescription("%s%c>%c" % (
                    forwardStart, arg1, arg2))
                self.record.addToChromDescription("%s%c>%c" % (
Vermaat's avatar
Vermaat committed
734
                    chromStart, chromArg1, chromArg2))
735

736
737
738
        for i in self.record.geneList :
            for j in i.transcriptList :
                if j.CM :
739
740
741
742
743
744
745
                    orientedStart = forwardStart
                    orientedStop = forwardStop
                    if i.orientation == -1 :
                        orientedStart = reverseStart
                        orientedStop = reverseStop
                    #if

Vermaat's avatar
Vermaat committed
746
747
748
749
750
751
752
753
754
                    # Turn of translation to protein if we hit splice sites.
                    # For the current transcript, this is handled with more
                    # care in variantchecker.py.
                    if not j.current and \
                           util.over_splice_site(orientedStart, orientedStop,
                                                 j.CM.RNA):
                        j.translate = False

                    # And check whether the variant hits CDS start.
755
756
                    if j.molType == 'c' and forwardStop >= j.CM.x2g(1, 0) \
                       and forwardStart <= j.CM.x2g(3, 0) :
757
                        self.__output.addMessage(__file__, 2, "WSTART",
758
759
                            "Mutation in start codon of gene %s transcript " \
                            "%s." % (i.name, j.name))
Vermaat's avatar
Vermaat committed
760
761
                        if not j.current:
                            j.translate = False
762
763
764

                    # FIXME Check whether the variant hits a splice site.

765
                    if varType != "subst" :
766
                        if orientedStart != orientedStop :
767
768
769
770
771
772
773
774
775
776
777
778
                            if (start_fuzzy or stop_fuzzy) and not j.current:
                                # Don't generate descriptions on transcripts
                                # other than the current in the case of fuzzy
                                # positions.
                                j.cancelDescription()
                            else:
                                j.addToDescription("%s_%s%s%s" % (
                                    j.CM.g2c(orientedStart, start_fuzzy),
                                    j.CM.g2c(orientedStop, stop_fuzzy),
                                    varType, self.__maybeInvert(i, arg1, arg1_reverse)))
                                self.checkIntron(i, j, orientedStart)
                                self.checkIntron(i, j, orientedStop)
779
                        #if
780
                        else :
781
782
783
784
785
786
787
788
789
790
791
                            if start_fuzzy and not j.current:
                                # Don't generate descriptions on transcripts
                                # other than the current in the case of fuzzy
                                # positions.
                                j.cancelDescription()
                            else:
                                j.addToDescription("%s%s%s" % (
                                    j.CM.g2c(orientedStart, start_fuzzy),
                                    varType,
                                    self.__maybeInvert(i, arg1, arg1_reverse)))
                                self.checkIntron(i, j, orientedStart)
792
                        #else
Laros's avatar
Added:  
Laros committed
793
794
                    #if
                    else :
795
796
797
798
799
800
801
802
803
804
805
                        if start_fuzzy and not j.current:
                            # Don't generate descriptions on transcripts
                            # other than the current in the case of fuzzy
                            # positions.
                            j.cancelDescription()
                        else:
                            j.addToDescription("%s%c>%c" % (
                                j.CM.g2c(orientedStart, start_fuzzy),
                                self.__maybeInvert(i, arg1, arg1_reverse),
                                self.__maybeInvert(i, arg2)))
                            self.checkIntron(i, j, orientedStart)
806
                    #else
Laros's avatar
Added:  
Laros committed
807
808
809
                #if
            #for
        #for
810
    #name
811

812
    def checkIntron(self, gene, transcript, position):
813
814
        """
        Checks if a position is on or near a splice site
815

816
817
818
819
820
821
822
        @arg gene: Gene
        @type gene: object
        @arg transcript: transcript
        @type transcript: object
        @arg position: g. position
        @type position: integer
        """
823
        intronPos = abs(transcript.CM.g2x(position)[1])
824

825
        if intronPos :
826
827
828
            # It should be easy for SOAP clients to filter out all warnings
            # related to other transcripts, so we use two codes here.
            if transcript.current:
Vermaat's avatar
Vermaat committed
829
                warning = 'WSPLICE'
830
831
                str_transcript = 'transcript %s (selected)' % transcript.name
            else:
Vermaat's avatar
Vermaat committed
832
                warning = 'WSPLICE_OTHER'
833
834
                str_transcript = 'transcript %s' % transcript.name

835
            if intronPos <= config.get('spliceAlarm'):
836
837
838
839
840
841
842
                self.__output.addMessage(__file__, 2, warning,
                    "Mutation on splice site in gene %s %s." % (
                    gene.name, str_transcript))
            elif intronPos <= config.get('spliceWarn'):
                self.__output.addMessage(__file__, 2, warning,
                    "Mutation near splice site in gene %s %s." % (
                    gene.name, str_transcript))
843
    #checkIntron
Laros's avatar
Added:  
Laros committed
844
#GenRecord