GenRecord.py 30.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
"""
Module to convert a GenBank record to a nested dictionary consisting of
a list of genes, which itself consists of a list of loci. This structure
makes it possible to iterate over genes and transcripts without having to
search for them each time.

@requires: Crossmap
@requires: Bio
@requires: Db
"""
# Public classes:
#     - PList     ; Store a general location and a list of splice sites.
#     - Locus     ; Store data about the mRNA and CDS splice sites.
#     - Gene      ; Store a list of Locus objects and the orientation.
#     - Record    ; Store a geneList and other additional information.
#     - GenRecord ; Convert a GenBank record to a nested dictionary.


19
import Bio
20

Vermaat's avatar
Vermaat committed
21
from mutalyzer import util
22
from mutalyzer import config
23
24
from mutalyzer import Crossmap
from mutalyzer import Db
25
26
27


class PList(object):
    """
    A position list: a general location plus an optional list of splice
    sites.

    Instances describe either the mRNA splice sites or the CDS splice sites
    of a transcript. The individual sites are kept in positionList; the
    location element is the fallback for when no splice sites are available.

    Special methods:
        - __init__() ; Create an empty position list.

    Public variables:
        - location     ; The integers between which the object resides.
        - positionList ; A list (with an even amount of entries) of splice
                         sites.
    """

    def __init__(self):
        """
        Create an empty position list.

        Public variables (altered):
            - location     ; Set to a new, empty list.
            - positionList ; Set to a new, empty list.
        """
        # Two separate list objects: the attributes must never share state.
        self.location, self.positionList = [], []
    #__init__
#PList
Laros's avatar
Added:  
Laros committed
60
61
62

class Locus(object):
    """
    A Locus object: the annotation of one transcript (mRNA and CDS splice
    sites) together with the variant description that is built up for it.

    Special methods:
        - __init__() ; Initialise the class.

    Public methods:
        - cancelDescription() ; Give up on the description (set it to '?').
        - addToDescription()  ; Append a raw variant to the description.

    Public variables:
        - mRNA ; A position list object.
        - CDS  ; A position list object.
        - exon ; A position list object.
    """

    def __init__(self, name):
        """
        Initialise the class with empty annotation.

        Public variables (altered):
            - mRNA     ; A position list object (initially None).
            - CDS      ; A position list object (initially None).
            - location ; Initially an empty list.
            - exon     ; A position list object (initially None).
            - txTable  ; The translation table (initially 1).
            - CM       ; A Crossmap object (initially None).

        @arg name: identifier of the locus
        @type name: string
        """
        # Identity of this locus.
        self.name = name
        self.current = False

        # Positional annotation; nothing is known yet.
        self.mRNA = self.CDS = self.exon = None
        self.location = []
        self.CM = None

        # Translation settings.
        self.txTable = 1
        self.transl_except = []

        # Accession numbers and cross references.
        self.transcriptID = self.proteinID = self.genomicID = None
        self.locusTag = None
        self.link = None
        self.linkMethod = None
        self.transcriptProduct = self.proteinProduct = None

        # Description state.
        self.molType = 'c'
        self.description = ""
        self.proteinDescription = "?"
        self.proteinRange = []

        # Nothing can be transcribed or translated until it is enabled.
        self.transcribe = self.translate = False
    #__init__

    def cancelDescription(self):
        """
        Set the description on this locus to 'unknown' ('?').

        Use this when creating a sensible description for this locus is
        given up on. Later additions to the description are then ignored,
        so the 'unknown' value is kept.

        @note: This relies on the check for the unknown value in the
            addToDescription method. This is not a beautiful solution.
        """
        self.description = '?'
    #cancelDescription

    def addToDescription(self, rawVariant):
        """
        Expand the DNA description with a new raw variant.

        An empty description is replaced by the raw variant, an unknown
        ('?') description is left untouched, and in every other case the
        raw variant is appended after a ';' separator.

        @arg rawVariant: description of a single mutation
        @type rawVariant: string
        """
        if not self.description:
            self.description = rawVariant
        elif self.description != '?':
            # A cancelled ('?') description must stay cancelled.
            self.description = "%s;%s" % (self.description, rawVariant)
    #addToDescription
#Locus
Laros's avatar
Added:  
Laros committed
144

145

Laros's avatar
Added:  
Laros committed
146
147
class Gene(object):
    """
    A Gene object, to store a list of Locus objects and the orientation of
    the gene.

    Special methods:
        - __init__() ; Initialise the class.

    Public methods:
        - newLocusTag() ; Generate the next locus tag.
        - findLocus()   ; Find a transcript by its name.
        - listLoci()    ; List the names of all transcripts.
        - findLink()    ; Find a transcript by protein accession number.

    Public variables:
        - orientation    ; The orientation of the gene: 1 = forward,
                           -1 = reverse.
        - transcriptList ; A list of Locus objects.
    """

    def __init__(self, name):
        """
        Initialise the class.

        Public variables (altered):
            - name           ; The gene name.
            - orientation    ; The orientation of the gene (initially 1).
            - transcriptList ; A list of transcripts (initially empty).
            - location       ; Initially an empty list.
            - longName       ; Initially an empty string.
        Private variables (altered):
            - __locusTag     ; Last handed out locus tag (initially "000").

        @arg name: gene name
        @type name: string
        """
        self.name = name
        self.orientation = 1
        self.transcriptList = []
        self.location = []
        self.longName = ""
        self.__locusTag = "000"
    #__init__

    def newLocusTag(self):
        """
        Generate a new locus tag by incrementing the previous one.

        @return: Locus tag
        @rtype: string (at least 3 digits, padded with leading 0's)
        """
        self.__locusTag = "%03i" % (int(self.__locusTag) + 1)
        return self.__locusTag
    #newLocusTag

    def findLocus(self, name):
        """
        Find a transcript, given its name.

        A transcript matches if its name equals the given name, or if it
        equals the given name converted to a zero padded number of (at
        least) three digits, so "1" finds the transcript named "001".
        Names that cannot be converted to an integer are only compared
        literally.

        @arg name: transcript variant number
        @type name: string

        @return: transcript, or None if there is no such transcript
        @rtype: object
        """
        # Compute the padded form once. A non-numeric name must not raise
        # here: it simply has no padded form and can only match literally.
        try:
            paddedName = "%03i" % int(name)
        except (TypeError, ValueError):
            paddedName = None

        for locus in self.transcriptList:
            if locus.name == name:
                return locus
            if paddedName is not None and locus.name == paddedName:
                return locus
        return None
    #findLocus

    def listLoci(self):
        """
        Provide a list of transcript variant numbers.

        @return: list of transcript variant numbers
        @rtype: list
        """
        return [locus.name for locus in self.transcriptList]
    #listLoci

    def findLink(self, protAcc):
        """
        Look in the list of transcripts for a given protein accession number.

        @arg protAcc: protein accession number
        @type protAcc: string

        @return: first transcript whose link equals protAcc, or None
        @rtype: object
        """
        for locus in self.transcriptList:
            if locus.link == protAcc:
                return locus
        return None
    #findLink
#Gene

class Record(object):
    """
    A Record object, to store a geneList and other additional
    information.

    Special methods:
        - __init__() ; Initialise the class.

    Public methods:
        - findGene()              ; Find a gene by name.
        - listGenes()             ; List the names of all genes.
        - addToDescription()      ; Append a raw variant to the description.
        - toChromPos()            ; Convert a position to a chromosomal one.
        - addToChromDescription() ; Append a raw variant to the chromosomal
                                    description.

    Public variables:
        - geneList  ; List of Gene objects.
        - molType   ; Variable to indicate the sequence type (DNA, RNA, ...)
        - organelle ; Variable to indicate whether the sequence is from the
                      nucleus or from an organelle (if so, also from which
                      one).
        - source    ; A fake gene that can be used when no gene information
                      is present.
    """

    def __init__(self):
        """
        Initialise the class.

        Public variables (altered):
            - geneList  ; List of Gene objects.
            - molType   ; Variable to indicate the sequence type (DNA, RNA,
                          ...)
            - seq       ; The reference sequence
            - mapping   ; The mapping of the reference sequence to the genome
                          include a list of differences between the sequences
            - organelle ; Variable to indicate whether the sequence is from
                          the nucleus or from an organelle (if so, also from
                          which one).
            - source    ; A fake gene that can be used when no gene
                          information is present.
        """
        # Content of the record.
        self.geneList = []
        self.source = Gene(None)
        self.seq = ""
        self.mapping = []

        # Properties of the reference sequence.
        self.molType = 'g'
        self.organelle = None
        self.version = None
        self.recordId = None
        self._sourcetype = None           #LRG or GB

        # Variant descriptions, relative to the record and the chromosome.
        self.description = ""
        self.chromDescription = ""

        # Placement on the chromosome; an offset of 0 means 'unknown'.
        self.chromOffset = 0
        self.orientation = 1
    #__init__

    def findGene(self, name):
        """
        Return a Gene object, given its name.

        @arg name: Gene name
        @type name: string

        @return: first Gene object with this name, or None if there is none
        @rtype: object
        """
        matches = (gene for gene in self.geneList if gene.name == name)
        return next(matches, None)
    #findGene

    def listGenes(self):
        """
        List the names of all genes found in this record.

        @return: Genes list
        @rtype: list
        """
        return [gene.name for gene in self.geneList]
    #listGenes

    def addToDescription(self, rawVariant):
        """
        Expand the DNA description with a new raw variant.

        The raw variant replaces an empty description and is otherwise
        appended after a ';' separator.

        @arg rawVariant: description of a single mutation
        @type rawVariant: string
        """
        if not self.description:
            self.description = rawVariant
        else:
            self.description = "%s;%s" % (self.description, rawVariant)
    #addToDescription

    def toChromPos(self, i):
        """
        Convert a g. position (relative to the start of the record) to a
        chromosomal g. position.

        @arg i: g. position (relative to the start of the record)
        @type i: integer

        @return: chromosomal g. position, or None if no chromosomal offset
            is set
        @rtype: integer
        """
        offset = self.chromOffset
        if not offset:
            return None

        # Position 1 of the record sits on the offset itself; counting goes
        # up on the forward strand and down otherwise.
        distance = i - 1
        if self.orientation == 1:
            return offset + distance
        return offset - distance
    #toChromPos

    def addToChromDescription(self, rawVariant):
        """
        Expand the chromosomal description with a new raw variant.

        Nothing happens if no chromosomal offset is set. Otherwise the raw
        variant replaces an empty chromosomal description or is appended
        after a ';' separator.

        @arg rawVariant: description of a single mutation
        @type rawVariant: string
        """
        if not self.chromOffset:
            return

        if not self.chromDescription:
            self.chromDescription = rawVariant
        else:
            self.chromDescription = "%s;%s" % (self.chromDescription,
                                               rawVariant)
    #addToChromDescription
#Record
Laros's avatar
Laros committed
377

Laros's avatar
Added:  
Laros committed
378
379
class GenRecord() :
    """
380
    Convert a GenBank record to a nested dictionary.
Laros's avatar
Added:  
Laros committed
381

382
383
    Public methods:
        - checkRecord()   ;   Check and repair self.record.
Laros's avatar
Added:  
Laros committed
384
385
    """

386
    def __init__(self, output) :
387
        """
388
        Initialise the class.
389

390
391
        Public variable:
            - record    ; A record object
392

393
394
        @arg output: an output object
        @type output: object
395
396
397
398
399
        """
        self.__output = output
        self.record = None
    #__init__

400
401
    def __checkExonList(self, exonList, CDSpos) :
        """
402
        @todo document me
403

404
405
406
407
        @arg exonList: list of splice sites
        @type exonList: list (object)
        @arg CDSpos: location of the CDS
        @type CDSpos: object
408
409

        @return:
410
        @rtype: boolean
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
        """

        if not exonList :
            return False
        if not CDSpos :
            return True

        e = exonList.positionList
        c = CDSpos.location

        seen = 0
        for i in range(0, len(e), 2) :
            if e[i] <= c[0] and e[i + 1] >= c[0] :
                seen += 1
            if e[i] <= c[1] and e[i + 1] >= c[1] :
                seen += 1
        #for

        if seen == 2 :
            return True
        return False
    #__checkExonList
433

434
435
    def __constructCDS(self, mRNA, CDSpos) :
        """
436
        Construct a list of coordinates that contains CDS start and stop and
437
        the internal splice sites.
438

439
440
441
442
443
444
445
        @arg mRNA: mRNA positions/coordinates list
        @type mRNA: list (integer)
        @arg CDSpos: coding DNA positions/coordinates
        @type CDSpos: list (integer)

        @return: CDS positions plus internal splice sites
        @rtype: list (integer)
446
        """
447

448
449
        i = 1
        ret = [CDSpos[0]]
450

451
452
        while CDSpos[0] > mRNA[i] :
            i += 2
453

454
455
456
        j = i
        while CDSpos[1] > mRNA[j] :
            j += 2
457

458
459
        ret.extend(mRNA[i:j])
        ret.append(CDSpos[1])
460

461
462
463
        return ret
    #__constructCDS

Vermaat's avatar
Vermaat committed
464
    def __maybeInvert(self, gene, string, string_reverse=None) :
465
        """
466
467
        Return the reverse-complement of a DNA sequence if the gene is in
        the reverse orientation.
468
469

        @arg gene: Gene
470
471
472
        @type gene: object
        @arg string: DNA sequence
        @type string: string
Vermaat's avatar
Vermaat committed
473
474
        @kwarg string_reverse: DNA sequence to use (if not None) for the
            reverse complement.
475

476
477
478
        @return: reverse-complement (if applicable), otherwise return the
            original.
        @rtype: string
479
        """
Vermaat's avatar
Vermaat committed
480
481
482
        if gene.orientation == -1:
            if string_reverse:
                string = string_reverse
483
484
485
486
            return Bio.Seq.reverse_complement(string)
        return string
    #__maybeInvert

487
    def checkRecord(self) :
488
        """
489
490
        Check if the record in self.record is compatible with mutalyzer.
        Update the mRNA PList with the exon and CDS data.
491

492
493
        @todo: This function should really check the record for minimal
        requirements
494
495
        """

496
        #TODO:  This function should really check
497
        #       the record for minimal requirements.
498
        for i in self.record.geneList :
499
500
501
502
503
504
505
506
507
508
509
510
511
            """
            if len(i.transcriptList) == 2 :
                if i.transcriptList[0].CDS and not i.transcriptList[1].CDS and \
                   i.transcriptList[1].mRNA and not i.transcriptList[0].mRNA :
                    i.transcriptList[0].mRNA = i.transcriptList[1].mRNA
                if i.transcriptList[1].CDS and not i.transcriptList[0].CDS and \
                   i.transcriptList[0].mRNA and not i.transcriptList[1].mRNA :
                    i.transcriptList[0].CDS = i.transcriptList[1].CDS
                i.transcriptList = [i.transcriptList[0]]
                i.transcriptList[0].transcribe = True
                i.transcriptList[0].translate = True
            #if
            """
512
513
            for j in i.transcriptList :
                if not j.mRNA :
514
                    usableExonList = self.__checkExonList(j.exon, j.CDS)
515
516
517
518
519
520
521
522
523
                    if self.record.molType == 'n' and j.exon:
                        if not all(p1 + 1 == p2 for p1, p2 in
                                   util.grouper(j.exon.positionList[1:-1])):
                            code = 'WEXON_ANNOTATION' if j.current else 'WEXON_ANNOTATION_OTHER'
                            self.__output.addMessage(__file__, 2, code,
                                "Exons for gene %s, transcript variant %s were "
                                "found not to be adjacent. This signifies a "
                                "possible problem in the annotation of the "
                                "reference sequence." % (i.name, j.name))
524
                    if not j.exon or not usableExonList :
525
                        if self.record.molType == 'g' :
526
527
                            code = 'WNOMRNA' if j.current else 'WNOMRNA_OTHER'
                            self.__output.addMessage(__file__, 2, code,
528
529
                                "No mRNA field found for gene %s, transcript " \
                                "variant %s in record, constructing " \
530
531
532
                                "it from CDS. Please note that descriptions "\
                                "exceeding CDS boundaries are invalid." % (
                                i.name, j.name))
533
534
                        if j.exon and j.exon.positionList and \
                           not usableExonList :
535
536
                            code = 'WNOMRNA' if j.current else 'WNOMRNA_OTHER'
                            self.__output.addMessage(__file__, 2, code,
537
538
539
540
541
                                "Exons were found for gene %s, transcript " \
                                "variant %s but were not usable. " \
                                "Please note that descriptions "\
                                "exceeding CDS boundaries are invalid." % (
                                i.name, j.name))
542
543
                        if j.CDS :
                            if not j.CDS.positionList :
544
545
546
547
548
                                #self.__output.addMessage(__file__, 2,
                                #    "WNOCDSLIST", "No CDS list found for " \
                                #    "gene %s, transcript variant %s in " \
                                #    "record, constructing it from " \
                                #    "CDS location." % (i.name, j.name))
549
550
551
552
553
                                j.mRNA = j.CDS
                                j.mRNA.positionList = j.CDS.location
                            #if
                            else :
                                j.mRNA = j.CDS
Laros's avatar
Laros committed
554
                            j.linkMethod = "construction"
555
556
                            j.transcribe = True
                            j.translate = True
557
558
559
560
                        #if
                        else :
                            self.__output.addMessage(__file__, 2, "WNOCDS",
                                "No CDS found for gene %s, transcript " \
561
                                "variant %s in record, " \
562
                                "constructing it from gene location." % (
563
                                i.name, j.name))
564
565
566
567
568
                            j.CDS = None #PList()
                            #j.CDS.location = i.location
                            j.mRNA = PList()
                            j.mRNA.location = i.location
                            #j.mRNA.positionList = i.location
569
570
                            j.molType = 'n'
                        #else
Laros's avatar
Added:  
Laros committed
571
572
                    #if
                    else :
573
574
575
576
577
                        #self.__output.addMessage(__file__, 2, "WNOMRNA",
                        #    "No mRNA field found for gene %s, transcript " \
                        #    "variant %s in record, constructing " \
                        #    "it from gathered exon information." % (
                        #    i.name, j.name))
578
579
580
                        j.mRNA = j.exon
                    #else
                #if
581
582
583
                #else :
                #    j.transcribe = True

584
585
                if not j.mRNA.positionList :
                    j.mRNA.positionList = j.mRNA.location
586
                if j.mRNA.positionList and j.CDS and j.CDS.positionList != None :
587
                    if not j.CDS.positionList :
588
589
590
                        #self.__output.addMessage(__file__, 2, "WNOCDS",
                        #    "No CDS list found for gene %s, transcript " \
                        #    "variant %s in record, constructing " \
591
                        #    "it from mRNA list and CDS location." % (i.name,
592
                        #    j.name))
593
594
595
596
597
598
                        if j.mRNA.positionList :
                            j.CDS.positionList = self.__constructCDS(
                                j.mRNA.positionList, j.CDS.location)
                        else :
                            j.CDS.positionList = self.__constructCDS(
                                j.mRNA.location, j.CDS.location)
599
600
                        j.transcribe = True
                        j.translate = True
601
                    #if
602
                    j.CM = Crossmap.Crossmap(j.mRNA.positionList,
603
                                             j.CDS.location, i.orientation)
Laros's avatar
Added:  
Laros committed
604
                #if
605
606
607
                else :
                    j.molType = 'n'
                    if j.mRNA.positionList :
608
                        j.CM = Crossmap.Crossmap(j.mRNA.positionList,
609
                                                 [], i.orientation)
Laros's avatar
Laros committed
610
                        j.transcribe = True
611
612
                    else :
                        j.description = '?'
613
                #else
614
615
            #for
        #for
616
    #checkRecord
617

618
619
620
621
622
623
624
625
626
627
628
629
630
631
    def current_transcript(self):
        """
        Return the current transcript.

        @return: Current transcript if there is one, None otherwise.
        @rtype: GenRecord.Locus
        """
        for i in self.record.geneList:
            for j in i.transcriptList:
                if j.current:
                    return j
        return None
    #current_transcript

632
633
    def name(self, start_g, stop_g, varType, arg1, arg2, roll, arg1_reverse=None,
             start_fuzzy=False, stop_fuzzy=False):
634
        """
635
        Generate variant descriptions for all genes, transcripts, etc.
Vermaat's avatar
Vermaat committed
636

637
638
639
640
641
642
643
644
645
646
647
648
        @arg start_g: start position
        @type start_g: integer
        @arg stop_g: stop position
        @type stop_g: integer
        @arg varType: variant type
        @type varType: string
        @arg arg1: argument 1 of a raw variant
        @type arg1: string
        @arg arg2: argument 2 of a raw variant
        @type arg2: string
        @arg roll: ???
        @type roll: tuple (integer, integer)
Vermaat's avatar
Vermaat committed
649
650
        @kwarg arg1_reverse: argument 1 to be used on reverse strand
        @type arg1_reverse: string
651
652
653
654
        @kwarg start_fuzzy: Indicates if start position of variant is fuzzy.
        @type start_fuzzy: bool
        @kwarg stop_fuzzy: Indicates if stop position of variant is fuzzy.
        @type stop_fuzzy: bool
655
        """
656
657
658
659
        forwardStart = start_g
        forwardStop = stop_g
        reverseStart = stop_g
        reverseStop = start_g
Vermaat's avatar
Vermaat committed
660
661
662
663
664
665
666
667
668
669
670
671
672

        if self.record.orientation == 1:
            chromStart = self.record.toChromPos(start_g)
            chromStop = self.record.toChromPos(stop_g)
            chromArg1 = arg1
            chromArg2 = arg2
        else:
            chromStart = self.record.toChromPos(stop_g)
            chromStop = self.record.toChromPos(start_g)
            chromArg1 = Bio.Seq.reverse_complement(arg1)
            chromArg2 = Bio.Seq.reverse_complement(arg2)
            # Todo: Should we use arg1_reverse here?

673
674
675
676
677
        if roll :
            forwardStart += roll[1]
            forwardStop += roll[1]
            reverseStart -= roll[0]
            reverseStop -= roll[0]
Vermaat's avatar
Vermaat committed
678
679
680
681
682
683
684
            if chromStart is not None:
                if self.record.orientation == 1:
                    chromStart += roll[1]
                    chromStop += roll[1]
                else:
                    chromStart += roll[0]
                    chromStop += roll[0]
685
686
687
688
        #if

        if varType != "subst" :
            if forwardStart != forwardStop :
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
                # Todo: Fuzzy offsets to genomic positions (see bug #38).
                #
                # The genomic positioning is problematic. We would like to
                # have it in brackets (as fuzzy positions), like the above
                # g.(34299_23232)del example.
                #
                # Now consider a variant c.a-?_b+18del where only the offset
                # before the exon is unknown but the offset after the exon is
                # exact. Now a genomic description like g.(34299)_23232del
                # comes to mind, however, this notation is not allowed by the
                # HGVS grammar.
                #
                # I think all we can do is to treat both positions as fuzzy in
                # the genomic description, even if only one of them really is.
                #
                # Peter thinks the HGVS grammar should at some point be
                # updated to allow the brackets around individual locations.
                if start_fuzzy or stop_fuzzy:
                    self.record.addToDescription("(%s_%s)%s%s" % (
                        forwardStart, forwardStop, varType, arg1))
                    self.record.addToChromDescription("(%s_%s)%s%s" % (
Vermaat's avatar
Vermaat committed
710
                        chromStart, chromStop, varType, chromArg1))
711
712
713
714
                else:
                    self.record.addToDescription("%s_%s%s%s" % (
                        forwardStart, forwardStop, varType, arg1))
                    self.record.addToChromDescription("%s_%s%s%s" % (
Vermaat's avatar
Vermaat committed
715
                        chromStart, chromStop, varType, chromArg1))
716
            #if
717
            else :
718
719
720
721
722
723
                if start_fuzzy or stop_fuzzy:
                    # Todo: Current HGVS does not allow for () around single
                    # positions, only around ranges (see above and #38).
                    self.record.addToDescription("(%s)%s%s" % (
                        forwardStart, varType, arg1))
                    self.record.addToChromDescription("(%s)%s%s" % (
Vermaat's avatar
Vermaat committed
724
                        chromStart, varType, chromArg1))
725
726
727
728
                else:
                    self.record.addToDescription("%s%s%s" % (
                        forwardStart, varType, arg1))
                    self.record.addToChromDescription("%s%s%s" % (
Vermaat's avatar
Vermaat committed
729
                        chromStart, varType, chromArg1))
730
            #else
731
732
        #if
        else :
733
734
735
736
737
738
            if start_fuzzy or stop_fuzzy:
                # Todo: Current HGVS does not allow for () around single
                # positions, only around ranges (see above and #38).
                self.record.addToDescription("(%s)%c>%c" % (
                    forwardStart, arg1, arg2))
                self.record.addToChromDescription("(%s)%c>%c" % (
Vermaat's avatar
Vermaat committed
739
                    chromStart, chromArg1, chromArg2))
740
741
742
743
            else:
                self.record.addToDescription("%s%c>%c" % (
                    forwardStart, arg1, arg2))
                self.record.addToChromDescription("%s%c>%c" % (
Vermaat's avatar
Vermaat committed
744
                    chromStart, chromArg1, chromArg2))
745

746
747
748
        for i in self.record.geneList :
            for j in i.transcriptList :
                if j.CM :
749
750
751
752
753
754
755
                    orientedStart = forwardStart
                    orientedStop = forwardStop
                    if i.orientation == -1 :
                        orientedStart = reverseStart
                        orientedStop = reverseStop
                    #if

Vermaat's avatar
Vermaat committed
756
757
758
759
760
761
762
763
764
                    # Turn of translation to protein if we hit splice sites.
                    # For the current transcript, this is handled with more
                    # care in variantchecker.py.
                    if not j.current and \
                           util.over_splice_site(orientedStart, orientedStop,
                                                 j.CM.RNA):
                        j.translate = False

                    # And check whether the variant hits CDS start.
765
766
                    if j.molType == 'c' and forwardStop >= j.CM.x2g(1, 0) \
                       and forwardStart <= j.CM.x2g(3, 0) :
767
                        self.__output.addMessage(__file__, 2, "WSTART",
768
769
                            "Mutation in start codon of gene %s transcript " \
                            "%s." % (i.name, j.name))
Vermaat's avatar
Vermaat committed
770
771
                        if not j.current:
                            j.translate = False
772
773
774

                    # FIXME Check whether the variant hits a splice site.

775
                    if varType != "subst" :
776
                        if orientedStart != orientedStop :
777
778
779
780
781
782
783
784
785
786
787
788
                            if (start_fuzzy or stop_fuzzy) and not j.current:
                                # Don't generate descriptions on transcripts
                                # other than the current in the case of fuzzy
                                # positions.
                                j.cancelDescription()
                            else:
                                j.addToDescription("%s_%s%s%s" % (
                                    j.CM.g2c(orientedStart, start_fuzzy),
                                    j.CM.g2c(orientedStop, stop_fuzzy),
                                    varType, self.__maybeInvert(i, arg1, arg1_reverse)))
                                self.checkIntron(i, j, orientedStart)
                                self.checkIntron(i, j, orientedStop)
789
                        #if
790
                        else :
791
792
793
794
795
796
797
798
799
800
801
                            if start_fuzzy and not j.current:
                                # Don't generate descriptions on transcripts
                                # other than the current in the case of fuzzy
                                # positions.
                                j.cancelDescription()
                            else:
                                j.addToDescription("%s%s%s" % (
                                    j.CM.g2c(orientedStart, start_fuzzy),
                                    varType,
                                    self.__maybeInvert(i, arg1, arg1_reverse)))
                                self.checkIntron(i, j, orientedStart)
802
                        #else
Laros's avatar
Added:  
Laros committed
803
804
                    #if
                    else :
805
806
807
808
809
810
811
812
813
814
815
                        if start_fuzzy and not j.current:
                            # Don't generate descriptions on transcripts
                            # other than the current in the case of fuzzy
                            # positions.
                            j.cancelDescription()
                        else:
                            j.addToDescription("%s%c>%c" % (
                                j.CM.g2c(orientedStart, start_fuzzy),
                                self.__maybeInvert(i, arg1, arg1_reverse),
                                self.__maybeInvert(i, arg2)))
                            self.checkIntron(i, j, orientedStart)
816
                    #else
Laros's avatar
Added:  
Laros committed
817
818
819
                #if
            #for
        #for
820
    #name
821

822
    def checkIntron(self, gene, transcript, position):
        """
        Checks if a position is on or near a splice site and, if so, adds
        a message about it to the output object.

        The distance that is checked is the absolute value of the second
        element of the result of transcript.CM.g2x(position) (presumably
        the intronic offset of the position; to be confirmed against
        Crossmap). When this value is zero, nothing is reported.

        Otherwise the value is compared with two configuration settings:
            - 'spliceAlarm' ; At or below this value, the mutation is
                              reported as being on a splice site.
            - 'spliceWarn'  ; At or below this value (but above
                              'spliceAlarm'), the mutation is reported as
                              being near a splice site.
        Above both values, nothing is reported.

        The message code is 'WSPLICE' for the current (selected) transcript
        and 'WSPLICE_OTHER' for any other transcript.

        @arg gene: Gene
        @type gene: object
        @arg transcript: transcript
        @type transcript: object
        @arg position: g. position
        @type position: integer
        """
        intronPos = abs(transcript.CM.g2x(position)[1])

        # An offset of zero means there is nothing to report.
        if not intronPos :
            return

        # It should be easy for SOAP clients to filter out all warnings
        # related to other transcripts, so we use two codes here.
        if transcript.current:
            warning = 'WSPLICE'
            str_transcript = 'transcript %s (selected)' % transcript.name
        else:
            warning = 'WSPLICE_OTHER'
            str_transcript = 'transcript %s' % transcript.name

        # The alarm threshold is tested first, so a position that satisfies
        # both thresholds is only reported once, as "on" the splice site.
        if intronPos <= config.get('spliceAlarm'):
            self.__output.addMessage(__file__, 2, warning,
                "Mutation on splice site in gene %s %s." % (
                gene.name, str_transcript))
        elif intronPos <= config.get('spliceWarn'):
            self.__output.addMessage(__file__, 2, warning,
                "Mutation near splice site in gene %s %s." % (
                gene.name, str_transcript))
    #checkIntron
Laros's avatar
Added:  
Laros committed
854
#GenRecord