diff --git a/mutalyzer/mapping.py b/mutalyzer/mapping.py index 4e4e595e99ee47f9a621dd26ffc80d988c67cec0..4ecba590e35343324bc106a1f1635835a8740a21 100644 --- a/mutalyzer/mapping.py +++ b/mutalyzer/mapping.py @@ -19,6 +19,43 @@ from mutalyzer import Crossmap from mutalyzer.models import SoapMessage, Mapping, Transcript +def _construct_change(var, reverse=False): + """ + Construct mutation description. + + @arg var: RawVar object. + @type var: pyparsing.ParseResults + @var reverse: Variant is on the reverse strand. + @type reverse: bool + + @return: Description of mutation (without reference and positions). + @rtype: string + """ + if reverse: + # todo: if var.Arg1 is unicode, this crashes + try: + arg1 = str(int(var.Arg1)) + except ValueError: + arg1 = reverse_complement(var.Arg1 or '') + try: + arg2 = str(int(var.Arg2)) + except ValueError: + arg2 = reverse_complement(var.Arg2 or '') + else: + arg1 = var.Arg1 + arg2 = var.Arg2 + + if var.MutationType == 'subst': + change = '%s>%s' % (arg1, arg2) + elif var.MutationType == 'delins' and arg2: + change = '%s%s' % (var.MutationType, arg2) + else: + change = '%s%s' % (var.MutationType, arg1 or arg2 or '') + + return change +#_construct_change + + class Converter(object) : """ Convert between transcript and chromosomal locations. @@ -83,12 +120,6 @@ class Converter(object) : "Could not parse the given variant") return None #if - if parseTree.SingleAlleleVarSet : - #Only simple mutations - self.__output.addMessage(__file__, 4, "EPARSE", - "Can not process multiple mutation variant") - return None - #if if not parseTree.RefSeqAcc: #In case of LRG for example self.__output.addMessage(__file__, 4, "EONLYGB", "Currently we only support GenBank Records") @@ -287,35 +318,44 @@ class Converter(object) : if not Cross : return None - mutation = self.parseTree.RawVar + if self.parseTree.SingleAlleleVarSet: + mutations = [v.RawVar for v in self.parseTree.SingleAlleleVarSet] + else: + mutations = [self.parseTree.RawVar] - if not mutation.StartLoc : - return None + mappings = [] - # Get the coordinates of the start position - startmain, startoffset, start_g = \ - self._getcoords(Cross, mutation.StartLoc, - self.parseTree.RefType) + for mutation in mutations: - # If there is an end position, calculate the coordinates. - if mutation.EndLoc : - endmain, endoffset, end_g = \ - self._getcoords(Cross, mutation.EndLoc, - self.parseTree.RefType) - else : - end_g, endmain, endoffset = start_g, startmain, startoffset - - # Assign these values to the Mapping ClassSerializer - V = Mapping() - V.startmain = startmain - V.startoffset = startoffset - V.endmain = endmain - V.endoffset = endoffset - V.start_g = start_g - V.end_g = end_g - V.mutationType = mutation.MutationType - - return V + if not mutation.StartLoc : + return None + + # Get the coordinates of the start position + startmain, startoffset, start_g = \ + self._getcoords(Cross, mutation.StartLoc, + self.parseTree.RefType) + + # If there is an end position, calculate the coordinates. + if mutation.EndLoc : + endmain, endoffset, end_g = \ + self._getcoords(Cross, mutation.EndLoc, + self.parseTree.RefType) + else : + end_g, endmain, endoffset = start_g, startmain, startoffset + + # Assign these values to the Mapping ClassSerializer + V = Mapping() + V.startmain = startmain + V.startoffset = startoffset + V.endmain = endmain + V.endoffset = endoffset + V.start_g = start_g + V.end_g = end_g + V.mutationType = mutation.MutationType + + mappings.append(V) + + return mappings #_coreMapping def giveInfo(self, accNo) : @@ -375,14 +415,13 @@ class Converter(object) : @return: ClassSerializer object @rtype: object """ - variant = "%s:%s" % (accNo, mutation) if self._parseInput(variant) : acc = self.parseTree.RefSeqAcc version = self.parseTree.Version self._FieldsFromDb(acc, version) - mapping = self._coreMapping() + mappings = self._coreMapping() errors = [] for message in self.__output.getMessages(): @@ -391,14 +430,62 @@ class Converter(object) : soap_message.message = message.description errors.append(soap_message) - if mapping is None : # Something went wrong - mapping = Mapping() - mapping.errorcode = len(errors) - else : - mapping.errorcode = 0 + mapping = Mapping() mapping.messages = errors + if not mappings: # Something went wrong + mapping.errorcode = len(errors) + return mapping + + mapping.errorcode = 0 + + if len(mappings) == 1: + return mappings[0] + + mapping.mutationType = 'compound' + + # Now we have to do some tricks to combine the information from + # possibly multiple variants into one Mapping object. + # Todo: Maybe it is better to use self.dbFields['orientation'] here. + min_g = 9000000000 + max_g = 0 + forward = True + for m in mappings: + if m.start_g > m.end_g: + forward = False + if m.start_g < min_g: + min_g = m.start_g + min_main = m.startmain + min_offset = m.startoffset + if m.end_g < min_g: + min_g = m.end_g + min_main = m.endmain + min_offset = m.endoffset + if m.start_g > max_g: + max_g = m.start_g + max_main = m.startmain + max_offset = m.startoffset + if m.end_g > max_g: + max_g = m.end_g + max_main = m.endmain + max_offset = m.endoffset + + if forward: + mapping.startmain = min_main + mapping.startoffset = min_offset + mapping.endmain = max_main + mapping.endoffset = max_offset + mapping.start_g = min_g + mapping.end_g = max_g + else: + mapping.startmain = max_main + mapping.startoffset = max_offset + mapping.endmain = min_main + mapping.endoffset = min_offset + mapping.start_g = max_g + mapping.end_g = min_g + return mapping #main_Mapping @@ -412,40 +499,52 @@ class Converter(object) : @return: var_in_g ; The variant in HGVS I{g.} notation @rtype: string """ - - if self._parseInput(variant) : + if self._parseInput(variant): acc = self.parseTree.RefSeqAcc version = self.parseTree.Version self._FieldsFromDb(acc, version) - #if - M = self._coreMapping() - if M is None : + + mappings = self._coreMapping() + if not mappings: return None - # construct the variant description + if self.parseTree.SingleAlleleVarSet: + variants = [v.RawVar for v in self.parseTree.SingleAlleleVarSet] + else: + variants = [self.parseTree.RawVar] + chromAcc = self.__database.chromAcc(self.dbFields["chromosome"]) - f_change = self._constructChange(False) - r_change = self._constructChange(True) - if self.dbFields["orientation"] == "+" : - change = f_change - else : - change = r_change - if M.start_g != M.end_g: - if self.dbFields["orientation"] == '-': - last_g, first_g = M.start_g, M.end_g - else: - first_g, last_g = M.start_g, M.end_g - if last_g < first_g: - self.__output.addMessage(__file__, 3, 'ERANGE', 'End position ' - 'is smaller than the begin position.') - return None - var_in_g = "g.%s_%s%s" % (first_g, last_g, change) - #if - else : - var_in_g = "g.%s%s" % (M.start_g, change) + # Construct the variant descriptions + descriptions = [] + for variant, mapping in zip(variants, mappings): + f_change = _construct_change(variant) + r_change = _construct_change(variant, reverse=True) + if self.dbFields["orientation"] == "+" : + change = f_change + else : + change = r_change + + if mapping.start_g != mapping.end_g: + if self.dbFields["orientation"] == '-': + last_g, first_g = mapping.start_g, mapping.end_g + else: + first_g, last_g = mapping.start_g, mapping.end_g + if last_g < first_g: + self.__output.addMessage(__file__, 3, 'ERANGE', 'End position ' + 'is smaller than the begin position.') + return None + descriptions.append('%s_%s%s' % (first_g, last_g, change)) + #if + else : + descriptions.append('%s%s' % (mapping.start_g, change)) - return "%s:%s" % (chromAcc, var_in_g) + if len(descriptions) == 1: + description = descriptions[0] + else: + description = '[' + ';'.join(descriptions) + ']' + + return "%s:g.%s" % (chromAcc, description) #c2chrom def chromosomal_positions(self, positions, reference, version=None): @@ -551,25 +650,34 @@ class Converter(object) : acc) return None #if - f_change = self._constructChange(False) - r_change = self._constructChange(True) - #FIXME This should be a proper conversion. - loc = int(self.parseTree.RawVar.StartLoc.PtLoc.Main) - if self.parseTree.RawVar.EndLoc : - loc2 = int(self.parseTree.RawVar.EndLoc.PtLoc.Main) - else : - loc2 = loc + if self.parseTree.SingleAlleleVarSet: + variants = [v.RawVar for v in self.parseTree.SingleAlleleVarSet] + else: + variants = [self.parseTree.RawVar] + + min_loc = 9000000000 + max_loc = 0 + for variant in variants: + #FIXME This should be a proper conversion. + loc = int(variant.StartLoc.PtLoc.Main) + if variant.EndLoc : + loc2 = int(variant.EndLoc.PtLoc.Main) + else : + loc2 = loc - if loc2 < loc: - self.__output.addMessage(__file__, 3, 'ERANGE', 'End position is ' - 'smaller than the begin position.') - return None + if loc2 < loc: + self.__output.addMessage(__file__, 3, 'ERANGE', 'End position is ' + 'smaller than the begin position.') + return None + + min_loc = min(min_loc, loc) + max_loc = max(max_loc, loc2) if gene: transcripts = self.__database.get_TranscriptsByGeneName(gene) else: - transcripts = self.__database.get_Transcripts(chrom, loc-5000, loc2+5000, 1) + transcripts = self.__database.get_Transcripts(chrom, min_loc - 5000, max_loc + 5000, 1) HGVS_notatations = defaultdict(list) NM_list = [] @@ -579,22 +687,14 @@ class Converter(object) : if self.dbFields['chromosome'] != chrom: # Could be the case if we got transcripts by gene name continue - M = self._coreMapping() - if M is None : + mappings = self._coreMapping() + if not mappings: #balen continue # construct the variant description accNo = "%s.%s" % (self.dbFields["transcript"],self.dbFields["version"]) geneName = self.dbFields["gene"] or "" strand = self.dbFields["orientation"] == '+' - startp = self.crossmap.tuple2string((M.startmain, M.startoffset)) - endp = self.crossmap.tuple2string((M.endmain, M.endoffset)) - - if strand : - change = f_change - else : - change = r_change - startp, endp = endp, startp #Check if n or c type info = self.crossmap.info() @@ -603,60 +703,40 @@ class Converter(object) : else : mtype = 'c' - if M.start_g != M.end_g : - loca = "%s_%s" % (startp, endp) - else : - loca = "%s" % startp + mutations = [] + for variant, mapping in zip(variants, mappings): + f_change = _construct_change(variant) + r_change = _construct_change(variant, reverse=True) - variant = "%s:%c.%s%s" % (accNo, mtype, loca, change) - HGVS_notatations[geneName].append(variant) - NM_list.append(variant) - #for - if rt == "list" : - return NM_list - return HGVS_notatations - #chrom2c + startp = self.crossmap.tuple2string((mapping.startmain, mapping.startoffset)) + endp = self.crossmap.tuple2string((mapping.endmain, mapping.endoffset)) - def _constructChange(self, revc = False) : - """ - @todo document me + if strand : + change = f_change + else : + change = r_change + startp, endp = endp, startp - @arg revc: - @type revc: + if mapping.start_g != mapping.end_g : + loca = "%s_%s" % (startp, endp) + else : + loca = "%s" % startp - @return: - @rtype: string - """ + mutations.append('%s%s' % (loca, change)) - p = self.parseTree - if not p or p.SingleAlleleVarSet : - return None - var = p.RawVar - - if revc : - # todo: if var.Arg1 is unicode, this crashes - try: - arg1 = str(int(var.Arg1)) - except ValueError: - arg1 = reverse_complement(var.Arg1 or "") - try: - arg2 = str(int(var.Arg2)) - except ValueError: - arg2 = reverse_complement(var.Arg2 or "") - #if - else : - arg1 = var.Arg1 - arg2 = var.Arg2 - #else + if len(mutations) == 1: + mutation = mutations[0] + else: + mutation = '[' + ';'.join(mutations) + ']' - if var.MutationType == "subst" : - change = "%s>%s" % (arg1, arg2) - elif var.MutationType == 'delins' and arg2: - change = "%s%s" % (var.MutationType, arg2) - else : - change = "%s%s" % (var.MutationType, arg1 or arg2 or "") - return change - #_constructChange + description = "%s:%c.%s" % (accNo, mtype, mutation) + HGVS_notatations[geneName].append(description) + NM_list.append(description) + #for + if rt == "list" : + return NM_list + return HGVS_notatations + #chrom2c #Converter diff --git a/tests/test_mapping.py b/tests/test_mapping.py index 69b2941560b604716b618dbe6672424615c8c72d..2499d016e8b79535d0b180023d5394ec46505d40 100644 --- a/tests/test_mapping.py +++ b/tests/test_mapping.py @@ -35,6 +35,20 @@ class TestConverter(): assert_equal(genomic, 'NC_000011.9:g.111959695G>T') coding = converter.chrom2c(genomic, 'list') assert 'NM_003002.2:c.274G>T' in coding + # Fix for r536: disable the -u and +d convention. + #assert 'NR_028383.1:c.1-u2173C>A' in coding + assert 'NR_028383.1:c.-2173C>A' in coding + + def test_converter_compound(self): + """ + Test with compound variant. + """ + converter = self._converter('hg19') + genomic = converter.c2chrom('NM_003002.2:c.[274G>T;278A>G]') + assert_equal(genomic, 'NC_000011.9:g.[111959695G>T;111959699A>G]') + coding = converter.chrom2c(genomic, 'list') + assert 'NM_003002.2:c.[274G>T;278A>G]' in coding + assert 'NR_028383.1:c.[-2173C>A;-2177T>C]' in coding def test_hla_cluster(self): """ diff --git a/tests/test_services_soap.py b/tests/test_services_soap.py index 0a9096c551958496a90b89d6aece597aab5cc20f..8ac6c91f08208c46ff8ce5b7b2e06d43ce7246c6 100644 --- a/tests/test_services_soap.py +++ b/tests/test_services_soap.py @@ -229,6 +229,71 @@ class TestServicesSoap(): 'NR_034083'): assert t in names + def test_mappinginfo(self): + """ + Running mappingInfo should give a Mapping object. + """ + r = self.client.service.mappingInfo('3.0-beta-06', 'hg19', 'NM_001100.3', 'g.112037014G>T') + assert_equal(r.endoffset, 117529978) + assert_equal(r.start_g, 112037014) + assert_equal(r.startoffset, 117529978) + assert_equal(r.mutationType, "subst") + assert_equal(r.end_g, 112037014) + assert_equal(r.startmain, 1388) + assert_equal(r.endmain, 1388) + + def test_mappinginfo(self): + """ + Running mappingInfo should give a Mapping object. + """ + r = self.client.service.mappingInfo('3.0-beta-06', 'hg19', 'NM_001008541.1', 'g.112039014G>T') + assert_equal(r.endoffset, 0) + assert_equal(r.start_g, 112039014) + assert_equal(r.startoffset, 0) + assert_equal(r.mutationType, 'subst') + assert_equal(r.end_g, 112039014) + assert_equal(r.startmain, 175) + assert_equal(r.endmain, 175) + + def test_mappinginfo_compound(self): + """ + Running mappingInfo with compound variant should give a Mapping object. + """ + r = self.client.service.mappingInfo('3.0-beta-06', 'hg19', 'NM_001008541.1', 'g.[112039014G>T;112039018T>A]') + assert_equal(r.endoffset, 0) + assert_equal(r.start_g, 112039014) + assert_equal(r.startoffset, 0) + assert_equal(r.mutationType, 'compound') + assert_equal(r.end_g, 112039018) + assert_equal(r.startmain, 175) + assert_equal(r.endmain, 179) + + def test_mappinginfo_reverse(self): + """ + Running mappingInfo on a reverse transcript should give a Mapping object. + """ + r = self.client.service.mappingInfo('3.0-beta-06', 'hg19', 'NM_000035.3', 'g.104184170_104184179del') + assert_equal(r.endoffset, 0) + assert_equal(r.start_g, 104184170) + assert_equal(r.startoffset, 0) + assert_equal(r.mutationType, 'del') + assert_equal(r.end_g, 104184179) + assert_equal(r.startmain, 1016) + assert_equal(r.endmain, 1007) + + def test_mappinginfo_compound_reverse(self): + """ + Running mappingInfo with compound variant on a reverse transcript should give a Mapping object. + """ + r = self.client.service.mappingInfo('3.0-beta-06', 'hg19', 'NM_000035.3', 'g.[104184170_104184179del;104184182_104184183del]') + assert_equal(r.endoffset, 0) + assert_equal(r.start_g, 104184170) + assert_equal(r.startoffset, 0) + assert_equal(r.mutationType, 'compound') + assert_equal(r.end_g, 104184183) + assert_equal(r.startmain, 1016) + assert_equal(r.endmain, 1003) + def test_info(self): """ Running the info method should give us some version information.