diff --git a/mutalyzer/entrypoints/mutalyzer.py b/mutalyzer/entrypoints/mutalyzer.py index c04a112a2ce1c8c2df0988f4ecd1a0e406d77d0e..73d01804173a088c492768852be55b420f11d033 100644 --- a/mutalyzer/entrypoints/mutalyzer.py +++ b/mutalyzer/entrypoints/mutalyzer.py @@ -115,8 +115,8 @@ def check_name(description): described_allele = extractor.describe_dna(reference_sequence, sample_sequence) #described_protein_allele = describe.describe( - # O.getIndexedOutput("oldprotein", 0), - # O.getIndexedOutput("newprotein", 0, default=""), + # O.getIndexedOutput("oldProtein", 0), + # O.getIndexedOutput("newProtein", 0, default=""), # DNA=False) described_protein_allele = "" diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py index d0315fd138ab47ae6de018e663675ef3ec41b66f..fde784055806dbce4c3f26016e25f0306c46a4ef 100644 --- a/mutalyzer/services/rpc.py +++ b/mutalyzer/services/rpc.py @@ -840,8 +840,8 @@ class MutalyzerService(ServiceBase): result.origCDS = O.getIndexedOutput("origCDS", 0) result.newCDS = O.getIndexedOutput("newCDS", 0) - result.origProtein = O.getIndexedOutput("oldprotein", 0) - result.newProtein = O.getIndexedOutput("newprotein", 0) + result.origProtein = O.getIndexedOutput("oldProtein", 0) + result.newProtein = O.getIndexedOutput("newProtein", 0) result.altProtein = O.getIndexedOutput("altProtein", 0) result.chromDescription = \ diff --git a/mutalyzer/variantchecker.py b/mutalyzer/variantchecker.py index a3bc7a8d36e6fe59515653cf5dfb8dce6d592ffa..9da54db856fc344472f1a2cfff9eb6ac7229555f 100644 --- a/mutalyzer/variantchecker.py +++ b/mutalyzer/variantchecker.py @@ -1332,8 +1332,8 @@ def _add_transcript_info(mutator, transcript, output): # Data added to the output object: # - origCDS: Original CDS. # - newCDS: Variant CDS. - # - oldprotein: Original protein sequence, ending with '*'. - # - newprotein: + # - oldProtein: Original protein sequence, ending with '*'. + # - newProtein: # - If variant CDS could not be translated, this is '?'. # - If start codon was affected, this is '?'. # - If variant protein equals original protein, this is unset. @@ -1404,26 +1404,17 @@ def _add_transcript_info(mutator, transcript, output): output.addOutput('origCDS', unicode(cds_original)) output.addOutput("newCDS", unicode(cds_variant[:len(protein_variant) * 3])) - output.addOutput('oldprotein', unicode(protein_original)) + # Under which name to store the variant protein sequence. Can be: + # - 'new': Normal case. + # - 'alt': New start codon was created. + # - None: Don't show variant protein sequence. + protein_variant_output = None - # Todo: Don't generate the fancy HTML protein views here, do this in - # website.py. - # I think it would also be nice to include the mutated list of splice - # sites. - - if not protein_variant or unicode(cds_variant[:3]) != unicode(cds_original[:3]): + if (not protein_variant or + unicode(cds_variant[:3]) != unicode(cds_original[:3])): # Could not translate variant CDS or variant hits start codon. In # that case we predict p.? and see if a non-reference start codon # was created. - util.print_protein_html(unicode(protein_original), 0, 0, - output, 'oldProteinFancy') - util.print_protein_html(unicode(protein_original), 0, 0, - output, 'oldProteinFancyText', text=True) - output.addOutput('newprotein', '?') - util.print_protein_html('?', 0, 0, output, 'newProteinFancy') - util.print_protein_html('?', 0, 0, output, - 'newProteinFancyText', text=True) - if protein_variant: # Variant CDS could be translated, but start codon was # affected. @@ -1433,37 +1424,63 @@ def _add_transcript_info(mutator, transcript, output): if unicode(cds_variant[0:3]) in start_codons: # A non-reference start codon was created. output.addOutput('altStart', unicode(cds_variant[0:3])) - - if unicode(protein_original) != unicode(protein_variant): - # The resulting protein is actually different, so - # visualise the difference. - # Todo: Protein differences are not color-coded, - # use something like below in protein_description(). - output.addOutput('altProtein', unicode(protein_variant)) - util.print_protein_html(unicode(protein_variant), 0, - 0, output, 'altProteinFancy') - util.print_protein_html(unicode(protein_variant), 0, - 0, output, 'altProteinFancyText', text=True) + protein_variant_output = 'alt' else: # Variant CDS was translated and start codon is unchanged. + protein_variant_output = 'new' + + # Todo: Don't generate the fancy HTML protein views here, do this in + # website.py. + # I think it would also be nice to include the mutated list of splice + # sites. + + if protein_variant_output: + # Show protein sequence. We start by calculating offsets for diff + # coloring. cds_length = util.cds_length( mutator.shift_sites(transcript.CDS.positionList)) - descr, first, last_original, last_variant = \ - util.protein_description(cds_length, - unicode(protein_original), - unicode(protein_variant)) - - util.print_protein_html(unicode(protein_original), first, - last_original, output, 'oldProteinFancy') - util.print_protein_html(unicode(protein_original), first, - last_original, output, 'oldProteinFancyText', text=True) + descr, first, last_original, last_variant = util.protein_description( + cds_length, unicode(protein_original), unicode(protein_variant)) + + # Show original protein sequence. + output.addOutput('oldProtein', unicode(protein_original)) + util.print_protein_html( + unicode(protein_original), first, last_original, output, + 'oldProteinFancy') + util.print_protein_html( + unicode(protein_original), first, last_original, output, + 'oldProteinFancyText', text=True) + if unicode(protein_original) != unicode(protein_variant): - output.addOutput('newprotein', unicode(protein_variant)) - util.print_protein_html(unicode(protein_variant), first, - last_variant, output, 'newProteinFancy') - util.print_protein_html(unicode(protein_variant), first, - last_variant, output, 'newProteinFancyText', text=True) + # The resulting protein is actually different, so + # visualise the difference. + output.addOutput( + protein_variant_output + 'Protein', + unicode(protein_variant)) + util.print_protein_html( + unicode(protein_variant), first, last_variant, output, + protein_variant_output + 'ProteinFancy') + util.print_protein_html( + unicode(protein_variant), first, last_variant, output, + protein_variant_output + 'ProteinFancyText', text=True) + + else: + # Show original protein sequence, no diff. + output.addOutput('oldProtein', unicode(protein_original)) + util.print_protein_html(unicode(protein_original), 0, 0, + output, 'oldProteinFancy') + util.print_protein_html(unicode(protein_original), 0, 0, + output, 'oldProteinFancyText', text=True) + + if not protein_variant_output or protein_variant_output == 'alt': + # If we don't show a diff, or it is stored in + # altProtein/altProteinFancy, we should still populate the normal + # newProtein/newProteinFancy fields with a ?. + output.addOutput('newProtein', '?') + util.print_protein_html('?', 0, 0, output, 'newProteinFancy') + util.print_protein_html('?', 0, 0, output, 'newProteinFancyText', + text=True) #_add_transcript_info diff --git a/mutalyzer/website/templates/name-checker.html b/mutalyzer/website/templates/name-checker.html index 470c53093b50c3cd358ba8c9cb314887616a132e..f1db90967a778dd07f5f784eb0a680f2b6a5e0fa 100644 --- a/mutalyzer/website/templates/name-checker.html +++ b/mutalyzer/website/templates/name-checker.html @@ -171,7 +171,7 @@ {% endif %} {% if altStart %} - <h4>Alternative protein using start codon {{ altStart }}</h4> + <h4>Protein predicted from variant coding sequence using start codon {{ altStart }}</h4> {% if altProtein %} <pre> {%- for i in altProtein -%} diff --git a/tests/test_variantchecker.py b/tests/test_variantchecker.py index f45fbb01d851f10e7b79438c7ea450280c1ae8f8..3fac19f9ecc45fe155065338ae5741c3a3708a0a 100644 --- a/tests/test_variantchecker.py +++ b/tests/test_variantchecker.py @@ -47,7 +47,7 @@ class TestVariantchecker(MutalyzerTest): in self.output.getOutput('descriptions') assert 'AL449423.14(CDKN2A_i001):p.(Met54_Gly55delinsSer)' \ in self.output.getOutput('protDescriptions') - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') @fix(cache('AL449423.14')) def test_insertion_in_frame(self): @@ -62,7 +62,7 @@ class TestVariantchecker(MutalyzerTest): in self.output.getOutput('descriptions') assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsIleSer)' \ in self.output.getOutput('protDescriptions') - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') @fix(cache('AL449423.14')) def test_insertion_list_in_frame(self): @@ -77,7 +77,7 @@ class TestVariantchecker(MutalyzerTest): in self.output.getOutput('descriptions') assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsIleSer)' \ in self.output.getOutput('protDescriptions') - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') @fix(cache('AL449423.14')) def test_deletion_insertion_in_frame(self): @@ -92,7 +92,7 @@ class TestVariantchecker(MutalyzerTest): in self.output.getOutput('descriptions') assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsAsnPro)' \ in self.output.getOutput('protDescriptions') - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') @fix(cache('AL449423.14')) def test_deletion_insertion_list_in_frame(self): @@ -107,7 +107,7 @@ class TestVariantchecker(MutalyzerTest): in self.output.getOutput('descriptions') assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsAsnPro)' \ in self.output.getOutput('protDescriptions') - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') @fix(cache('AL449423.14')) def test_deletion_insertion_in_frame_complete(self): @@ -122,7 +122,7 @@ class TestVariantchecker(MutalyzerTest): in self.output.getOutput('descriptions') assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsAsnPro)' \ in self.output.getOutput('protDescriptions') - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') @fix(cache('AL449423.14')) def test_deletion_insertion_list_in_frame_complete(self): @@ -138,7 +138,7 @@ class TestVariantchecker(MutalyzerTest): in self.output.getOutput('descriptions') assert 'AL449423.14(CDKN2A_i001):p.(Met54delinsAsnPro)' \ in self.output.getOutput('protDescriptions') - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') @fix(cache('NM_003002.2')) def test_est_warning_nm_est(self): @@ -315,7 +315,7 @@ class TestVariantchecker(MutalyzerTest): Insertion on CDS start boundary should not be included in CDS. """ check_variant('NM_000143.3:c.-1_1insCAT', self.output) - assert self.output.getIndexedOutput("newprotein", 0) == None + assert self.output.getIndexedOutput("newProtein", 0) == None # Todo: Is this a good test? @fix(cache('NM_000143.3')) @@ -324,7 +324,7 @@ class TestVariantchecker(MutalyzerTest): Insertion after CDS start boundary should be included in CDS. """ check_variant('NM_000143.3:c.1_2insCAT', self.output) - assert self.output.getIndexedOutput("newprotein", 0) == '?' + assert self.output.getIndexedOutput("newProtein", 0) == '?' # Todo: Is this a good test? @fix(cache('NG_012772.1')) @@ -337,7 +337,7 @@ class TestVariantchecker(MutalyzerTest): assert self.output.getOutput('removedSpliceSites') == [] # Todo: For now, the following is how to check if no protein # prediction is done. - assert not self.output.getOutput('newprotein') + assert not self.output.getOutput('newProtein') @fix(cache('NG_012772.1')) def test_del_exon(self): @@ -349,7 +349,7 @@ class TestVariantchecker(MutalyzerTest): assert self.output.getOutput('removedSpliceSites') == [2] # Todo: For now, the following is how to check if protein # prediction is done. - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') @fix(cache('NG_012772.1')) def test_del_exon_exact(self): @@ -361,7 +361,7 @@ class TestVariantchecker(MutalyzerTest): assert self.output.getOutput('removedSpliceSites') == [2] # Todo: For now, the following is how to check if protein # prediction is done. - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') @fix(cache('NG_012772.1')) def test_del_exon_in_frame(self): @@ -378,7 +378,7 @@ class TestVariantchecker(MutalyzerTest): assert self.output.getOutput('removedSpliceSites') == [2] # Todo: For now, the following is how to check if protein # prediction is done. - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') # Todo: assert that protein products indeed have only this difference. @fix(cache('NG_012772.1')) @@ -391,7 +391,7 @@ class TestVariantchecker(MutalyzerTest): assert self.output.getOutput('removedSpliceSites') == [4] # Todo: For now, the following is how to check if protein # prediction is done. - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') @fix(cache('NG_012772.1')) def test_del_intron(self): @@ -404,7 +404,7 @@ class TestVariantchecker(MutalyzerTest): assert self.output.getOutput('removedSpliceSites') == [2] # Todo: For now, the following is how to check if protein # prediction is done. - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') @fix(cache('NG_012772.1')) def test_del_intron_exact(self): @@ -415,12 +415,12 @@ class TestVariantchecker(MutalyzerTest): check_variant('NG_012772.1(BRCA2_v001):c.681+1_682-1del', self.output) assert self.output.getMessagesWithErrorCode('WOVERSPLICE') == [] assert self.output.getOutput('removedSpliceSites') == [2] - # Note: The protein prediction is done, but 'newprotein' is not set + # Note: The protein prediction is done, but 'newProtein' is not set # because we have no change. So to check if the prediction is done, we - # check if 'oldprotein' is set and to check if the prediction is - # correct, we check if 'newprotein' is not set. - assert self.output.getOutput('oldprotein') - assert not self.output.getOutput('newprotein') + # check if 'oldProtein' is set and to check if the prediction is + # correct, we check if 'newProtein' is not set. + assert self.output.getOutput('oldProtein') + assert not self.output.getOutput('newProtein') @fix(cache('NG_012772.1')) def test_del_intron_in_frame(self): @@ -433,7 +433,7 @@ class TestVariantchecker(MutalyzerTest): assert self.output.getOutput('removedSpliceSites') == [2] # Todo: For now, the following is how to check if protein # prediction is done. - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') # Todo: assert that protein products indeed have only this difference. @fix(cache('NG_012772.1')) @@ -446,7 +446,7 @@ class TestVariantchecker(MutalyzerTest): assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) > 0 # Todo: For now, the following is how to check if protein # prediction is done. - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') # Genomic positions should be centered in flanking introns and unsure. assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_012772.1:g.(17550_19725)del' assert 'NG_012772.1(BRCA2_v001):c.632-?_681+?del' \ @@ -471,7 +471,7 @@ class TestVariantchecker(MutalyzerTest): assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) > 0 # Todo: For now, the following is how to check if protein # prediction is done. - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') # Genomic positions should be centered in flanking introns and unsure. assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_012772.1:g.(7324_11720)del' assert 'NG_012772.1(BRCA2_v001):c.68-?_316+?del' \ @@ -491,7 +491,7 @@ class TestVariantchecker(MutalyzerTest): assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) > 0 # Todo: For now, the following is how to check if protein # prediction is done. - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') # Genomic positions should be centered in flanking introns and unsure. assert self.output.getIndexedOutput('genomicDescription', 0) == 'NG_012772.1:g.[(17550_19725)del;19017del]' assert 'NG_012772.1(BRCA2_v001):c.[632-?_681+?del;681+4del]' \ @@ -511,7 +511,7 @@ class TestVariantchecker(MutalyzerTest): assert len(self.output.getMessagesWithErrorCode('IDELSPLICE')) > 0 # Todo: For now, the following is how to check if protein # prediction is done. - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') # Genomic positions should be centered in flanking introns and unsure. assert self.output.getIndexedOutput('genomicDescription', 0) == 'AL449423.14:g.(60314_63683)del' assert 'AL449423.14(CDKN2A_v001):c.151-?_457+?del' \ @@ -533,7 +533,7 @@ class TestVariantchecker(MutalyzerTest): assert self.output.getOutput('removedSpliceSites') == [2] # Todo: For now, the following is how to check if protein # prediction is done. - assert self.output.getOutput('newprotein') + assert self.output.getOutput('newProtein') @fix(cache('NG_008939.1')) def test_ins_seq(self): @@ -1370,7 +1370,7 @@ class TestVariantchecker(MutalyzerTest): assert not self.output.getOutput('newProteinFancy') waltstart = self.output.getMessagesWithErrorCode('WALTSTART') assert len(waltstart) == 1 - assert self.output.getOutput('oldprotein')[0].startswith('M') + assert self.output.getOutput('oldProtein')[0].startswith('M') assert not self.output.getOutput('newProtein') assert not self.output.getOutput('altStart') assert not self.output.getOutput('altProteinFancy') @@ -1384,7 +1384,7 @@ class TestVariantchecker(MutalyzerTest): assert 'AB026906.1(SDHD_i001):p.?' in self.output.getOutput('protDescriptions') wstart = self.output.getMessagesWithErrorCode('WSTART') assert len(wstart) == 1 - assert self.output.getOutput('newprotein')[0] == '?' + assert self.output.getOutput('newProtein')[0] == '?' waltstart = self.output.getMessagesWithErrorCode('WALTSTART') assert len(waltstart) == 0 assert not self.output.getOutput('altStart') @@ -1399,7 +1399,7 @@ class TestVariantchecker(MutalyzerTest): assert 'NM_024426.4(WT1_i001):p.?' in self.output.getOutput('protDescriptions') west = self.output.getMessagesWithErrorCode('WSTART') assert len(west) == 1 - assert self.output.getOutput('newprotein')[0] == '?' + assert self.output.getOutput('newProtein')[0] == '?' waltstart = self.output.getMessagesWithErrorCode('WALTSTART') assert len(waltstart) == 1 assert not self.output.getOutput('altStart') @@ -1415,10 +1415,10 @@ class TestVariantchecker(MutalyzerTest): assert 'AB026906.1(SDHD_i001):p.?' in self.output.getOutput('protDescriptions') wstart = self.output.getMessagesWithErrorCode('WSTART') assert len(wstart) == 1 - assert self.output.getOutput('newprotein')[0] == '?' + assert self.output.getOutput('newProtein')[0] == '?' waltstart = self.output.getMessagesWithErrorCode('WALTSTART') assert len(waltstart) == 0 - assert self.output.getOutput('oldprotein')[0].startswith('M') + assert self.output.getOutput('oldProtein')[0].startswith('M') assert 'TTG' in self.output.getOutput('altStart') assert not self.output.getOutput('altProteinFancy') @@ -1433,10 +1433,10 @@ class TestVariantchecker(MutalyzerTest): assert 'NM_024426.4(WT1_i001):p.?' in self.output.getOutput('protDescriptions') west = self.output.getMessagesWithErrorCode('WSTART') assert len(west) == 1 - assert self.output.getOutput('newprotein')[0] == '?' + assert self.output.getOutput('newProtein')[0] == '?' waltstart = self.output.getMessagesWithErrorCode('WALTSTART') assert len(waltstart) == 1 - assert self.output.getOutput('oldprotein')[0].startswith('M') + assert self.output.getOutput('oldProtein')[0].startswith('M') assert 'ATG' in self.output.getOutput('altStart') assert not self.output.getOutput('altProteinFancy') @@ -1450,7 +1450,7 @@ class TestVariantchecker(MutalyzerTest): assert 'AB026906.1(SDHD_i001):p.?' in self.output.getOutput('protDescriptions') wstart = self.output.getMessagesWithErrorCode('WSTART') assert len(wstart) == 1 - assert self.output.getOutput('newprotein')[0] == '?' + assert self.output.getOutput('newProtein')[0] == '?' waltstart = self.output.getMessagesWithErrorCode('WALTSTART') assert len(waltstart) == 0 assert 'TTG' in self.output.getOutput('altStart') @@ -1467,9 +1467,9 @@ class TestVariantchecker(MutalyzerTest): assert 'NM_024426.4(WT1_i001):p.?' in self.output.getOutput('protDescriptions') west = self.output.getMessagesWithErrorCode('WSTART') assert len(west) == 1 - assert self.output.getOutput('newprotein')[0] == '?' + assert self.output.getOutput('newProtein')[0] == '?' waltstart = self.output.getMessagesWithErrorCode('WALTSTART') assert len(waltstart) == 1 - assert self.output.getOutput('oldprotein')[0].startswith('M') + assert self.output.getOutput('oldProtein')[0].startswith('M') assert 'ATG' in self.output.getOutput('altStart') assert self.output.getOutput('altProtein')[0].startswith('M')