From d07b822a39d7ca9104ae889f61f54e7469fe5248 Mon Sep 17 00:00:00 2001 From: Martijn Vermaat <martijn@vermaat.name> Date: Mon, 12 Mar 2012 12:42:13 +0000 Subject: [PATCH] Better descriptions on chromosome For UD slices we also generate g. descriptions on the chromosome reference. We now also apply the roll rule there and use correct ranges and sequences on the reverse strand. Fixes #75. git-svn-id: https://humgenprojects.lumc.nl/svn/mutalyzer/trunk@495 eb6bd6ab-9ccd-42b9-aceb-e2899b4a52f1 --- mutalyzer/GenRecord.py | 34 +++++++-- tests/test_variantchecker.py | 142 +++++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+), 8 deletions(-) diff --git a/mutalyzer/GenRecord.py b/mutalyzer/GenRecord.py index 91ddb199..f4a8ad2a 100644 --- a/mutalyzer/GenRecord.py +++ b/mutalyzer/GenRecord.py @@ -645,11 +645,31 @@ class GenRecord() : forwardStop = stop_g reverseStart = stop_g reverseStop = start_g + + if self.record.orientation == 1: + chromStart = self.record.toChromPos(start_g) + chromStop = self.record.toChromPos(stop_g) + chromArg1 = arg1 + chromArg2 = arg2 + else: + chromStart = self.record.toChromPos(stop_g) + chromStop = self.record.toChromPos(start_g) + chromArg1 = Bio.Seq.reverse_complement(arg1) + chromArg2 = Bio.Seq.reverse_complement(arg2) + # Todo: Should we use arg1_reverse here? 
+ if roll : forwardStart += roll[1] forwardStop += roll[1] reverseStart -= roll[0] reverseStop -= roll[0] + if chromStart is not None: + if self.record.orientation == 1: + chromStart += roll[1] + chromStop += roll[1] + else: + chromStart += roll[0] + chromStop += roll[0] #if if varType != "subst" : @@ -675,14 +695,12 @@ class GenRecord() : self.record.addToDescription("(%s_%s)%s%s" % ( forwardStart, forwardStop, varType, arg1)) self.record.addToChromDescription("(%s_%s)%s%s" % ( - self.record.toChromPos(forwardStart), - self.record.toChromPos(forwardStop), varType, arg1)) + chromStart, chromStop, varType, chromArg1)) else: self.record.addToDescription("%s_%s%s%s" % ( forwardStart, forwardStop, varType, arg1)) self.record.addToChromDescription("%s_%s%s%s" % ( - self.record.toChromPos(forwardStart), - self.record.toChromPos(forwardStop), varType, arg1)) + chromStart, chromStop, varType, chromArg1)) #if else : if start_fuzzy or stop_fuzzy: @@ -691,12 +709,12 @@ class GenRecord() : self.record.addToDescription("(%s)%s%s" % ( forwardStart, varType, arg1)) self.record.addToChromDescription("(%s)%s%s" % ( - self.record.toChromPos(forwardStart), varType, arg1)) + chromStart, varType, chromArg1)) else: self.record.addToDescription("%s%s%s" % ( forwardStart, varType, arg1)) self.record.addToChromDescription("%s%s%s" % ( - self.record.toChromPos(forwardStart), varType, arg1)) + chromStart, varType, chromArg1)) #else #if else : @@ -706,12 +724,12 @@ class GenRecord() : self.record.addToDescription("(%s)%c>%c" % ( forwardStart, arg1, arg2)) self.record.addToChromDescription("(%s)%c>%c" % ( - self.record.toChromPos(forwardStart), arg1, arg2)) + chromStart, chromArg1, chromArg2)) else: self.record.addToDescription("%s%c>%c" % ( forwardStart, arg1, arg2)) self.record.addToChromDescription("%s%c>%c" % ( - self.record.toChromPos(forwardStart), arg1, arg2)) + chromStart, chromArg1, chromArg2)) for i in self.record.geneList : for j in i.transcriptList : diff --git 
a/tests/test_variantchecker.py b/tests/test_variantchecker.py index a078ec89..11557bff 100644 --- a/tests/test_variantchecker.py +++ b/tests/test_variantchecker.py @@ -528,6 +528,148 @@ class TestVariantchecker(): assert 'UD_127955523176(DMD_v001):c.=' \ in self.output.getOutput('descriptions') + @skip + def test_ud_reverse_sequence(self): + """ + Variant on UD from reverse strand should have reverse complement + sequence. + + Todo: We cannot use UD references in unit tests, unless we implement + a way to create them inside the unit test. + """ + check_variant('UD_132680290559(DPYD_v1):c.85C>T', self.output) + error_count, _, _ = self.output.Summary() + assert_equal(error_count, 0) + assert_equal(self.output.getIndexedOutput('genomicChromDescription', 0), + 'NC_000001.10:g.98348885G>A') + assert_equal(self.output.getIndexedOutput('genomicDescription', 0), + 'UD_132680290559:g.42731C>T') + assert 'UD_132680290559(DPYD_v001):c.85C>T' \ + in self.output.getOutput('descriptions') + + @skip + def test_ud_forward_sequence(self): + """ + Variant on UD from forward strand should have forward sequence. + + Todo: We cannot use UD references in unit tests, unless we implement + a way to create them inside the unit test. + """ + check_variant('UD_132680514783(MARK1_v001):c.400T>C', self.output) + error_count, _, _ = self.output.Summary() + assert_equal(error_count, 0) + assert_equal(self.output.getIndexedOutput('genomicChromDescription', 0), + 'NC_000001.10:g.220773181T>C') + assert_equal(self.output.getIndexedOutput('genomicDescription', 0), + 'UD_132680514783:g.76614T>C') + assert 'UD_132680514783(MARK1_v001):c.400T>C' \ + in self.output.getOutput('descriptions') + + @skip + def test_ud_reverse_range(self): + """ + Variant on UD from reverse strand should have reversed range + positions. + + This UD number is for DPYD. + + Todo: We cannot use UD references in unit tests, unless we implement + a way to create them inside the unit test. 
+ """ + check_variant('UD_133130716532:g.10624_78132del', self.output) + error_count, _, _ = self.output.Summary() + assert_equal(error_count, 0) + assert_equal(self.output.getIndexedOutput('genomicChromDescription', 0), + 'NC_000009.11:g.32928508_32996016del') + assert_equal(self.output.getIndexedOutput('genomicDescription', 0), + 'UD_133130716532:g.10624_78132del') + + @skip + def test_ud_forward_range(self): + """ + Variant on UD from forward strand should have forward range positions. + + Todo: We cannot use UD references in unit tests, unless we implement + a way to create them inside the unit test. + """ + check_variant('UD_132680514783(MARK1_v001):c.400_415del', self.output) + error_count, _, _ = self.output.Summary() + assert_equal(error_count, 0) + assert_equal(self.output.getIndexedOutput('genomicChromDescription', 0), + 'NC_000001.10:g.220773181_220773196del') + assert_equal(self.output.getIndexedOutput('genomicDescription', 0), + 'UD_132680514783:g.76614_76629del') + + @skip + def test_ud_reverse_del_length(self): + """ + Variant on UD from reverse strand should have reversed range + positions, but not reverse complement of first argument (it is not a + sequence, but a length). + + This UD number is for DPYD. + + Todo: We cannot use UD references in unit tests, unless we implement + a way to create them inside the unit test. + """ + check_variant('UD_133130716532:g.10624_78132del67509', self.output) + error_count, _, _ = self.output.Summary() + assert_equal(error_count, 0) + assert_equal(self.output.getIndexedOutput('genomicChromDescription', 0), + 'NC_000009.11:g.32928508_32996016del') + assert_equal(self.output.getIndexedOutput('genomicDescription', 0), + 'UD_133130716532:g.10624_78132del') + + @skip + def test_ud_reverse_roll(self): + """ + Variant on UD from reverse strand should roll the opposite direction. + + The situation is as follows: + + G A A A T T + c. 102 103 104 105 106 107 + g. 748 749 750 751 752 753 + chr g. 
868 867 866 865 864 863 + + Todo: We cannot use UD references in unit tests, unless we implement + a way to create them inside the unit test. + """ + check_variant('UD_132680290559(DPYD_v001):c.104del', self.output) + error_count, _, _ = self.output.Summary() + assert_equal(error_count, 0) + assert_equal(self.output.getIndexedOutput('genomicChromDescription', 0), + 'NC_000001.10:g.98348867del') + assert_equal(self.output.getIndexedOutput('genomicDescription', 0), + 'UD_132680290559:g.42751del') + assert 'UD_132680290559(DPYD_v001):c.105del' \ + in self.output.getOutput('descriptions') + + @skip + def test_ud_forward_roll(self): + """ + Variant on UD from forward strand should roll the same. + + The situation is as follows: + + A T T T A + c. 398 399 400 401 402 + g. 612 613 614 615 616 + chr g. 179 180 181 182 183 + + Todo: We cannot use UD references in unit tests, unless we implement + a way to create them inside the unit test. + """ + check_variant('UD_132680514783(MARK1_v001):c.400del', self.output) + error_count, _, _ = self.output.Summary() + assert_equal(error_count, 0) + assert_equal(self.output.getIndexedOutput('genomicChromDescription', 0), + 'NC_000001.10:g.220773182del') + assert_equal(self.output.getIndexedOutput('genomicDescription', 0), + 'UD_132680514783:g.76615del') + assert 'UD_132680514783(MARK1_v001):c.401del' \ + in self.output.getOutput('descriptions') + def test_deletion_with_sequence_forward_genomic(self): """ Specify the deleted sequence in a deletion. -- GitLab