diff --git a/extras/cron.d/mutalyzer-mapping-update b/extras/cron.d/mutalyzer-mapping-update index cac1a9767741c0a42a372408ac65cb7c58d1647d..496571df76d7578bf69d5e5e5e4a18ff6bdd8fd9 100644 --- a/extras/cron.d/mutalyzer-mapping-update +++ b/extras/cron.d/mutalyzer-mapping-update @@ -2,4 +2,4 @@ #25 3 * * 7 www-data wget "ftp://ftp.ncbi.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/mapview/seq_gene.md.gz" -O - | zcat > /tmp/seq_gene.md; <MUTALYZER_BIN_MAPPING_UPDATE> hg18 /tmp/seq_gene.md reference #25 4 * * 7 www-data wget "ftp://ftp.ncbi.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.37.2/mapview/seq_gene.md.gz" -O - | zcat > /tmp/seq_gene.md; <MUTALYZER_BIN_MAPPING_UPDATE> hg19 /tmp/seq_gene.md 'GRCh37.p2-Primary Assembly' -##25 4 * * 7 www-data wget "ftp://ftp.ncbi.nih.gov/genomes/H_sapiens/mapview/seq_gene.md.gz" -O - | zcat > /tmp/seq_gene.md; <MUTALYZER_BIN_MAPPING_UPDATE> hg19 /tmp/seq_gene.md 'GRCh37.p2-Primary Assembly' +##25 4 * * 7 www-data wget "ftp://ftp.ncbi.nih.gov/genomes/H_sapiens/mapview/seq_gene.md.gz" -O - | zcat > /tmp/seq_gene.md; <MUTALYZER_BIN_MAPPING_UPDATE> hg19 /tmp/seq_gene.md 'GRCh37.p5-Primary Assembly' diff --git a/mutalyzer/Db.py b/mutalyzer/Db.py index bc6c76252ab083275b0c6a84da1f2036ad48c3b3..b8a6ff9c0037e346f7a5ba27accf436fc73ae51b 100644 --- a/mutalyzer/Db.py +++ b/mutalyzer/Db.py @@ -644,7 +644,7 @@ class Mapping(Db) : T.orientation as orientation, MIN(T.start) as start, MAX(T.stop) as stop, - MAX(E.cds_start) as cds_start, + REPLACE(MIN(COALESCE(E.cds_start, 1000000000)), 1000000000, NULL) as cds_start, MAX(E.cds_stop) as cds_stop, GROUP_CONCAT(DISTINCT E.start ORDER BY E.start ASC) as exon_starts, GROUP_CONCAT(DISTINCT E.stop ORDER BY E.stop ASC) as exon_stops, diff --git a/mutalyzer/mapping.py b/mutalyzer/mapping.py index 3c70668d47081d76df750f2d873a943456d40258..d93ff17e791dc0163e72a7e6cfdcbf53d3e0eee8 100644 --- a/mutalyzer/mapping.py +++ b/mutalyzer/mapping.py @@ -833,6 +833,8 @@ class NCBIUpdater(Updater): entry['start'] = int(entry['start']) entry['stop'] = int(entry['stop']) entry['protein'] = entry['feature_name'] if cds else None + entry['cds_start'] = entry['start'] if cds else None + entry['cds_stop'] = entry['stop'] if cds else None entry['cds'] = cds self._import_exon_backlog(entry['start'] - 1) @@ -841,12 +843,9 @@ class NCBIUpdater(Updater): previous = self.exon_backlog[entry['transcript']] if previous['cds'] != entry['cds'] \ and previous['stop'] == entry['start'] - 1: - if entry['cds']: - entry['cds_start'] = entry['start'] - else: - entry['cds_stop'] = previous['stop'] - if 'cds_start' in previous: - entry['cds_start'] = previous['cds_start'] + if previous['cds']: + entry['cds_start'] = previous['cds_start'] + entry['cds_stop'] = previous['cds_stop'] entry['protein'] = previous['protein'] entry['start'] = previous['start'] except KeyError: @@ -870,9 +869,7 @@ class NCBIUpdater(Updater): del exon['cds'] self.db.ncbi_import_exon( exon['transcript'], exon['chromosome'], exon['start'], exon['stop'], - exon['cds_start'] if 'cds_start' in exon else None, - exon['cds_stop'] if 'cds_stop' in exon else None, - exon['protein'] or None) + exon['cds_start'], exon['cds_stop'], exon['protein'] or None) #_import_exon_backlog def _aggregate_mapping(self): diff --git a/tests/test_mapping.py b/tests/test_mapping.py index 3180d521a79a053501ee4fa34e357e0d0ee1a62b..72a21f3cf96568eb04fedfad251f0d49fe145ce5 100644 --- a/tests/test_mapping.py +++ b/tests/test_mapping.py @@ -60,3 +60,19 @@ class TestConverter(): coding = converter.chrom2c('NC_000022.10:g.51016285_51017117del123456789', 'list') assert 'NM_001145134.1:c.-138-u21_60del123456789' in coding assert 'NR_021492.1:c.1-u5170_1-u4338del123456789' in coding + + def test_S_Venkata_Suresh_Kumar(self): + """ + Test for correct mapping information on genes where CDS start or stop + is exactly on the border of an exon. + + Bug reported February 24 by S Venkata Suresh Kumartest. + """ + converter = self._converter('hg19') + coding = converter.chrom2c('NC_000001.10:g.115259837_115259837delT', 'list') + assert 'NM_001007553.1:c.3863delA' not in coding + assert 'NM_001007553.2:c.3863delA' not in coding + assert 'NM_001007553.1:c.*953delA' in coding + assert 'NM_001130523.1:c.*953delA' in coding + assert 'NM_001007553.2:c.*953delA' in coding + assert 'NM_001130523.2:c.*953delA' in coding