diff --git a/mutalyzer/File.py b/mutalyzer/File.py index 118d6ca913945d41fc79b34e09949b4ae7830920..b95f03012205f4ec02832d610833f39797d43a15 100644 --- a/mutalyzer/File.py +++ b/mutalyzer/File.py @@ -101,7 +101,7 @@ class File() : return ret #__tempFileWrapper - def __parseCsvFile(self, handle) : + def __parseCsvFile(self, handle_) : """ Parse a CSV file. The stream is not rewinded after use. @@ -112,6 +112,14 @@ class File() : @return: list of lists @rtype: list """ + # We wrap the file in a temporary file just to have universal newlines + # which is not always possible to have on incoming files (thinks web + # and rpc frontends). This transparently solves the problem of Unix + # versus Windows versus Mac style newlines. + handle = tempfile.TemporaryFile('rU+w') + for chunk in handle_: + handle.write(chunk) + handle.seek(0) buf = handle.read(BUFFER_SIZE) diff --git a/mutalyzer/Scheduler.py b/mutalyzer/Scheduler.py index ebec512a302b57cfd417a8a22ededabd8cd5aea8..f8d0dee86b886b52ee1fc3a712cc58ccac942028 100644 --- a/mutalyzer/Scheduler.py +++ b/mutalyzer/Scheduler.py @@ -91,10 +91,7 @@ class Scheduler() : @arg url: The url containing the results @type url: string """ - # Mail is set to 'test@test.test" if the batch job was submitted from - # a unit test. - # Todo: Use `settings.TESTING` instead. 
- if mailTo == 'test@test.test': + if settings.TESTING: return # Mail is set to 'job@webservice' if the batch job was submitted using diff --git a/mutalyzer/parsers/genbank.py b/mutalyzer/parsers/genbank.py index ca9982824df6d42421c4b8da1509bd118c7e1844..e82836638af058a695bf19145cf5e28bad8b9a13 100644 --- a/mutalyzer/parsers/genbank.py +++ b/mutalyzer/parsers/genbank.py @@ -10,7 +10,6 @@ from itertools import izip_longest from Bio import SeqIO, Entrez from Bio.Alphabet import ProteinAlphabet -from sqlalchemy import and_, or_ from mutalyzer.config import settings from mutalyzer.db import queries diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py index 362b6639c530073d47606f37f524bd37d93b5ea4..6a287117a800b9c041693f06a646013b7a82e3e5 100644 --- a/mutalyzer/services/rpc.py +++ b/mutalyzer/services/rpc.py @@ -16,6 +16,7 @@ from spyne.model.complex import Array from spyne.model.fault import Fault import os import socket +from cStringIO import StringIO import tempfile from operator import itemgetter, attrgetter from sqlalchemy import and_, or_ @@ -96,24 +97,15 @@ class MutalyzerService(ServiceBase): # Todo: Set maximum request size by specifying the max_content_length # argument for spyne.server.wsgi.WsgiApplication in all webservice # instantiations. - max_size = settings.MAX_FILE_SIZE - - batch_file = tempfile.TemporaryFile() - size = 0 - try: - for chunk in data: - size += len(chunk) - if size > max_size: - raise Fault('EMAXSIZE', - 'Only files up to %s megabytes are accepted.' % (float(max_size) / 1048576)) - batch_file.write(chunk) - batch_file.seek(0) - job, columns = file_instance.parseBatchFile(batch_file) - finally: - try: - batch_file.close() - except IOError: - pass + if sum(len(s) for s in data) > settings.MAX_FILE_SIZE: + raise Fault('EMAXSIZE', + 'Only files up to %d megabytes are accepted.' 
+ % (settings.MAX_FILE_SIZE // 1048576)) + + batch_file = StringIO(data) + + job, columns = file_instance.parseBatchFile(batch_file) + batch_file.close() if job is None: raise Fault('EPARSE', 'Could not parse input file, please check your file format.') diff --git a/mutalyzer/util.py b/mutalyzer/util.py index 702584c03974c9e67d17dcdbb628c651fd966a69..936f0812b6abb077cb17dcb252a146cb3a5285f5 100644 --- a/mutalyzer/util.py +++ b/mutalyzer/util.py @@ -809,42 +809,6 @@ def format_usage(usage=None, keywords={}): #format_usage -def slow(f): - """ - Decorator for slow tests. This makes them to pass immediately, without - running them. But only if the environment variable MUTALYZER_QUICK_TEST - is 1. - - @todo: I don't think this actually belongs here (a separate util module - for the unit tests?). - """ - @wraps(f) - def slow_f(*args, **kwargs): - if 'MUTALYZER_QUICK_TEST' in os.environ \ - and os.environ['MUTALYZER_QUICK_TEST'] == '1': - return - else: - return f(*args, **kwargs) - return slow_f -#slow - - -def skip(f): - """ - Decorator to disable a unit test. This makes it pass immediately, without - running them. - - @todo: Perhaps it's possible to indicate to nose that the test is skipped? - @todo: I don't think this actually belongs here (a separate util module - for the unit tests?). - """ - @wraps(f) - def disabled_f(*args, **kwargs): - return - return disabled_f -#skip - - def singleton(cls): """ Decorator to define a class with a singleton instance. 
diff --git a/mutalyzer/website/templates/reference-loader.html b/mutalyzer/website/templates/reference-loader.html index 44045ce4cad817d497078dae9ee904f14cb18139..6631298c2a77867d0f6f38755bccf379c5ece44c 100644 --- a/mutalyzer/website/templates/reference-loader.html +++ b/mutalyzer/website/templates/reference-loader.html @@ -179,7 +179,7 @@ sequence (maximum size is {{ max_file_size }} megabytes): reference: <b>{{ ud }}</b> </p> <p> - <a href="{{ url_for('.reference', filename=ud + '.gb') }}">Download this reference sequence.</a> + <a href="{{ url_for('.reference', filename=ud + '.gb') }}" id="reference_download">Download this reference sequence.</a> </p> {% endif %} diff --git a/mutalyzer/website/views.py b/mutalyzer/website/views.py index 63ae9031796b211e9a0dcc347b89fefc9450ffbf..9d85548dba88c350145e943dede74eefb10892e5 100644 --- a/mutalyzer/website/views.py +++ b/mutalyzer/website/views.py @@ -292,7 +292,7 @@ def name_checker(): 'cdsStop_c' : output.getIndexedOutput('cdsStop_c', 0), 'restrictionSites' : output.getOutput('restrictionSites'), 'legends' : output.getOutput('legends'), - 'reference_filename' : reference_filename, + 'reference_filename' : reference_filename, # Todo: Download link is not shown... 
'browserLink' : browser_link, 'extractedDescription': extracted, 'extractedProtein' : extractedProt, diff --git a/requirements.txt b/requirements.txt index 5e5f2130dd594cc048f4bb6dab356c45e726705c..9ff2792de21b7f19262e2bee566d1d82cf8c16fe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,8 +10,6 @@ spyne==2.10.9 suds==0.4 wsgiref==0.1.2 xlrd==0.9.2 -WebOb==1.1.1 -WebTest==1.4.2 cssselect==0.9.1 Jinja2==2.7.1 -e git+https://github.com/mammadori/magic-python.git#egg=Magic_file_extensions @@ -19,3 +17,4 @@ Flask==0.10.1 SQLAlchemy==0.9.1 redis==2.8.0 mockredispy==2.8.0.2 +mock==1.0.1 diff --git a/tests/data/AA010203.1.gb.bz2 b/tests/data/AA010203.1.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..fceb1e828997fbfb732029db9961f3184d2b4ab3 Binary files /dev/null and b/tests/data/AA010203.1.gb.bz2 differ diff --git a/tests/data/AB026906.1.gb b/tests/data/AB026906.1.gb deleted file mode 100644 index 634426657cd37708d92af19c826eefbf41cbb88d..0000000000000000000000000000000000000000 --- a/tests/data/AB026906.1.gb +++ /dev/null @@ -1,369 +0,0 @@ -LOCUS AB026906 18678 bp DNA linear PRI 08-DEC-1999 -DEFINITION Homo sapiens SDHD gene for small subunit of cytochrome b of - succinate dehydrogenase, complete cds. -ACCESSION AB026906 -VERSION AB026906.1 GI:5295993 -KEYWORDS small subunit of cytochrome b of succinate dehydrogenase. -SOURCE Homo sapiens (human) - ORGANISM Homo sapiens - Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; - Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini; - Catarrhini; Hominidae; Homo. -REFERENCE 1 - AUTHORS Hirawake,H., Taniwaki,M., Tamura,A., Kojima,S. and Kita,K. - TITLE Cytochrome b in human complex II (succinate-ubiquinone - oxidoreductase): cDNA cloning of the components in liver - mitochondria and chromosome assignment of the genes for the large - (SDHC) and small (SDHD) subunits to 1q21 and 11q23 - JOURNAL Cytogenet. Cell Genet. 
79 (1-2), 132-138 (1997) - PUBMED 9533030 -REFERENCE 2 - AUTHORS Hirawake,H., Taniwaki,M., Tamura,A., Amino,H., Tomitsuka,E. and - Kita,K. - TITLE Characterization of the human SDHD gene encoding the small subunit - of cytochrome b (cybS) in mitochondrial succinate-ubiquinone - oxidoreductase - JOURNAL Biochim. Biophys. Acta 1412 (3), 295-300 (1999) - PUBMED 10482792 -REFERENCE 3 (bases 1 to 18678) - AUTHORS Kita,K. - TITLE Direct Submission - JOURNAL Submitted (01-MAY-1999) Kiyoshi Kita, The University of Tokyo, - Department of Biomedical Science, Graduate School of Medicine; - 7-3-1 Hongo, Bunkyo-ku, Tokyo 113-0033, Japan - (E-mail:kitak@m.u-tokyo.ac.jp, Tel:81-3-5841-3526, - Fax:81-3-5841-3444) -FEATURES Location/Qualifiers - source 1..18678 - /organism="Homo sapiens" - /mol_type="genomic DNA" - /db_xref="taxon:9606" - /chromosome="11" - /map="11q23" - gene join(5809..5860,6758..6874,7768..7912,13710..13875) - /gene="SDHD" - CDS join(5809..5860,6758..6874,7768..7912,13710..13875) - /gene="SDHD" - /codon_start=1 - /product="small subunit of cytochrome b of succinate - dehydrogenase" - /protein_id="BAA81889.1" - /db_xref="GI:5295994" - /translation="MAVLWRLSAVCGALGGRALLLRTPVVRPAHISAFLQDRPIPEWC - GVQHIHLSPSHHSGSKAASLHWTSERVVSVLLLGLLPAAYLNPCSAMDYSLAAALTLH - GHWGLGQVVTDYVHGDALQKAAKAGLLALSALTFAGLCYFNYHDVGICKAVAMLWKL" -ORIGIN - 1 gatcacgagg tcaagagatt gagaccatcc tagccaacat ggtgaaaccc cagctctact - 61 aaaaatacaa aaattagccg ggcgtggtgg catgcgcctg tagtcccagc tactcgggag - 121 gctgaggcag gagaatcact tgaacccagg aggtggaggt tgcagttgag ccgagattgc - 181 accactgcac tccagcctgg caactgagtg agactctgtc tcaagaaaaa aagaaaaaaa - 241 taataataag gctgggcgcg gtggctcatg cttgtaatcc cagcactttg ggaggccaaa - 301 gcgggcagat ccatgaggtc aggagtttga gaccagcctg gccaacataa tgaaacccca - 361 tctctactaa aaatacaaaa attagccagg catggtagca tgtgcctgta gtcccagcta - 421 ctcgggaggc taaggcagga gaatcacttg aaatcgggag gcggaggttg cagtgagctg - 481 agatcacgcc actgcactcc agcctgggca agagtgagac tccatctcaa aaaaaaaaaa - 541 
aaaaaattgt ccccatgtag acagcatgat attatgaagt gtttttggaa aagtgcttca - 601 tagctaaaga gcagagtgta agagttcgtt agtgtttctt aaggaaagca atggcaagtt - 661 ctcttacaaa tggacaaaat aatcttgtat gtctgaagtg atagtgtaca gtacaggaaa - 721 tgtagctgtt actgagctat aatgggggct tggttctacc atatctctac tttgtgttta - 781 tgtttgtgta tgcatgtact ccaaagtctt tctaatgttg ctttaatttc caaaaatgta - 841 tgcattgctt taacaataca tgtgatgtgt catattacag atggggtcac agtggttata - 901 aagagttata ccctgaagaa tttgaaacag acaggtaagg aaaataggct tactgaaaga - 961 aactaagatg gtacaaaatc tgtattataa attgatttct taacttttac gaagaatata - 1021 cttgccatca aaaatgtagc tagagaaaca ataggtgatt tagttaggag gtgatccctg - 1081 ttttcccatt ctctggttga tgtttggcat gtctgtaagc attttggttt ttatatatag - 1141 tattccatac agtaactcag tatggcagct tagaattttt accttcattt taaagatgag - 1201 gaaacaaaaa ctcaatgaga atattaaagt gttaaagtat acattaaagt gcttatttaa - 1261 aattcagatg ttaacctcaa ttttttaatc tagaatgcaa aatattaaaa taatacgctt - 1321 tttttttaca taaaagcttc tattttttaa cttttcttat tagtagtgat cagcaagata - 1381 ttaccaacgg gaagaaaaca tctccccagg taaagtcatc tacccatgaa tcccgcaaac - 1441 acaagaagtc aaagaaatcc cacaaaaaaa agcagaaaaa aaggtcacac aaaaaacaga - 1501 agaaaagcaa aaaggaagcc acagatataa cagcagattc ctcgagtgag ttctcagaag - 1561 aaactggggc ttctggtaca aggaaaggga aacaaccaca taaacgcaag aaaaaatcca - 1621 ggaaaaagtc tctcaaaaaa cctgctttat tcttagaggc agaaagtaac acttcacatt - 1681 cagatgattc agcatccagc agttctgagg aaagtgagga aagagacact aagaaaacca - 1741 aaaggaaaaa gagagagaaa aaagcccata cctctgtagc caacaatgaa atacaggaga - 1801 ggacaaacaa acgcacaaat tggaaagtag ctacagatga aaggtctgct gagagctcag - 1861 aggatgacta aatgggaaac acttttgttt tccacatgac tgtggatatt tacagttctt - 1921 actccttgtg gttttgccag tgactcttgt tcagcacggg gcctgaggtc agagctgtct - 1981 tgtgccatct gtatgttctg acagacgtct tgtcttctat tttggcgtta agcttgatcc - 2041 ccttttcttg ttaaaaggga atctggtatt ttgttatgaa ggtttcttga agagattatt - 2101 tttttttgca attaattacg tttagtgtag agtgcatata cagcaaatta aaggacccag - 2161 aaagctggat ccaatagtga cctgggtaca 
ccaatcggaa tattgaattt ggggaagtca - 2221 agggctggga tcaagaggtg gattggaact aatgccatgt aggatggtat gacaaggcaa - 2281 cactgtattg ctctctgttt atatagcagg tgtcacaact aacttgtctt tagccttggt - 2341 gctttgatcc ttctatattt tgaccccaca ggtgtggtcc ggtttactta atcaggacat - 2401 gggcctaaga acaaaccttt tcccttcatg ataacatcca tagacaactt attagaaggg - 2461 actagagttt ttgcaaattt ccctgctgga tggggcctat agctatactt agtatatgcc - 2521 taaacatggt aattggatag taaatggttt tctagttcca ttgctgtata tttgcctaaa - 2581 tggacttgtg ttcaaattat ttcttcaatt gtcatagata atcctgtacc aaatggggaa - 2641 gaattaggaa ataatcatgt tgtctaatgg tactctggat tcagggcagc aactgccatt - 2701 taaatgttgt cttgttcatt tctaaatctg ttcatgaagt ttaggttttc cctgaaacta - 2761 agttgaatta tttccaaaat gaaacaggct tctcagggac atatccactt cttcccagtc - 2821 tgcctttgga ttaaagcacc aagcagagac cacattaatt ccctttgcta tactgtgatc - 2881 cttagtatgt taattcttaa gaaaccaaca tatcactgaa agaaggctgg cagaacgcaa - 2941 gtgcattttt tcactgtggg aagaaagatc aagtgacgta ttattttttc ctggttgtca - 3001 cttaatgggc tgagtaaaaa gcttgaaaac tcagactttc ggtcttggtt ctgccactca - 3061 ttggttatga ggaggcccag agcaggtaag ttcaccttcc tggccttact ttcctgatgt - 3121 gtaatacgga attacttcac agtagcatga cagtataaga caccagcagt agatacaact - 3181 atgatgacat tccatgagtt ggtattttta gttctaactg ctaaatttgt tctctttacg - 3241 ggacagattt ctaataaagt gcttggtctt aaaatacatg gttggacaga gtgccctatc - 3301 ccttaactat gagcaggtgc taccttttgg gatatttatt ttaaatttta atactttggt - 3361 actcaattgt cagtgttcca tggtgtgtat ttttattttt gggattagtg ggggtctaaa - 3421 gggagaagaa tagtctctaa ttactacctc ttaacctaaa gcaattattt tgttcctgga - 3481 gcaagttaaa tctttgttgg aaggagcttt ggccatatat tttttagcat gcattgtttc - 3541 tgtgccctga aagtacctga aaggttttaa gcacagactc aggaaaatgt gccagtagaa - 3601 caggccatct ccaggaaatt ggctctattt gggtcctgac cttcccttcc tcccaagtta - 3661 gcaggcttgt tcttttgcaa ggaatacaca tcttgccttt tttttttttt tttttttgcc - 3721 atgttttcct tttcttggtc atgtataagc aataaagctg ttttttgttc ttcatctttc - 3781 ttaaccccaa attttcttct atgccttagg cttcgatggt tcttccaacc 
cccttaatat - 3841 ggcttagggt ggtttttcaa aacctacaat cccccatttg cactactggc catggaacat - 3901 ttatttctag tgttcctgcc aatcagagat ctctatatta aattctaaaa tgggattaaa - 3961 agaagagttg gagaattcac acttattgag taactgatgt catacaacct ggaatttctg - 4021 aattccaaat aaataaattt cactctttga acatttcatc ttttactttt tagcaccaac - 4081 agacttgata acagcctgat gctgatctga caatgggttg atagccttcc cccactgacc - 4141 cttaaatctg cttagtaaca agtcctttgc ttctgtcatt ctcctggggg atggcctact - 4201 gccctccttt ctgtacaatc tgggcaaacc gactggtgat ggcaagagtg gtgtcaatga - 4261 agcggtctac acagctggag agacaatttt cagtgcgaga gtctaggcga ttccctggct - 4321 tctccacaca tttatcccaa cataactcca tgaagtgatg cacctaaaag gaaagaaaaa - 4381 tagtaaccat tggggtctgc agataggcct aactcttgtt tattaacttc tcactttttg - 4441 acacctgaca aaaccattct ttaaatcaac cacggagtca ctttgggctc ttctctttct - 4501 tcatttccta attagtcacc aatattgatc cagatcctcg tccctacaga ttgtacattc - 4561 tcaggtagga ctaataacct tttatattcc ccaaagtagc tagcaccatt gtgaaccaac - 4621 tagggaacca attaattttc ccaaactatt gcactcattt tattccaaac ctatccccta - 4681 gacaaggtaa ttcttccatt aaccttttgg gggttatgga tccactcccc agaaaaatgc - 4741 acatacaggc acagaaatta tgtgcactgt cattagaatg ttcagggccc aaagatttct - 4801 agccagggtt cttgctggtc atccgagcac cctcttcctc ccacccaaaa ggaaaaaacc - 4861 attttaccaa gagcttacta tttgtgaggc tgggtactaa gaaatcagtt ctctctaagg - 4921 tcctcaagga tagctgtcat cacctcccat ttaagaggcg tgattatgta gtccaaggtc - 4981 atgtagccag caagaagtca ggccgcgtta gaaccatgtc cgaagggctc caaacccttg - 5041 ttctacatcc atagtctaca gcgactactt cagagtccac cttcctccga taatgttcta - 5101 gtcgttttca aatacattgt cgcatatgat cctcacaatc cagtgaggca gtggggtgac - 5161 ggcgaaacca aaacccacag tggatgagta tcttttctac gggcacgtgg cctgcaagga - 5221 gcagagggag gatgagaacc cagatctttc gaatgccagc ccagtcatgt cgccggctaa - 5281 ctagtctccc gtacccttgc gtcgcccaaa tcttccctgc tttacacctt ttcttttctc - 5341 cacgctacgc ttatatacct ggcacctgag cactaccggt caccagggac aggagagcca - 5401 taactttgtc tttcgtgagg ggaatgggat gcagccggga tcgagcacca gtgagccgcc - 5461 agtgtacaga 
cctccgagcg tgcccaggac caccaaggaa ggtgaaactt cctttccctt - 5521 caccctcccc gtccccgcac ctgtgcagta aactgcgcct tctgctgctc ggcggccacc - 5581 aggcgctgca actccgcttc atcggcttcg cccagctccg ccattgttcg cctcaggctc - 5641 gccaccttcc gacagctgtg tttgcgcatg cgcgacgggt gtgcaccgcc tctcgacttc - 5701 cggttcaccc agcatttcct cttccctgtt ttctttcgtc gtcgtgggtg ggaattgtcg - 5761 cctaagtggt tccgggttgg tggatgacct tgagccctca ggaacgagat ggcggttctc - 5821 tggaggctga gtgccgtttg cggtgcccta ggaggccgag gtgaggggtc ttcccaccct - 5881 gaggtgctta gcgtagcctc cagccaggga aggggatgga agtgaggact catctgccgg - 5941 gtgggagatc tcttgaggag aagaaaatac cgaaatcaca gcaatgacca ctgtagtcta - 6001 ggggtccaga tgtttacccg aaggtatatt tcacttgctg tgagctgacg agttgaggga - 6061 ataatcagaa agagagctcc ctctggaagt cgcagtcctg atgaggctaa tccacatagc - 6121 agttctgttt tctccccgtt cactgtccct agaatgctcc ccactcgctc ccaccctgag - 6181 tcgggaaaga gggttaggag cttgcccatt tcttctggag ttggtgtgtt tggatgtggg - 6241 agtggagggg ggatcagttc gaaaatcatt taacctgggc atttgtgtta cctcaggtac - 6301 tgtagtaatg ctaggataca aatgtgaaga aaacgtagtt cccgccctca acgagctttc - 6361 attctgatga ggaaaacact gtcatagtag tccgggcaag atgaggtcct gaactaggaa - 6421 agtagcacaa agaaaagaag attgtctcaa gaactgttaa agagatacgc caataggatt - 6481 tggcgattga atttagggga aaagtctctc atgatttcca gattactgag ttaggttaaa - 6541 gaatgcctga ggtgtcatta aataaagtag ggaacatgca gatgttccct ggtcttaact - 6601 tcacagtaac cccagtgaaa tagatgctat cttcatttta caaataagat gttatcccct - 6661 atttattgtt aagtagctta cctatggtca tttagaaagt ttgtcagtcc tgttaaagga - 6721 gaggttctta tgatcatcct aatgactctt tcctcagctc tgttgcttcg aactccagtg - 6781 gtcagacctg ctcatatctc agcatttctt caggaccgac ctatcccaga atggtgtgga - 6841 gtgcagcaca tacacttgtc accgagccac cattgtatgt tctctccatc gctgctgctt - 6901 tctgggctct agccatcttt accttcacta atggtcatgc ctttagcagg acttcctacc - 6961 tgtagggggg actcttgtgt ccaactttgt caaatgaaga cctagtttac acctttgggc - 7021 agacagtgcc attatggttg aatgatgcca tttataatca tagaagacct tctagcctaa - 7081 gtctttacaa atttttttct tttgttttct 
ttttttttga gacagagtct cgctctgtta - 7141 tgcaggctgg agtgcagtgg tgcgatctcg gctcactgca acctccacct tctgagttca - 7201 agcaattctc ctgcccctgc ctcccgacta gctgggatta caggcgccca ccaccatgcc - 7261 cagctaagtt ttgtactttt agtagagacg ggtttcaccg tgttggccgg gctggtctca - 7321 aactcctgac ctcctcaaac tcctgacctc aggtgatctg cccacctcag cctcccaaag - 7381 tgctgggatt gcaggcgtga gccaccacgc ctggaccact aacttacatt cataaggtgt - 7441 ctgttcactt cactgaacca agactggctt aattctgtag caatacagca attctgtttt - 7501 ttatccatct tcttggatct gtcttctaat caatatgtag gcattgagat acccttgtgc - 7561 taaaagactt caaaaaacag agataaagcc ttcaaaaaac agagatagct tctctcaact - 7621 actattttga tattttactt cctttgtact cagagttata tcctatatgt acactgcctg - 7681 tcagtttggg ttactgtgtg gcatatgttg aacatgaaag atgtgtgttt ctcacatcaa - 7741 cttttatgaa tctggtcctt tttgtagctg gctccaaggc tgcatctctc cactggacta - 7801 gcgagagggt tgtcagtgtt ttgctcctgg gtctgcttcc ggctgcttat ttgaatcctt - 7861 gctctgcgat ggactattcc ctggctgcag ccctcactct tcatggtcac tggcaagtat - 7921 agcaattcca aatatagttg tctgctcagt ttgtttgctg tgagcttgtc ttatgtatta - 7981 tatatgaggg agaagttgat tgaaatgccc taaatttgtt gaaaacttta aaatatatat - 8041 aaaatatgta tatgcctaga tttatatatc tgcctgccta ttcaatgtct ttcaattcga - 8101 tttccgtatc taatagcatt ttgaatgtag cctgtcaaac tgaatttctg atctccccta - 8161 atctgttcct cctgcactct tgccccatct gaattaatgg catttccagt tttctctttg - 8221 ttcaagtcta aacctggtaa gctccaaact tggtcattat ccttgactct agtagtagtc - 8281 tcctaactgg ttcccatttg tgttcattcc caacacagca tacccagtga tcctgttaaa - 8341 atgttaatta gaataagaca ttccccttct caaaacccta ttttgtctct tcatgttact - 8401 caaagtcaga attcttaggg ttgtctgtag actctacatg attgaaccac cctgtttctt - 8461 ccctcatttt tagctgctct ctctctcatt cacactgcct tccttgagat tccttgaaca - 8521 tgccagggac attccaggtc tcagggcttc tcttaactgc ttctgcctga aactcttttt - 8581 ctagctatac acttggcttg ttccttcatc tccttcaggt cttgaatgcc attctctcaa - 8641 caaagccttc cctaaccact tattcaaaat tgcagtttct gtcctgccat tccttatctc - 8701 tcttgcttgt tttatttttc tcaatagtac ttactgcctt ctaatatact 
acatcatctg - 8761 ttcattattt tatatagtaa ttgcttcctt gcattttatc acccaaatgt atatccctaa - 8821 atagtatagt ttattcttgc ctcttaaaaa aaaaaaagag atgtatgtgg gtggattatt - 8881 ttgttatttt tactgtattt gtttgctgtt gtgcctctag agcctagaac tactctctgg - 8941 tacgtgacat atagcactca gtaaatactt gctgaatgaa tgagcatcca gcgcctgttt - 9001 gtttgtttgt ttgtttgttt attttttgag acagagtctc gctctgttgc ccaggctgga - 9061 gtgcaatggt gtgatctcag ctcaccacaa cctccgcctc ccaggtacaa gcgattctcc - 9121 ctgcctcagc ctcccaagta gctgggatta caggcgtgca ccaccacatc cggctaattt - 9181 ttttgtattt ttagtagaga cggggtttcg ccattttggc caggctggtc tggaactcct - 9241 gacctcaggt gatctgcccg cttcagcctc ctaaagtgct gggattacag gcgtgagcca - 9301 ccatgtccgg cccagctttt gtttatttta tctggtatag tttgtatgac tttctacgta - 9361 atgaaccttc taaaatgagg gtaaatgaca ggttgggttc cttgagttgt tctttttaag - 9421 ttttgcaagg gacaatatgt tgtcattgtg gggggtaggg agccactttc ggattgttga - 9481 aactggaagt actttggaga gtatcttaac caactctttt attttacaga tttggaaact - 9541 aagacccagg gaggttaagt gacttgtcag aatcattttc tagctagtgt taaagatttt - 9601 ttttttcttt tttttttggg acggagtctc actctcaccc aggctggagt gcagtggtgt - 9661 gatcttggtt cactgcaacc tctgcctcct gggttcaagc aattctcatg cctcagcctc - 9721 ccaagcagct gggattacag gcatgcgcca gcacacctgg ctcagttttg tattttaaat - 9781 cgagacgggg tttcaccatg ttgtccaggc tggtcttgga ctcctgacct caagtgatcc - 9841 tcccaccttg gcctcccaaa gtgctgggat tataggtgtg aaccaccgtg cccagcctaa - 9901 aggggtttta attatctggt tttgtaggac tggttaggag aatgagttgt tgttgttgat - 9961 gttgtttttt aaccacacag acctttccaa ataaaagatt ccagttgcat atgaaatatt - 10021 agatcacaag tacagtaagt aatatttctc taacatgtca tccctcttga aggagctgtc - 10081 tataatgtgc tccacttcct caacactgag tctcttttag cctgtattaa ttggggtctt - 10141 atcccaacta tataacttag gttattctta ccaagggcac tgaaggcctt catctttcca - 10201 aatccggttg tctattttct gccctccact tgcttgaagt ctcagccgcc ttcaactcaa - 10261 ttaacaattc tccccataag tcacttttct ttggctttcc agatgcatag aagtctcctc - 10321 tgccagatcc ttctcctctt gtctgacctc gagatgacaa aatctccagg gctgagtcat - 10381 
ggccttctta atttctccat ctgtaccctt ttttaggtga gctcagatct gacctgtttt - 10441 tctgagctgc agacttgttt atctaattgt ctaattgaca tccacttgga tgtctgatag - 10501 ttatcccaga tctaacattg gccaaatcgc tcttttttcc ccccaaatct cccttgattt - 10561 ctcctttaaa acccccttct caaagctatg ctcaaactaa aattcttagg agtcattcta - 10621 gatatttctt tttgttctta ccccacttat tccatcagtt ctgttctttc tttaaaaaat - 10681 agtctgaatc ggccaggcgt ggtctcacgc ctgtaatcct agcactttgg gaggccgagg - 10741 cgggcggatc atgaggtcag gagtttgaga ccagcctggc caatatggtg aaaccttgtc - 10801 tctactaata atacaaaaat taagctgggc gtggtggcac acgcctttag tcccagctac - 10861 ttgggagact gaggcagaag aaccgctgga acccaggagg tggaggttgt agtgagccaa - 10921 gatcgtgcca ctgcactcca gcctgggtga cagagtgaga ctctgtctca aaaaaacaaa - 10981 actctgaatc ggtcgagggg aaggggaggg agagcactag gataaatacc taatgcacgc - 11041 ggagcttaaa ccctagatga cgggttgata ggtgcagcaa accaccatgg cacatgtata - 11101 cctgtgtaac aaacctgcac gttcagcaca tgtatcccag aacttaaagt acaagaagaa - 11161 aaaaaaaaag gaaaatggac attattagaa aaaaaaatag tctgaattgt cactttgcac - 11221 cctgtcacca gcctaggtta agacaacagt ctcacgtagg ctacttcggt tgccttctaa - 11281 ctagtttcct cttcagttgt tgctctgtcc cccacccact ctgttcattc atcacactac - 11341 agccatagtg acctttttaa aacgtaaaat tatattcctc caaagtagat atacaaatgg - 11401 ccaataagca catcagaaat tgctcagcat catttgtcat tagagaaatg caaatcaaca - 11461 ccacaatgag atgccacctc acaccccatt aggatggctg taataaaaga tgggcagttt - 11521 taagtgttgc tgaggacttg gagaaattgg aaccctcgtg gttctgctgg gaatataaaa - 11581 cgtttcaatc actttggaaa acattctggc agttacaaca aaaagttaac cttagagtta - 11641 ccatctgact cggcgatcta cttttaggtg tatacccaag agaactgaaa acgtgtccac - 11701 gcaaaaaact tgtacaggaa tgatcatagc agcattattt ataatagcca aagagtggaa - 11761 acaatccaaa tggctgtcag tggatgaata gctaaacaaa atatggtgta ttcatacaat - 11821 agaatattat tcagccatac aaaggaatga tgtattgata aatgctatga catggatgaa - 11881 cattgaaaac attatgctaa gttgacacaa aggccatata ttgtatggct tttttttttt - 11941 tttttttttt tttgagacag agttttgctc ttgtcgctca ggctggagtg caatggtgcg - 12001 
atcttggctc ggcacgatct cggctcagca cgatctcggc ttactgcaac ctccgccttt - 12061 cgggttcaag cgattctcct gcctcagcct cccaagtagc tggcattaca ggcatgtgcc - 12121 accacgcctg gctaattttg tattttttgt agagatgggg tttctccttg ttggtcaggc - 12181 tggtcttgaa ctcccaacat caggtgatcc gcccacttca cccacccaaa gtgctgggat - 12241 tacaggtgtg agccaccacg cccagtggtg tatgggttct tttatatgaa atgtctagaa - 12301 caggcaaagc catagagaca gaaagtaaat ttgtagttgg gtagtgggct aagaggtaga - 12361 gggtttcttt ttggggtaat gagaatgttc tggaattaaa tgatgattgc acaactcaat - 12421 atactaaaaa ccagtggatt atatactaaa aagcgtgaat tttttctggc gtttgactta - 12481 cattgcaata aaactgttat aaaaatagta aaattaggtc actcctaatc tcaggcactc - 12541 cctgcttaaa atgtatcagt ggtttcctat tatatttcat ataaaattca gattccttat - 12601 catgaccttc aaagacccta tgcaaccgac ccctgccttc ctctgatctc tgcttcaccc - 12661 atgtgctcca gccacacaaa atcttactat tcattagaga tgccactttg gtttggatgt - 12721 tctcattcat gtcttccttt ctaaagtgtt tcttctcagt aacatatata tttacttttt - 12781 ccttctcatt cactagtata ttccataagg gtagggatgc tgtctcactt actatactcc - 12841 caacatttat gacagattct ggcacatgat atctacttaa taaattcttt ttgaattaat - 12901 taaaaatact gtagagtttt agttattttt agttattggg gcgaaaggat cacttgcagc - 12961 caagagttca agaccagcct gggcatcatg gtgagaccgc atctttaaag aaataaaaaa - 13021 caggccaggc gcagtagctc acgcctgtag tcccaacact ttgggaggct gaggtgggcg - 13081 gatcacaagg tcaggagttc gagaccagcc tggccaatat ggtgaaaccc tgtctctact - 13141 aaaaatacaa aaattagctg ggtgtggtgg tgggcgcctg tagtcccagc tacttgggag - 13201 gctggggcag gagaatcact tgaacccggg aggcggaggt tgtagtgagc cgagatcgtg - 13261 ccactgcact ccagcctggg caaaagagcg agactctgtc tcaataaata aataaataaa - 13321 taaataaata gaaggaaata aaaaacaatt agccgggcat ggaggcatgc tcctatagtc - 13381 ccagctactt gggaggctga ggcaggagga tcacttgagc tcaggagttc gaggctgtag - 13441 tgagctatga tcaggccact gcactccagc gtgggcaaca gagtaagact ttgtctctaa - 13501 agaaaagtca ctagaaatag acttactgac atacacacgc aaaaggctat acagaatccc - 13561 ctaaagaagc aaacagtgac agtggagtgg caaatggaga cattgcattt gaacttgaca - 13621 
gattgttttt ttgcagccaa gttatctgta tagtcttcta atttcactgt ggttttttat - 13681 tgatgttatg attttttctt tttctttagg ggccttggac aagttgttac tgactatgtt - 13741 catggggatg ccttgcagaa agctgccaag gcagggcttt tggcactttc agctttaacc - 13801 tttgctgggc tttgctattt caactatcac gatgtgggca tctgcaaagc tgttgccatg - 13861 ctgtggaagc tctgaccttt ttgacttcat actttgaaga attgatgtat gcctctttgc - 13921 ctctgctttg tcatgccatt aagctcacaa taaggaagaa ataacagata agtccattgg - 13981 tggacagcct tcttctctta atcacaagat tattttcaga atttaatctt tgaggaaaag - 14041 gtttgagagg aattatatct aagttgtgag actgagttct atattctggt gagttaatgg - 14101 ggttgcctcc cagcttctta taagactcac agtataacta aacatgatat atcagctttt - 14161 gcctttcaat ttatcaatct cttaaagaga atccaacttt attacgatta gtatatgatc - 14221 aaacttccat atttgccttg ggaataatgg acaaagggaa atactcttaa ttcatgaata - 14281 aaaactttgc agaaaattag acagtgttta attttcgaaa acttccctct ctagacagta - 14341 gataccacct actgatggtt acatatacta gggaaatttt aaaattagga aatgctgata - 14401 gctcatatta taaatttcta aatcctagga agaaacgctt ggagtgcttc tgaatataca - 14461 gaagttccat ttaagggcaa gtttccccgt agatgtatca aaatactacc aactgtaaat - 14521 tgagatttaa ttcccaaatg tattctactt gttctaaaac aatctgtcca caaatataaa - 14581 actataagta ataaattgtt attttcgcac aatgggaatc tctaatgtga aaatgtattc - 14641 tatgaaaata atttttttaa ataaaatgtt atataataaa agtgtcttct atgcttttat - 14701 atattagcta tcagtagttt tattcattag aattaggtgt ccattgcatc cacagcatga - 14761 aaacaaaatt gggttttttt tttttttttg agacggagtc gcactctgtc acccaggttg - 14821 gagtgcagtg gcatgatctc ggagcactgc aagctccgcc tcccaggttc atgtcattct - 14881 cctgcctcag ccccccgagt aactgggatt acaggcaccc accaccacgc ccagctaata - 14941 ttttgtattt tcagtggaca tggggtttca ccgtgttagc aaggatggtc tcgaactgct - 15001 gacctcgtga tccacccatc ttggcctccc agagtgctgg gattacaggc atgagccacc - 15061 ccgcccagcc aaaattgagt gtttaaattt tcttctgggg cttagcagtt aggtaaatca - 15121 gacacaaagt actggaaaac aaaaaggact tgtaataggc agaaaaatac caagtgtata - 15181 agtaatttgg tataaacaga gtataaggtt tagtatttag gaataagaac tatgtccaga - 15241 
ttatggtgtt ctgtgaatac cagcctttgg aattcatatt atgttctatg tgtatttagt - 15301 ctggtctaag agttgagtgt aaggcatgat gctgtcttct atcaatatag agtaactggg - 15361 actaagcaca ttttactcac caaggacctt actgggaagc tgaggcaaag gaagagtatg - 15421 tgtttatgaa tgtgtactag gggttaaaga catgctctcc ttgatgtgcc tgtggtcttg - 15481 tagagactca agtgtgcatt agggtttagg aagaggtgat taattctgtt ttggggtggg - 15541 taaaaggttt caaaagaggg tgatgcctga atagcagtgg cacaccaaca gatagtggtg - 15601 gtgggaccag tccaccctgg tgggagagaa tatataaaca atattctgac attgtttaga - 15661 attgctgatg cttgatgatg ataaaaaaca aatggactaa aagtcaattt tattgtttta - 15721 aaattctcta tagacgatcc cctccccacc ttgtagtgtc attccttccc ttccctagct - 15781 ctcagtatac tgctgctgta gagtggagtg atgaaatgga gtcttccata aagtcaaaat - 15841 ggggatggca gtgactctgg ataatatggg aatccatagc tatctctagt accatcagat - 15901 cctagatata caaggctgtc tagtgctaat aagcttcatt agatggtgaa cgaagttgga - 15961 gtggaggctg gttgggcact gcctgcccca tgtccctgct ggtttggtca ttaggatgga - 16021 gctgagctgt ctttgggaag tcagtaggtt gctgtgagct gctggaaaat gaggcaagat - 16081 tccggaaggg tgctgtggct catgcctgta atcccagcac tttgggaggc tgaggcagta - 16141 ggattgcttg aggccaggag tttaaaacca gcgtgggcaa catagcaaga ccctgtctct - 16201 acaaaaatac aataaaacaa aaaaaagatt agccggcatc actagcagtg gcatgtgcct - 16261 gtagttctag ctaacttggg agccgagtgg gagaattgtt tgagtcccag gagtttgaga - 16321 ttacaatgag ctatgatcat accaccgcac tccagcttag gtgacagagt gaaatcttgt - 16381 ctctttaaaa aaaaaaaaaa tccaagggct aacagcaata gtagtttctg acggtaagca - 16441 ggacatgatc agaactgtgc tttagaaata atttgggtac cattaggtgg gttaataagc - 16501 tagaggtaga aggacatgtg aggggataga gacagaataa agggcctgaa ctttgagagt - 16561 agcagtggaa gtaggagtca atctcaaaag gtattgtaga agtcaaaaac attaattcat - 16621 tcatttgata gatactaagt acctcatgta atcagcactg tgccaggtac tgtggataac - 16681 gataaaattt tacctgcctt tgaagggaat tcatagtcta attggggcaa cagaagaatg - 16741 actgcaatac ggtgtagtaa gtgcagtgat ggtgaagcac gtcatgcgct gtgatgtaac - 16801 aaagggtgca gtaaggattt gcctgcaagt ttcctggaag agaagatttg taacctgggt - 16861 
tttgaaagat aaggagctga cagtggctgg ggaagctggg atgaggtgta gggttgactg - 16921 gaggatggca gatcaagcaa aagatgatat gtagtttggt atgacccagc acagggaggg - 16981 actgattgta gaggacctta tatgccacga taagtaattg aaactttatc tttttttttt - 17041 ttttttgaga cagagtctga ctctgtggcc caggctggaa tgtagtggcg cgatcttggc - 17101 tcactgcaac ctctgcctcc catactcaag acatcctccc acctcagcct cttgagtagc - 17161 tgggactaca ggcacatgcc accatccctg gctaattttt gtattttttg tagagatggg - 17221 gtcttgctct gttgcccagg ctggtctcaa actcctggcc tcaagtgatt cacccacctc - 17281 cgcctcccaa agggctggga tttcaggcgt gagccactgt gcctggcctg cgactttatc - 17341 ttgacagcaa tatggagtta ctgaagactt aattatgcaa acggcatgat ccaattttta - 17401 ttttaaatag gttttgctga cagctatgta aagaatggat ttgtgggagg taagcatgga - 17461 aacagattat tttagtgacc cagaaaagaa ataatgaatt gtgaaggcaa ttgtagctag - 17521 aatgaaggag agaacaaatt taagaaatat ttagaactta gaatcaatga gagttatact - 17581 aagatgtgaa cgcttgaggg aggaaggagc tttgctcaat tttttgttgt aaataggtgg - 17641 atgggaggta gtgtcattaa agaaaaatgt tgaaggaaaa ttagatatgg tactggaatc - 17701 agtaactact gaagttttgg atttgtgaga cggctaggaa gagatagaat ttgtagactt - 17761 ctattttttg gcaatatggc agactcaatt ttagaaagta tttactttga tatgaagcct - 17821 gttagaatgt taaacaagaa cacctttaaa aatatttcat ttatttattt gggacggagt - 17881 ttcactcttg ttgcccaggc tggagtgcaa tggcatgatc tcggctcact gcaacctccg - 17941 cttcctgggt tcaagcggtt ctcctgcctc aacctcctga gtagctggga ttacaggtgc - 18001 ccacaagcac acctggctaa tttttgtatt tttagtagag atggggtttc agctatgttg - 18061 gccaggctgg tctcaaaact cctgacctca ggtgatccac ccacctcggc ctcccaaagt - 18121 gctgggatta cagaccactg cacccggcca aaacatatta tttagtttgc aaaaagaaat - 18181 cttctggagc cagaccaaag taagtctgca gctagggagg taagcaagtg cttagggtat - 18241 cagttgcccc taagagtatc tactaagccc aggtggccca gagttctagt cttcttcttc - 18301 ttattattat tattatatat atatttttga gacagagtct cgctctgtca cccaggctgg - 18361 agtgcagtgg tgcgatcttg gctcactgca agctccgcct gccaggttca caccattctc - 18421 ccacctcagc ctctggagta gctgggacta caggcgcctg ccaccatgcc cggctaattt - 18481 
tgttttcata tttttagtag agacggggtt tcaccgtgtc agccaggatg gtcttgatct - 18541 cctgacctca tgatccgcct gcctcggcct cccaaagtgc taggattaca ggcatgagcc - 18601 accacgccta gccattatta tatttttttg agtcagaatc acgctctgtt gtccaggttg - 18661 gagtgcagtg gcgtgatc -// - diff --git a/tests/data/AB026906.1.gb.bz2 b/tests/data/AB026906.1.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..b1ce78c47fa86a001123ec7a8517267b1bf5f68b Binary files /dev/null and b/tests/data/AB026906.1.gb.bz2 differ diff --git a/tests/data/AF230870.1.gb.bz2 b/tests/data/AF230870.1.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..9fa0b2a749dbafd87cd90ad4032dd8f25f7fca8d Binary files /dev/null and b/tests/data/AF230870.1.gb.bz2 differ diff --git a/tests/data/L41870.1.gb.bz2 b/tests/data/L41870.1.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..a097763c3f967f6e991f859433082734c02df119 Binary files /dev/null and b/tests/data/L41870.1.gb.bz2 differ diff --git a/tests/data/LRG_1.xml.bz2 b/tests/data/LRG_1.xml.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..caa51bb8d1dc8ca90fdecf6994b4b4456c6bcdca Binary files /dev/null and b/tests/data/LRG_1.xml.bz2 differ diff --git a/tests/data/NG_008939.1.gb.bz2 b/tests/data/NG_008939.1.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..e43a28805c3dfd03d81bb57699bf56aba43a6ec2 Binary files /dev/null and b/tests/data/NG_008939.1.gb.bz2 differ diff --git a/tests/data/NG_009105.1.gb.bz2 b/tests/data/NG_009105.1.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..55ffa72ab9b1639308fa75041384b80f2629f1bf Binary files /dev/null and b/tests/data/NG_009105.1.gb.bz2 differ diff --git a/tests/data/NG_012337.1.gb.bz2 b/tests/data/NG_012337.1.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..6e7b9156cc3c0592f82b1b0b57006ca99a974e21 Binary files /dev/null and b/tests/data/NG_012337.1.gb.bz2 differ diff --git 
a/tests/data/NG_012772.1.gb.bz2 b/tests/data/NG_012772.1.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..f84de8136bb5d958966a28ef9a46321670cefb4f Binary files /dev/null and b/tests/data/NG_012772.1.gb.bz2 differ diff --git a/tests/data/NM_000059.3.gb.bz2 b/tests/data/NM_000059.3.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..5750f91b66d31320a68e511c3e4219d0c22ac3a6 Binary files /dev/null and b/tests/data/NM_000059.3.gb.bz2 differ diff --git a/tests/data/NM_000088.3.gb.bz2 b/tests/data/NM_000088.3.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..85a76854d756a2b929f6186f86d53e220d9e8006 Binary files /dev/null and b/tests/data/NM_000088.3.gb.bz2 differ diff --git a/tests/data/NM_000132.3.gb.bz2 b/tests/data/NM_000132.3.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..ee2dea38b44319cf66aa067edc2807e7c05ef542 Binary files /dev/null and b/tests/data/NM_000132.3.gb.bz2 differ diff --git a/tests/data/NM_000143.3.gb.bz2 b/tests/data/NM_000143.3.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..9f2e4b78ed405226779fa56fdfa2ca4d9a1f74a9 Binary files /dev/null and b/tests/data/NM_000143.3.gb.bz2 differ diff --git a/tests/data/NM_000193.2.gb.bz2 b/tests/data/NM_000193.2.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..0ff673494f3bd5f0d6e4e32407e405e9b4b8c280 Binary files /dev/null and b/tests/data/NM_000193.2.gb.bz2 differ diff --git a/tests/data/NM_002001.2.gb.bz2 b/tests/data/NM_002001.2.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..b66995153bf9dacf06407c09dd6fd90525bd902d Binary files /dev/null and b/tests/data/NM_002001.2.gb.bz2 differ diff --git a/tests/data/NM_003002.2.gb.bz2 b/tests/data/NM_003002.2.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..b672d34c93e8758a13da6a34c4f14b3bfe71bcf4 Binary files /dev/null and b/tests/data/NM_003002.2.gb.bz2 differ 
diff --git a/tests/data/NM_203473.1.gb.bz2 b/tests/data/NM_203473.1.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..9890fec6f38a2239fb09a10300818f99aeb76fdd Binary files /dev/null and b/tests/data/NM_203473.1.gb.bz2 differ diff --git a/tests/data/NP_064445.1.gb.bz2 b/tests/data/NP_064445.1.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..0250af7cb52014813aa3a757e5b0f3e3ef9b1d2e Binary files /dev/null and b/tests/data/NP_064445.1.gb.bz2 differ diff --git a/tests/data/UD_139015194859.gb.bz2 b/tests/data/UD_139015194859.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..74a412ed679809021b903d4c31bc2df8d34a0050 Binary files /dev/null and b/tests/data/UD_139015194859.gb.bz2 differ diff --git a/tests/data/UD_139015208095.gb.bz2 b/tests/data/UD_139015208095.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..cb683fb365ae19f5bf8a90e9bc572426fea9553a Binary files /dev/null and b/tests/data/UD_139015208095.gb.bz2 differ diff --git a/tests/data/UD_139015213982.gb.bz2 b/tests/data/UD_139015213982.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..a8749e6794a9e3d9457f68c63ab23d57dde3b3d6 Binary files /dev/null and b/tests/data/UD_139015213982.gb.bz2 differ diff --git a/tests/data/UD_139015218717.gb.bz2 b/tests/data/UD_139015218717.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..5babb1b503fb05d1add0aff5a2e98c5713038eb8 Binary files /dev/null and b/tests/data/UD_139015218717.gb.bz2 differ diff --git a/tests/data/UD_139015349377.gb.bz2 b/tests/data/UD_139015349377.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..5f27e7c7fa820d05e22f4132059c4f7a979fa869 Binary files /dev/null and b/tests/data/UD_139015349377.gb.bz2 differ diff --git a/tests/data/UD_139022298843.gb.bz2 b/tests/data/UD_139022298843.gb.bz2 new file mode 100644 index 
0000000000000000000000000000000000000000..583bd847a22f3d152c3f31e77ba0ed76197257ad Binary files /dev/null and b/tests/data/UD_139022298843.gb.bz2 differ diff --git a/tests/data/rs9919552.xml.bz2 b/tests/data/rs9919552.xml.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..f6ace4bf53a49762fd82562910dafe41715eca3e Binary files /dev/null and b/tests/data/rs9919552.xml.bz2 differ diff --git a/tests/fixtures.py b/tests/fixtures.py new file mode 100644 index 0000000000000000000000000000000000000000..6d6d4ffeba2905f05d68ca04866379b4c7eeab11 --- /dev/null +++ b/tests/fixtures.py @@ -0,0 +1,641 @@ +""" +Fixtures for unit tests. + +Each fixture is defined by a function which when called sets up the fixture. +The order of calling can be important (e.g., fixtures using the database such +as :func:`hg19` must be called after the :func:`database` fixture). +""" + + +import os +import shutil + +from mutalyzer.config import settings +from mutalyzer.db import Base, session +from mutalyzer.db.models import (Assembly, Chromosome, Reference, + TranscriptMapping, TranscriptProteinLink) + + +#: Reference definitions for use with the :func:`cache` fixture. 
REFERENCES = {
    # Reference definitions for use with the `cache` fixture, keyed by the
    # name passed to `cache(...)`.
    #
    # Fields of every entry, as consumed by `cache`:
    #   'filename'   -- file below tests/data that is copied to the cache
    #                   directory (required).
    #   'checksum'   -- checksum stored on the `Reference` row (required).
    #   'geninfo_id' -- GenInfo identifier stored on the `Reference` row
    #                   (optional, defaults to None).
    #   'accession'  -- accession stored on the `Reference` row (optional,
    #                   defaults to the entry key). Used by the UD_* slices
    #                   that are keyed by a descriptive name instead.
    #   'links'      -- list of (transcript, protein) accession pairs, each
    #                   added as a `TranscriptProteinLink`; protein may be
    #                   None (optional, defaults to no links).
    'AB026906.1': {
        'filename': 'AB026906.1.gb.bz2',
        'checksum': '29b003d5a71af74dc61a92d2ef5cd5d9',
        'geninfo_id': '5295993'},
    'AL449423.14': {
        'filename': 'AL449423.14.gb.bz2',
        'checksum': '00a014242818a3b003b4c077af9e10e0',
        'geninfo_id': '16944057'},
    'NM_000059.3': {
        'filename': 'NM_000059.3.gb.bz2',
        'checksum': 'f93216b3a596adab279ebd7903258548',
        'geninfo_id': '119395733'},
    'NM_003002.2': {
        'filename': 'NM_003002.2.gb.bz2',
        'checksum': '990aa672364937335365609617df3050',
        'geninfo_id': '222352156'},
    'NG_012772.1': {
        'filename': 'NG_012772.1.gb.bz2',
        'checksum': '163881f00c9c26516d52a4ddb34f941f',
        'geninfo_id': '256574794',
        'links': [('NM_052818', 'NP_438169'),
                  ('NM_001079691', 'NP_001073159'),
                  ('NM_000059', 'NP_000050'),
                  ('NM_001136571', 'NP_001130043')]},
    'AA010203.1': {
        'filename': 'AA010203.1.gb.bz2',
        'checksum': '57cee03becb77ce68a225b9c844afb24',
        'geninfo_id': '1471230'},
    'NM_000088.3': {
        'filename': 'NM_000088.3.gb.bz2',
        'checksum': '5d1f23e3c1799bdb5586c6786b5d5744',
        'geninfo_id': '110349771'},
    'NM_000143.3': {
        'filename': 'NM_000143.3.gb.bz2',
        'checksum': 'c91799f40fdc0466bf7702af14cf070a',
        'geninfo_id': '299758401'},
    'NM_002001.2': {
        'filename': 'NM_002001.2.gb.bz2',
        'checksum': '7fd5aa4fe864fd5193f224fca8cea70d',
        'geninfo_id': '31317229'},
    'NG_008939.1': {
        'filename': 'NG_008939.1.gb.bz2',
        'checksum': '114a03e16ad2f63531d796c2fb0d7039',
        'geninfo_id': '211938431',
        'links': [('NM_000532', 'NP_000523')]},
    'NM_000193.2': {
        'filename': 'NM_000193.2.gb.bz2',
        'checksum': '86d03e1cf38c1387d90116539ea0678f',
        'geninfo_id': '21071042'},
    'NP_064445.1': {
        'filename': 'NP_064445.1.gb.bz2',
        'checksum': '33ea9315882b4a9d8c33018a201be2fa',
        'geninfo_id': '9910526'},
    'L41870.1': {
        'filename': 'L41870.1.gb.bz2',
        'checksum': '91b1e539a053f731f95d230a06710897',
        'geninfo_id': '793994'},
    'NG_009105.1': {
        'filename': 'NG_009105.1.gb.bz2',
        'checksum': 'f2579e6c4a8ead4566e485aad493ef7e',
        'geninfo_id': '216548283',
        'links': [('NM_020061', 'NP_064445')]},
    'AF230870.1': {
        'filename': 'AF230870.1.gb.bz2',
        'checksum': '9fefa34f40d94910edb5de34a3f98910',
        'geninfo_id': '7739657'},
    'NG_012337.1': {
        'filename': 'NG_012337.1.gb.bz2',
        'checksum': 'ad712f4f225398d2b11b4f08110c70e6',
        'geninfo_id': '254039638',
        'links': [('NM_018195', 'NP_060665'),
                  ('NM_001082969', 'NP_001076438'),
                  ('NM_001082970', 'NP_001076439'),
                  ('NM_003002', 'NP_002993'),
                  ('NM_012459', 'NP_036591')]},
    'NM_203473.1': {
        # The data file is stored compressed as NM_203473.1.gb.bz2, like
        # all other GenBank references; the bare accession is not a file.
        'filename': 'NM_203473.1.gb.bz2',
        'checksum': 'ec8fbdeda11ef8ec953e4ed39e9a84e5',
        'geninfo_id': '45439330'},
    'NM_000132.3': {
        'filename': 'NM_000132.3.gb.bz2',
        'checksum': '94569bee76d7c8b1168e17df4fe1dcb4',
        'geninfo_id': '192448441'},
    'LRG_1': {
        'filename': 'LRG_1.xml.bz2',
        'checksum': '5b8f5a39fcd9e3005688eddffd482746'},
    'DMD': {
        'accession': 'UD_139015194859',
        'filename': 'UD_139015194859.gb.bz2',
        'checksum': '2cc769c3f636c722142c0aae12662bd4',
        'links': [('NM_000109', 'NP_000100'),
                  ('NM_004006', 'NP_003997'),
                  ('NM_004009', 'NP_004000'),
                  ('NM_004010', 'NP_004001'),
                  ('NM_004007', None),
                  ('NM_004011', 'NP_004002'),
                  ('NM_004012', 'NP_004003'),
                  ('NM_004023', 'NP_004014'),
                  ('NM_004020', 'NP_004011'),
                  ('NM_004022', 'NP_004013'),
                  ('NM_004021', 'NP_004012'),
                  ('NM_004013', 'NP_004004'),
                  ('NM_004014', 'NP_004005'),
                  ('NM_004018', 'NP_004009'),
                  ('NM_004017', 'NP_004008'),
                  ('NM_004016', 'NP_004007'),
                  ('NM_004015', 'NP_004006'),
                  ('NM_004019', 'NP_004010')]},
    'DPYD': {
        'accession': 'UD_139015208095',
        'filename': 'UD_139015208095.gb.bz2',
        'checksum': 'b2b9d402a6e43f80ce1e9bbb72a3c0c6',
        'links': [('NR_046590', None),
                  ('XM_005270562', 'XP_005270619'),
                  ('NM_000110', 'NP_000101'),
                  ('XM_005270561', 'XP_005270618'),
                  ('XM_005270563', 'XP_005270620'),
                  ('XM_005270564', 'XP_005270621'),
                  ('NM_001160301', 'NP_001153773')]},
    'MARK1': {
        'accession': 'UD_139015213982',
        'filename': 'UD_139015213982.gb.bz2',
        'checksum': '0d63a8fe5beddeb793940f6ae194b985',
        'links': [('NM_018650', 'NP_061120'),
                  ('XM_005273133', None),
                  ('XM_005273134', 'XP_005273191'),
                  ('XM_005273135', None),
                  ('XM_005273136', None)]},
    'A1BG': {
        'accession': 'UD_139015218717',
        'filename': 'UD_139015218717.gb.bz2',
        'checksum': 'e179de8b248806815394c4f7496ba872',
        'links': [('NM_001207009', 'NP_001193938'),
                  ('NM_198458', 'NP_940860'),
                  ('XM_005258578', 'XP_005258635'),
                  ('XM_005258577', 'XP_005258634'),
                  ('NR_015380', None),
                  ('NM_130786', 'NP_570602'),
                  ('XM_005258393', 'XP_005258450')]},
    'chr9_reverse': {
        'accession': 'UD_139015349377',
        'filename': 'UD_139015349377.gb.bz2',
        'checksum': 'd21f92d09116c4831ce8d3ef832aa281',
        'links': [('NM_001195250', 'NP_001182179'),
                  ('NR_036576', None),
                  ('NR_036577', None),
                  ('NM_001195252', 'NP_001182181'),
                  ('NM_001195248', 'NP_001182177'),
                  ('NM_175069', 'NP_778239'),
                  ('NM_175073', 'NP_778243'),
                  ('NM_001195251', 'NP_001182180'),
                  ('NM_001195254', 'NP_001182183'),
                  ('NR_036578', None),
                  ('NR_036579', None),
                  ('NM_001195249', 'NP_001182178')]},
    'COL1A1': {
        'accession': 'UD_139022298843',
        'filename': 'UD_139022298843.gb.bz2',
        'checksum': '815517e36fb380b52842ace6a6e78637',
        'links': [('XM_005257059', 'XP_005257116'),
                  ('XM_005257058', 'XP_005257115'),
                  ('NM_000088', 'NP_000079')]}}
+ """ + assembly = Assembly('GRCh37', 9606, 'Homo sapiens', alias='hg19') + session.add(assembly) + + session.add_all(Chromosome(assembly, name, accession, organelle_type) + for accession, name, organelle_type in [ + ('NC_000001.10', 'chr1', 'chromosome'), + ('NC_000002.11', 'chr2', 'chromosome'), + ('NC_000003.11', 'chr3', 'chromosome'), + ('NC_000004.11', 'chr4', 'chromosome'), + ('NC_000005.9', 'chr5', 'chromosome'), + ('NC_000006.11', 'chr6', 'chromosome'), + ('NC_000007.13', 'chr7', 'chromosome'), + ('NC_000008.10', 'chr8', 'chromosome'), + ('NC_000009.11', 'chr9', 'chromosome'), + ('NC_000010.10', 'chr10', 'chromosome'), + ('NC_000011.9', 'chr11', 'chromosome'), + ('NC_000012.11', 'chr12', 'chromosome'), + ('NC_000013.10', 'chr13', 'chromosome'), + ('NC_000014.8', 'chr14', 'chromosome'), + ('NC_000015.9', 'chr15', 'chromosome'), + ('NC_000016.9', 'chr16', 'chromosome'), + ('NC_000017.10', 'chr17', 'chromosome'), + ('NC_000018.9', 'chr18', 'chromosome'), + ('NC_000019.9', 'chr19', 'chromosome'), + ('NC_000020.10', 'chr20', 'chromosome'), + ('NC_000021.8', 'chr21', 'chromosome'), + ('NC_000022.10', 'chr22', 'chromosome'), + ('NC_000023.10', 'chrX', 'chromosome'), + ('NC_000024.9', 'chrY', 'chromosome'), + ('NT_167244.1', 'chr6_apd_hap1', 'chromosome'), + ('NT_113891.2', 'chr6_cox_hap2', 'chromosome'), + ('NT_167245.1', 'chr6_dbb_hap3', 'chromosome'), + ('NT_167246.1', 'chr6_mann_hap4', 'chromosome'), + ('NT_167247.1', 'chr6_mcf_hap5', 'chromosome'), + ('NT_167248.1', 'chr6_qbl_hap6', 'chromosome'), + ('NT_167249.1', 'chr6_ssto_hap7', 'chromosome'), + ('NT_167250.1', 'chr4_ctg9_hap1', 'chromosome'), + ('NT_167251.1', 'chr17_ctg5_hap1', 'chromosome'), + ('NC_012920.1', 'chrM', 'mitochondrion')]) + + session.commit() + + +def hg19_transcript_mappings(): + """ + Fixture for some selected transcript mappings in the GRCh37/hg19 genome + assembly. Depends on the :func:`hg19` fixture. 
+ """ + chromosome_1 = Chromosome.query.filter_by(accession='NC_000001.10').one() + chromosome_3 = Chromosome.query.filter_by(accession='NC_000003.11').one() + chromosome_6 = Chromosome.query.filter_by(accession='NC_000006.11').one() + chromosome_7 = Chromosome.query.filter_by(accession='NC_000007.13').one() + chromosome_8 = Chromosome.query.filter_by(accession='NC_000008.10').one() + chromosome_11 = Chromosome.query.filter_by(accession='NC_000011.9').one() + chromosome_20 = Chromosome.query.filter_by(accession='NC_000020.10').one() + chromosome_22 = Chromosome.query.filter_by(accession='NC_000022.10').one() + chromosome_x = Chromosome.query.filter_by(accession='NC_000023.10').one() + chromosome_mt = Chromosome.query.filter_by(accession='NC_012920.1').one() + + session.add_all([chromosome_1, chromosome_6, chromosome_8, chromosome_11, + chromosome_20, chromosome_22, chromosome_mt]) + + session.add(TranscriptMapping( + chromosome_11, + 'refseq', + 'NM_003002', + 'SDHD', + 'forward', + 111957571, + 111966518, + [111957571, 111958581, 111959591, 111965529], + [111957683, 111958697, 111959735, 111966518], + 'ncbi', + transcript=1, + cds=(111957632, 111965694), + select_transcript=False, + version=2)) + session.add(TranscriptMapping( + chromosome_11, + 'refseq', + 'NR_028383', + 'TIMM8B', + 'reverse', + 111955524, + 111957522, + [111955524, 111956702, 111957364], + [111956186, 111957034, 111957522], + 'ncbi', + transcript=1, + cds=None, + select_transcript=False, + version=1)) + session.add(TranscriptMapping( + chromosome_6, + 'refseq', + 'NM_000500', + 'CYP21A2', + 'forward', + 32006082, + 32009419, + [32006082, 32006499, 32006871, 32007133, 32007323, 32007526, + 32007782, 32008183, 32008445, 32008646], + [32006401, 32006588, 32007025, 32007234, 32007424, 32007612, + 32007982, 32008361, 32008548, 32009419], + 'ncbi', + transcript=1, + cds=(32006200, 32008911), + select_transcript=False, + version=5)) + session.add(TranscriptMapping( + chromosome_22, + 'refseq', + 
'NM_001145134', + 'CPT1B', + 'reverse', + 51007290, + 51017096, + [51007290, 51007765, 51008005, 51008722, 51009320, 51009587, + 51009804, 51010435, 51010632, 51011304, 51011949, 51012764, + 51012922, 51014464, 51014627, 51015286, 51015753, 51016204, + 51016978], + [51007510, 51007850, 51008097, 51008835, 51009472, 51009721, + 51009968, 51010551, 51010737, 51011489, 51012144, 51012848, + 51013029, 51014541, 51014764, 51015463, 51015892, 51016363, + 51017096], + 'ncbi', + transcript=1, + cds=(51007767, 51016344), + select_transcript=False, + version=1)) + session.add(TranscriptMapping( + chromosome_22, + 'refseq', + 'NR_021492', + 'LOC100144603', + 'forward', + 51021455, + 51022356, + [51021455, 51022027], + [51021752, 51022356], + 'ncbi', + transcript=1, + cds=None, + select_transcript=False, + version=1)) + session.add(TranscriptMapping( + chromosome_1, + 'refseq', + 'NM_001007553', + 'CSDE1', + 'reverse', + 115259538, + 115300624, + [115259538, 115261234, 115262200, 115263160, 115266504, 115267842, + 115268832, 115269604, 115272879, 115273129, 115275225, 115276353, + 115276610, 115277063, 115279379, 115280092, 115280584, 115282313, + 115292442, 115300546], + [115260837, 115261366, 115262363, 115263338, 115266623, 115267954, + 115269007, 115269711, 115273043, 115273269, 115275437, 115276478, + 115276738, 115277144, 115279476, 115280184, 115280693, 115282511, + 115292828, 115300624], + 'ncbi', + transcript=1, + cds=(115260790, 115282511), + select_transcript=False, + version=1)) + session.add(TranscriptMapping( + chromosome_1, + 'refseq', + 'NM_001130523', + 'CSDE1', + 'reverse', + 115259538, + 115300671, + [115259538, 115261234, 115262200, 115263160, 115266504, 115267842, + 115268832, 115269604, 115272879, 115273129, 115275225, 115276353, + 115276610, 115277063, 115279379, 115280584, 115282313, 115284148, + 115292442, 115300546], + [115260837, 115261366, 115262363, 115263338, 115266623, 115267954, + 115269007, 115269711, 115273043, 115273269, 115275437, 115276478, 
+ 115276738, 115277144, 115279476, 115280693, 115282511, 115284294, + 115292828, 115300671], + 'ncbi', + transcript=1, + cds=(115260790, 115284285), + select_transcript=False, + version=1)) + session.add(TranscriptMapping( + chromosome_1, + 'refseq', + 'NM_002241', + 'KCNJ10', + 'reverse', + 160007257, + 160040051, + [160007257, 160039812], + [160012322, 160040051], + 'ncbi', + transcript=1, + cds=(160011183, 160012322), + select_transcript=False, + version=4)) + session.add(TranscriptMapping( + chromosome_20, + 'refseq', + 'NM_001162505', + 'TMEM189', + 'reverse', + 48740274, + 48770335, + [48740274, 48744512, 48746083, 48747402, 48760039, 48770054], + [48741716, 48744724, 48746227, 48747484, 48760158, 48770335], + 'ncbi', + transcript=1, + cds=(48741595, 48770174), + select_transcript=False, + version=1)) + session.add(TranscriptMapping( + chromosome_8, + 'refseq', + 'NM_017780', + 'CHD7', + 'forward', + 61591339, + 61779465, + [61591339, 61653818, 61693559, 61707545, 61712947, 61714087, + 61720776, 61728946, 61732566, 61734349, 61734583, 61735062, + 61736399, 61741222, 61742881, 61748632, 61749376, 61750227, + 61750635, 61754203, 61754406, 61757423, 61757809, 61761074, + 61761610, 61763052, 61763591, 61763821, 61764578, 61765057, + 61765388, 61766922, 61768534, 61769004, 61773463, 61774755, + 61775107, 61777575], + [61591641, 61655656, 61693989, 61707686, 61713084, 61714152, + 61720831, 61729060, 61732649, 61734486, 61734704, 61735305, + 61736575, 61741365, 61743136, 61748842, 61749571, 61750394, + 61750814, 61754313, 61754611, 61757622, 61757968, 61761163, + 61761713, 61763181, 61763663, 61763878, 61764806, 61765265, + 61766059, 61767082, 61768761, 61769447, 61773684, 61774895, + 61775211, 61779465], + 'ncbi', + transcript=1, + cds=(61653992, 61778492), + select_transcript=False, + version=2)) + session.add(TranscriptMapping( + chromosome_mt, + 'refseq', + 'NC_012920', + 'ND4', + 'forward', + 10760, + 12137, + [10760], + [12137], + 'reference', + transcript=1, 
+ cds=(10760, 12137), + select_transcript=True, + version=1)) + session.add(TranscriptMapping( + chromosome_1, + 'refseq', + 'NM_002001', + 'FCER1A', + 'forward', + 159259504, + 159278014, + [159259504, 159272096, 159272644, 159273718, 159275778, 159277538], + [159259543, 159272209, 159272664, 159273972, 159276035, 159278014], + 'ncbi', + transcript=1, + cds=(159272155, 159277722), + select_transcript=False, + version=2)) + session.add(TranscriptMapping( + chromosome_7, + 'refseq', + 'XM_001715131', + 'LOC100132858', + 'reverse', + 19828, + 36378, + [19828, 20834, 31060, 32957, 35335, 36224], + [19895, 21029, 31437, 33107, 35541, 36378], + 'ncbi', + transcript=1, + cds=(19828, 36378), + select_transcript=False, + version=2)) + session.add(TranscriptMapping( + chromosome_x, + 'refseq', + 'NM_004011', + 'DMD', + 'reverse', + 31137345, + 32430371, + [31137345, 31144759, 31152219, 31164408, 31165392, 31187560, + 31190465, 31191656, 31196049, 31196786, 31198487, 31200855, + 31222078, 31224699, 31227615, 31241164, 31279072, 31341715, + 31366673, 31462598, 31496223, 31497100, 31514905, 31525398, + 31645790, 31676107, 31697492, 31747748, 31792077, 31838092, + 31854835, 31893305, 31947713, 31950197, 31986456, 32235033, + 32305646, 32328199, 32360217, 32361251, 32364060, 32366523, + 32380905, 32382699, 32383137, 32398627, 32404427, 32407618, + 32408188, 32429869, 32430279], + [31140047, 31144790, 31152311, 31164531, 31165635, 31187718, + 31190530, 31191721, 31196087, 31196922, 31198598, 31201021, + 31222235, 31224784, 31227816, 31241238, 31279133, 31341775, + 31366751, 31462744, 31496491, 31497220, 31515061, 31525570, + 31645979, 31676261, 31697703, 31747865, 31792309, 31838200, + 31854936, 31893490, 31947862, 31950344, 31986631, 32235180, + 32305818, 32328393, 32360399, 32361403, 32364197, 32366645, + 32381075, 32382827, 32383316, 32398797, 32404582, 32407791, + 32408298, 32430030, 32430371], + 'ncbi', + transcript=1, + cds=(31140036, 32430326), + select_transcript=False, + 
version=3)) + session.add(TranscriptMapping( + chromosome_x, + 'refseq', + 'NM_004019', + 'DMD', + 'reverse', + 31196312, + 31285024, + [31196312, 31198487, 31200855, 31222078, 31224699, 31227615, + 31241164, 31279072, 31284927], + [31196922, 31198598, 31201021, 31222235, 31224784, 31227816, + 31241238, 31279133, 31285024], + 'ncbi', + transcript=1, + cds=(31196782, 31284946), + select_transcript=False, + version=2)) + session.add(TranscriptMapping( + chromosome_x, + 'refseq', + 'NM_004007', + 'DMD', + 'reverse', + 31137345, + 33038317, + [31137345, 31144759, 31152219, 31164408, 31165392, 31187560, + 31190465, 31191656, 31196049, 31196786, 31198487, 31200855, + 31222078, 31224699, 31227615, 31241164, 31279072, 31341715, + 31366673, 31462598, 31496223, 31497100, 31514905, 31525398, + 31645790, 31676107, 31697492, 31747748, 31792077, 31838092, + 31854835, 31893305, 31947713, 31950197, 31986456, 32235033, + 32305646, 32328199, 32360217, 32361251, 32364060, 32366523, + 32380905, 32382699, 32383137, 32398627, 32404427, 32407618, + 32408188, 32429869, 32456358, 32459297, 32466573, 32472779, + 32481556, 32482703, 32486615, 32490281, 32503036, 32509394, + 32519872, 32536125, 32563276, 32583819, 32591647, 32591862, + 32613874, 32632420, 32662249, 32663081, 32715987, 32717229, + 32827610, 32834585, 32841412, 32862900, 32867845, 33038256], + [31140047, 31144790, 31152311, 31164531, 31165635, 31187718, + 31190530, 31191721, 31196087, 31196922, 31198598, 31201021, + 31222235, 31224784, 31227816, 31241238, 31279133, 31341775, + 31366751, 31462744, 31496491, 31497220, 31515061, 31525570, + 31645979, 31676261, 31697703, 31747865, 31792309, 31838200, + 31854936, 31893490, 31947862, 31950344, 31986631, 32235180, + 32305818, 32328393, 32360399, 32361403, 32364197, 32366645, + 32381075, 32382827, 32383316, 32398797, 32404582, 32407791, + 32408298, 32430030, 32456507, 32459431, 32466755, 32472949, + 32481711, 32482816, 32486827, 32490426, 32503216, 32509635, + 32519959, 32536248, 
32563451, 32583998, 32591754, 32591963, + 32613993, 32632570, 32662430, 32663269, 32716115, 32717410, + 32827728, 32834757, 32841504, 32862977, 32867937, 33038317], + 'ncbi', + transcript=1, + cds=(31140036, 32834745), + select_transcript=False, + version=2)) + session.add(TranscriptMapping( + chromosome_x, + 'refseq', + 'NM_203473', + 'PORCN', + 'forward', + 48367371, + 48379202, + [48367371, 48368172, 48369683, 48370280, 48370714, 48370977, + 48371223, 48372628, 48372913, 48374105, 48374278, 48374449, + 48375571, 48378763], + [48367491, 48368344, 48369875, 48370323, 48370895, 48371107, + 48371240, 48372753, 48373013, 48374181, 48374341, 48374534, + 48375681, 48379202], + 'ncbi', + transcript=1, + cds=(48368209, 48378864), + select_transcript=False, + version=1)) + session.add(TranscriptMapping( + chromosome_x, + 'refseq', + 'NM_000132', + 'F8', + 'reverse', + 154064063, + 154250998, + [154064063, 154088707, 154089993, 154091358, 154124352, 154128141, + 154129646, 154130326, 154132181, 154132571, 154133086, 154134695, + 154156846, 154175973, 154182167, 154185232, 154189350, 154194245, + 154194701, 154197606, 154212962, 154215512, 154221211, 154225248, + 154227754, 154250685], + [154066027, 154088883, 154090141, 154091502, 154124507, 154128226, + 154129717, 154130442, 154132363, 154132799, 154133298, 154134848, + 154159951, 154176182, 154182317, 154185446, 154189443, 154194416, + 154194962, 154197827, 154213078, 154215580, 154221423, 154225370, + 154227875, 154250998], + 'ncbi', + transcript=1, + cds=(154065872, 154250827), + select_transcript=False, + version=3)) + session.add(TranscriptMapping( + chromosome_3, + 'refseq', + 'NM_000249', + 'MLH1', + 'forward', + 37034841, + 37092337, + [37034841, 37038110, 37042446, 37045892, 37048482, 37050305, + 37053311, 37053502, 37055923, 37058997, 37061801, 37067128, + 37070275, 37081677, 37083759, 37089010, 37090008, 37090395, + 37091977], + [37035154, 37038200, 37042544, 37045965, 37048554, 37050396, + 37053353, 37053590, 
def cache(*references):
    """
    Build a cache fixture for the given references.

    Each name in `references` must be a key of `REFERENCES`. Nothing is
    touched until the returned function is called; at that point, for every
    requested reference, the data file is copied into `settings.CACHE_DIR`,
    a `Reference` row is added and a `TranscriptProteinLink` row is added
    for every transcript/protein pair the entry lists. All rows are
    committed in one go at the end.

    @arg references: Names of entries in `REFERENCES`.
    @return: Function without arguments that sets up the fixture.
    """
    def cache_with_references():
        for name in references:
            definition = REFERENCES[name]

            # Entries keyed by a descriptive name carry their real
            # accession explicitly; all others are keyed by accession.
            accession = definition.get('accession', name)

            # Data files live in the `data` directory next to this module.
            tests_dir = os.path.dirname(os.path.realpath(__file__))
            source = os.path.join(tests_dir, 'data', definition['filename'])
            shutil.copy(source, settings.CACHE_DIR)

            reference_row = Reference(
                accession,
                definition['checksum'],
                geninfo_identifier=definition.get('geninfo_id'))
            session.add(reference_row)

            for transcript, protein in definition.get('links', []):
                session.add(TranscriptProteinLink(transcript, protein))

        session.commit()

    return cache_with_references
""" - def setup(self): - utils.create_test_environment(database=True) - - def teardown(self): - utils.destroy_environment() - def test_splice_sites(self): """ Check whether the gene on the forward strand has the right splice diff --git a/tests/test_describe.py b/tests/test_describe.py index c922deed7a4e71a559c09875c2d1459c4eb49d75..8a7967a29f615007a460e09158d79fb7e0797a1d 100644 --- a/tests/test_describe.py +++ b/tests/test_describe.py @@ -10,19 +10,13 @@ from nose.tools import * import mutalyzer from mutalyzer import describe -import utils +from utils import MutalyzerTest -class TestDescribe(): +class TestDescribe(MutalyzerTest): """ Test the mytalyzer.describe module. """ - def setup(self): - utils.create_test_environment() - - def teardown(self): - utils.destroy_environment() - def test1(self): """ Test 1. diff --git a/tests/test_grammar.py b/tests/test_grammar.py index 3253ad773cba96293a6e6cbfb1e7c8eb06581e7c..f9bf9ec9c637355bfa8abcef5a73afc353b8b38f 100644 --- a/tests/test_grammar.py +++ b/tests/test_grammar.py @@ -11,24 +11,18 @@ import mutalyzer from mutalyzer.grammar import Grammar from mutalyzer.output import Output -import utils +from utils import MutalyzerTest -class TestGrammar(): +class TestGrammar(MutalyzerTest): """ Test the mytalyzer.grammar module. """ def setup(self): - """ - Initialize test Grammar instance. - """ - utils.create_test_environment() + super(TestGrammar, self).setup() self.output = Output(__file__) self.grammar = Grammar(self.output) - def teardown(self): - utils.destroy_environment() - def _parse(self, description): """ Parse a variant description. diff --git a/tests/test_mapping.py b/tests/test_mapping.py index 4287d2f49e3654a4895c22c5e8f1d3f46eb2d8a9..4b2af7d4107dd44309012ee0f40ebbd5dcd2d8f1 100644 --- a/tests/test_mapping.py +++ b/tests/test_mapping.py @@ -5,33 +5,34 @@ Tests for the mapping module. 
#import logging; logging.basicConfig() from nose.tools import * +from sqlalchemy import or_ from mutalyzer.db.models import Assembly from mutalyzer.output import Output from mutalyzer.mapping import Converter -import utils +from fixtures import database, hg19, hg19_transcript_mappings +from utils import MutalyzerTest -class TestConverter(): +class TestConverter(MutalyzerTest): """ Test the Converter class. """ + fixtures = (database, hg19, hg19_transcript_mappings) + def setup(self): - """ - Initialize test converter module. - """ - utils.create_test_environment(database=True) + super(TestConverter, self).setup() self.output = Output(__file__) - def teardown(self): - utils.destroy_environment() - - def _converter(self, build): + def _converter(self, assembly_name_or_alias): """ - Create a Converter instance for a given build. + Create a Converter instance for a given genome assembly. """ - assembly = Assembly.query.first() + assembly = Assembly.query \ + .filter(or_(Assembly.name == assembly_name_or_alias, + Assembly.alias == assembly_name_or_alias)) \ + .one() return Converter(assembly, self.output) def test_converter(self): @@ -80,6 +81,11 @@ class TestConverter(): See also bug #58. """ + # Todo: This test is bogus now that we use a fixture that has just the + # mapping to chromosome 6. However, I think we only get this mapping + # from our current source (NCBI seq_gene.md) anyway, so I'm not sure + # where we got the other mappings from in the past (but haven't + # investigated really). converter = self._converter('hg19') genomic = converter.c2chrom('NM_000500.5:c.92C>T') assert_equal(genomic, 'NC_000006.11:g.32006291C>T') @@ -105,15 +111,10 @@ class TestConverter(): is exactly on the border of an exon. Bug reported February 24, 2012 by S Venkata Suresh Kumar. - - Note: You need the full hg18 and hg19 databases for these tests to - pass (i.e. the one used on the production server, possibly - updated with newer mappings from the NCBI). 
""" converter = self._converter('hg19') coding = converter.chrom2c('NC_000001.10:g.115259837_115259837delT', 'list') assert 'NM_001007553.1:c.3863delA' not in coding - assert 'NM_001007553.2:c.3863delA' not in coding assert 'NM_001007553.1:c.*953delA' in coding assert 'NM_001130523.1:c.*953delA' in coding @@ -127,9 +128,7 @@ class TestConverter(): converter = self._converter('hg19') coding = converter.chrom2c('NC_000001.10:g.160012314_160012329del16', 'list') assert 'NM_002241.4:c.-27250-7_-27242del16' not in coding - #assert 'NM_002241.3:c.-27340-7_-27332del16' not in coding assert 'NM_002241.4:c.1-7_9del16' in coding - #assert 'NM_002241.3:c.1-7_9del16' in coding def test_range_order_forward_correct(self): """ diff --git a/tests/test_mutator.py b/tests/test_mutator.py index cc15f0e3e91b38a03994782fad51aef09a9eb9d8..43117c693df524ef43bd4ea3c839417e570c8043 100644 --- a/tests/test_mutator.py +++ b/tests/test_mutator.py @@ -11,11 +11,10 @@ from nose.tools import * from Bio.Seq import Seq import mutalyzer -from mutalyzer.util import skip from mutalyzer.output import Output from mutalyzer import mutator -import utils +from utils import MutalyzerTest def _seq(length): @@ -28,20 +27,14 @@ def _seq(length): return Seq(sequence) -class TestMutator(): +class TestMutator(MutalyzerTest): """ Test the mutator module. """ def setup(self): - """ - Initialize test mutator module. - """ - utils.create_test_environment() + super(TestMutator, self).setup() self.output = Output(__file__) - def teardown(self): - utils.destroy_environment() - def _mutator(self, sequence): """ Create a Mutator instance for a given sequence. @@ -202,13 +195,14 @@ class TestMutator(): m.deletion(12, 13) # g.12_13del assert_equal(m.shift_sites(sites), [4, 9, 12, 15, 23, 25]) - @skip def test_shift_sites_acc_del2_on(self): """ Deletion of 2 in intron/exon. @note: This hits a splice site, so we don't really support it. 
""" + return + l = 30 sites = [4, 9, 14, 17, 25, 27] m = self._mutator(_seq(l)) @@ -235,13 +229,14 @@ class TestMutator(): m.deletion(16, 17) # g.16_17del assert_equal(m.shift_sites(sites), [4, 9, 14, 15, 23, 25]) - @skip def test_shift_sites_don_del2_on(self): """ Deletion of 2 in exon/intron. @note: This hits a splice site, so we don't really support it. """ + return + l = 30 sites = [4, 9, 14, 17, 25, 27] m = self._mutator(_seq(l)) @@ -595,7 +590,6 @@ class TestMutator(): m.deletion(16, 17) # g.16_17del assert_equal(m.shift_sites(sites), [4, 9, 10, 15, 16, 25]) - @skip def test_shift_sites_adj_del2_on(self): """ Adjacent exons: deletion of 2 at exon/exon boundary. @@ -603,6 +597,8 @@ class TestMutator(): @todo: This is a special case of bug #????. Once fixed, the two exons will be joined to one new exon. """ + return + l = 30 sites = [4, 9, 10, 17, 18, 27] m = self._mutator(_seq(l)) diff --git a/tests/test_parsers_genbank.py b/tests/test_parsers_genbank.py index ec8083b04c34c0394dfc4bdada5cf503f81bbf5d..d66ad1af3d519a615cd6b57b7137829c51da048a 100644 --- a/tests/test_parsers_genbank.py +++ b/tests/test_parsers_genbank.py @@ -8,23 +8,17 @@ from nose.tools import * from mutalyzer.parsers import genbank -import utils +from utils import MutalyzerTest -class TestMutator(): +class TestMutator(MutalyzerTest): """ Test the mutator module. """ def setup(self): - """ - Initialize test mutator module. - """ - utils.create_test_environment(database=True) + super(TestMutator, self).setup() self.gb_parser = genbank.GBparser() - def teardown(self): - utils.destroy_environment() - def test_product_lists_mismatch(self): """ Test finding mismatches in some product lists. diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 9c7467f2576b2fb458887c38a18273e556675dde..1fc3d5eeb95658330dc2895bf5e3d36f6ee5e1fb 100644 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -3,10 +3,13 @@ Tests for the Scheduler module. 
""" +import bz2 import os import StringIO #import logging; logging.basicConfig() +from Bio import Entrez +from mock import patch from nose.tools import * from mutalyzer.config import settings @@ -15,18 +18,16 @@ from mutalyzer import File from mutalyzer import output from mutalyzer import Scheduler -import utils +from fixtures import database, cache +from utils import MutalyzerTest +from utils import fix -class TestScheduler(): +class TestScheduler(MutalyzerTest): """ Test the Scheduler class. """ - def setup(self): - utils.create_test_environment(database=True) - - def teardown(self): - utils.destroy_environment() + fixtures = (database, ) @staticmethod def _batch_job(variants, expected, job_type, argument=None): @@ -36,23 +37,19 @@ class TestScheduler(): batch_file = StringIO.StringIO('\n'.join(variants) + '\n') job, columns = file_instance.parseBatchFile(batch_file) result_id = scheduler.addJob('test@test.test', job, columns, - None, job_type, argument) + job_type) + + batch_job = BatchJob.query.filter_by(result_id=result_id).one() - left = BatchQueueItem.query \ - .join(BatchJob) \ - .filter_by(result_id=result_id) \ - .count() + left = batch_job.batch_queue_items.count() assert_equal(left, len(variants)) scheduler.process() - left = BatchQueueItem.query \ - .join(BatchJob) \ - .filter_by(result_id=result_id) \ - .count() + left = batch_job.batch_queue_items.count() assert_equal(left, 0) - filename = 'Results_%s.txt' % result_id + filename = 'batch-job-%s.txt' % result_id result = open(os.path.join(settings.CACHE_DIR, filename)) next(result) # Header. @@ -69,8 +66,9 @@ class TestScheduler(): 'OK'], ['AL449423.14(CDKN2A_v002):c.5_400del', 'OK']] - self._batch_job(variants, expected, 'SyntaxChecker') + self._batch_job(variants, expected, 'syntax-checker') + @fix(cache('AB026906.1', 'NM_000059.3')) def test_name_checker(self): """ Simple name checker batch job. 
@@ -114,7 +112,7 @@ class TestScheduler(): 'NM_000059.3(BRCA2_i001):p.(Asp224Tyr)', '', 'BspHI,CviAII,FatI,Hpy188III,NlaIII']] - self._batch_job(variants, expected, 'NameChecker') + self._batch_job(variants, expected, 'name-checker') def test_name_checker_altered(self): """ @@ -179,8 +177,21 @@ class TestScheduler(): 'NM_000059.3(BRCA2_i001):p.(Asp224Tyr)', '', 'BspHI,CviAII,FatI,Hpy188III,NlaIII']] - self._batch_job(variants, expected, 'NameChecker') + # Patch GenBankRetriever.fetch to return the contents of NM_000059.3 + # for NM_000059. + def mock_efetch(*args, **kwargs): + if kwargs.get('id') != 'NM_000059': + return Entrez.efetch(*args, **kwargs) + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'NM_000059.3.gb.bz2') + return bz2.BZ2File(path) + + with patch.object(Entrez, 'efetch', mock_efetch): + self._batch_job(variants, expected, 'name-checker') + + @fix(cache('NM_000059.3')) def test_name_checker_skipped(self): """ Name checker job with skipped entries. @@ -211,4 +222,12 @@ class TestScheduler(): 'NM_000059.3(BRCA2_i001):p.(Asp224Tyr)', '', 'BspHI,CviAII,FatI,Hpy188III,NlaIII']] - self._batch_job(variants, expected, 'NameChecker') + + # Patch GenBankRetriever.fetch to fail on NM_1234567890.3. 
+ def mock_efetch(*args, **kwargs): + if kwargs.get('id') != 'NM_1234567890.3': + return Entrez.efetch(*args, **kwargs) + raise IOError() + + with patch.object(Entrez, 'efetch', mock_efetch): + self._batch_job(variants, expected, 'name-checker') diff --git a/tests/test_services_json.py b/tests/test_services_json.py index 9b2f4e8051bbf8baa5c061d4f2019205325c9cfa..f5b1a42c902f11ad78dc91ca3ed338c3a638ed1a 100644 --- a/tests/test_services_json.py +++ b/tests/test_services_json.py @@ -9,7 +9,9 @@ from spyne.server.null import NullServer import mutalyzer from mutalyzer.services.json import application -import utils +from fixtures import database, hg19, hg19_transcript_mappings +from utils import MutalyzerTest +from utils import fix # Todo: We currently have no way of testing POST requests to the JSON API. We @@ -20,20 +22,14 @@ import utils # [2] https://github.com/LUMC/spyne/commit/58660dec28d47b1c3bf1e46d20f55a913ad036cd -class TestServicesJson(): +class TestServicesJson(MutalyzerTest): """ Test the Mutalyzer HTTP/RPC+JSON interface. """ def setup(self): - """ - Initialize test server. - """ - utils.create_test_environment(database=True) + super(TestServicesJson, self).setup() self.server = NullServer(application, ostr=True) - def teardown(self): - utils.destroy_environment() - def _call(self, method, *args, **kwargs): r = getattr(self.server.service, method)(*args, **kwargs) return json.loads(''.join(r)) @@ -64,6 +60,7 @@ class TestServicesJson(): #assert_equal(r['faultcode'], 'Client.ValidationError') pass + @fix(database, hg19, hg19_transcript_mappings) def test_transcriptinfo_valid(self): """ Running transcriptInfo with valid arguments should get us a Transcript diff --git a/tests/test_services_soap.py b/tests/test_services_soap.py index 13d0335c3e025413105dec6e614d475dce60c456..53344ca56e89efebfc8dc5204b51c0b1e57aa15b 100644 --- a/tests/test_services_soap.py +++ b/tests/test_services_soap.py @@ -3,24 +3,30 @@ Tests for the SOAP interface to Mutalyzer. 
""" +import bz2 import datetime import logging import os import tempfile -import time +from Bio import Entrez +from mock import patch from nose.tools import * from spyne.server.null import NullServer from spyne.model.fault import Fault from suds.client import Client import mutalyzer +from mutalyzer.config import settings from mutalyzer.output import Output from mutalyzer.services.soap import application from mutalyzer.sync import CacheSync -from mutalyzer.util import skip, slow +from mutalyzer import Scheduler + +from fixtures import database, cache, hg19, hg19_transcript_mappings +from utils import MutalyzerTest +from utils import fix -import utils # Suds logs an awful lot of things with level=DEBUG, including entire WSDL # files and SOAP responses. On any error, this is all dumped to the console, @@ -44,15 +50,12 @@ def _write_wsdl(server): return wsdl_filename -class TestServicesSoap(): +class TestServicesSoap(MutalyzerTest): """ Test the Mutalyzer SOAP interface. """ def setup(self): - """ - Initialize test server. - """ - utils.create_test_environment(database=True) + super(TestServicesSoap, self).setup() self.server = NullServer(application, ostr=True) # Unfortunately there's no easy way to just give a SUDS client a # complete WSDL string, it only accepts a URL to it. So we create one. @@ -60,10 +63,7 @@ class TestServicesSoap(): self.client = Client('file://%s' % self.wsdl, cache=None) def teardown(self): - """ - Remove temporary file used for WSDL. 
- """ - utils.destroy_environment() + super(TestServicesSoap, self).teardown() os.unlink(self.wsdl) def _call(self, method, *args, **kwargs): @@ -106,6 +106,7 @@ class TestServicesSoap(): # See https://github.com/arskom/spyne/issues/318 self._call('checkSyntax') + @fix(database, hg19, hg19_transcript_mappings) def test_transcriptinfo_valid(self): """ Running transcriptInfo with valid arguments should get us a Transcript @@ -117,6 +118,7 @@ class TestServicesSoap(): assert_equal(r.trans_stop, 1066) assert_equal(r.CDS_stop, 774) + @fix(database, hg19, hg19_transcript_mappings) def test_numberconversion_gtoc_valid(self): """ Running numberConversion with valid g variant should give a list of @@ -127,6 +129,7 @@ class TestServicesSoap(): assert_equal(type(r.string), list) assert 'NM_002001.2:c.1del' in r.string + @fix(database, hg19, hg19_transcript_mappings) def test_numberconversion_ctog_valid(self): """ Running numberConversion with valid c variant should give a list of @@ -137,22 +140,20 @@ class TestServicesSoap(): assert_equal(type(r.string), list) assert 'NC_000001.10:g.159272155del' in r.string + @fix(database, hg19, hg19_transcript_mappings) def test_numberconversion_gtoc_gene(self): """ Running numberConversion with valid g variant and a gene name should give a list of c variant names on transcripts for the given gene. """ r = self._call('numberConversion', - build='hg19', variant='NC_000011.9:g.111959693G>T', gene='C11orf57') + build='hg19', variant='NC_000023.10:g.32827640G>A', gene='DMD') assert_equal(type(r.string), list) - # Fix for r536: disable the -u and +d convention. 
- #assert 'NM_001082969.1:c.*2178+d3819G>T' in r.string - #assert 'NM_001082970.1:c.*2178+d3819G>T' in r.string - #assert 'NM_018195.3:c.*2178+d3819G>T' in r.string - assert 'NM_001082969.1:c.*5997G>T' in r.string - assert 'NM_001082970.1:c.*5997G>T' in r.string - assert 'NM_018195.3:c.*5997G>T' in r.string + assert 'NM_004007.2:c.250C>T' in r.string + assert 'NM_004011.3:c.-397314C>T' in r.string + assert 'NM_004019.2:c.-1542694C>T' in r.string + @fix(database, hg19, hg19_transcript_mappings) def test_numberconversion_gtoc_no_transcripts(self): """ Running numberConversion with valid g variant but no transcripts @@ -162,6 +163,7 @@ class TestServicesSoap(): build='hg19', variant='chr7:g.345T>C') assert_false(r) + @fix(database, hg19, hg19_transcript_mappings) def test_numberconversion_gtoc_required_gene(self): """ Running numberConversion with valid g variant but no transcripts @@ -175,6 +177,7 @@ class TestServicesSoap(): #assert 'XM_001715131.2:c.1155+d19483A>G' in r.string assert 'XM_001715131.2:c.*19483A>G' in r.string + @fix(database, hg19, hg19_transcript_mappings) def test_gettranscriptsbygenename_valid(self): """ Running getTranscriptsByGeneName with valid gene name should give a @@ -183,15 +186,12 @@ class TestServicesSoap(): r = self._call('getTranscriptsByGeneName', build='hg19', name='DMD') assert_equal(type(r.string), list) - for t in ['NM_004006.2', - 'NM_000109.3', - 'NM_004021.2', - 'NM_004009.3', - 'NM_004007.2', - 'NM_004018.2', - 'NM_004022.2']: + for t in ['NM_004011.3', + 'NM_004019.2', + 'NM_004007.2']: assert t in r.string + @fix(database, hg19, hg19_transcript_mappings) def test_gettranscriptsbygenename_invalid(self): """ Running getTranscriptsByGeneName with invalid gene name should not @@ -201,6 +201,7 @@ class TestServicesSoap(): build='hg19', name='BOGUSGENE') assert_false(r) + @fix(database, cache('AF230870.1')) def test_gettranscriptsandinfo_valid(self): """ Running getTranscriptsAndInfo with a valid genomic reference should @@ -213,7 
+214,7 @@ class TestServicesSoap(): 'mtmB2_v001']: assert t in names - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(database, cache('AL449423.14')) def test_gettranscriptsandinfo_restricted_valid(self): """ Running getTranscriptsAndInfo with a valid genomic reference and a @@ -232,21 +233,22 @@ class TestServicesSoap(): 'C9orf53_v001']: assert_false(t in names) + @fix(database, hg19, hg19_transcript_mappings) def test_gettranscriptsmapping(self): """ Running getTranscriptsMapping should give a list of TranscriptMappingInfo objects. """ r = self._call('getTranscriptsMapping', - 'hg19', 'chr16', 70680470, 70807150, 1) + 'hg19', 'chrX', 31200000, 31210000, 1) assert_equal(type(r.TranscriptMappingInfo), list) names = [t.name for t in r.TranscriptMappingInfo] - for t in ('NM_152456', - 'NM_138383', - 'NM_018052', - 'NR_034083'): + for t in ('NM_004011', + 'NM_004019', + 'NM_004007'): assert t in names + @fix(database, hg19, hg19_transcript_mappings) def test_mappinginfo(self): """ Running mappingInfo should give a Mapping object. @@ -261,61 +263,68 @@ class TestServicesSoap(): assert_equal(r.startmain, 1388) assert_equal(r.endmain, 1388) + @fix(database, hg19, hg19_transcript_mappings) def test_mappinginfo(self): """ Running mappingInfo should give a Mapping object. """ r = self._call('mappingInfo', - '3.0-beta-06', 'hg19', 'NM_001008541.1', 'g.112039014G>T') + '3.0-beta-06', 'hg19', 'NM_002001.2', 'g.159272168G>T') assert_equal(r.endoffset, 0) - assert_equal(r.start_g, 112039014) + assert_equal(r.start_g, 159272168) assert_equal(r.startoffset, 0) assert_equal(r.mutationType, 'subst') - assert_equal(r.end_g, 112039014) - assert_equal(r.startmain, 175) - assert_equal(r.endmain, 175) + assert_equal(r.end_g, 159272168) + assert_equal(r.startmain, 14) + assert_equal(r.endmain, 14) + @fix(database, hg19, hg19_transcript_mappings) def test_mappinginfo_compound(self): """ - Running mappingInfo with compound variant should give a Mapping object. 
+ Running mappingInfo with compound variant should give a Mapping + object. """ r = self._call('mappingInfo', - '3.0-beta-06', 'hg19', 'NM_001008541.1', 'g.[112039014G>T;112039018T>A]') + '3.0-beta-06', 'hg19', 'NM_002001.2', 'g.[159272168G>T;159272174T>A]') assert_equal(r.endoffset, 0) - assert_equal(r.start_g, 112039014) + assert_equal(r.start_g, 159272168) assert_equal(r.startoffset, 0) assert_equal(r.mutationType, 'compound') - assert_equal(r.end_g, 112039018) - assert_equal(r.startmain, 175) - assert_equal(r.endmain, 179) + assert_equal(r.end_g, 159272174) + assert_equal(r.startmain, 14) + assert_equal(r.endmain, 20) + @fix(database, hg19, hg19_transcript_mappings) def test_mappinginfo_reverse(self): """ - Running mappingInfo on a reverse transcript should give a Mapping object. + Running mappingInfo on a reverse transcript should give a Mapping + object. """ r = self._call('mappingInfo', - '3.0-beta-06', 'hg19', 'NM_000035.3', 'g.104184170_104184179del') + '3.0-beta-06', 'hg19', 'NM_004011.3', 'g.31152229_31152239del') assert_equal(r.endoffset, 0) - assert_equal(r.start_g, 104184170) + assert_equal(r.start_g, 31152229) assert_equal(r.startoffset, 0) assert_equal(r.mutationType, 'del') - assert_equal(r.end_g, 104184179) - assert_equal(r.startmain, 1016) - assert_equal(r.endmain, 1007) + assert_equal(r.end_g, 31152239) + assert_equal(r.startmain, 6981) + assert_equal(r.endmain, 6971) + @fix(database, hg19, hg19_transcript_mappings) def test_mappinginfo_compound_reverse(self): """ - Running mappingInfo with compound variant on a reverse transcript should give a Mapping object. + Running mappingInfo with compound variant on a reverse transcript + should give a Mapping object. 
""" r = self._call('mappingInfo', - '3.0-beta-06', 'hg19', 'NM_000035.3', 'g.[104184170_104184179del;104184182_104184183del]') + '3.0-beta-06', 'hg19', 'NM_004011.3', 'g.[31152229_31152232del;31152235_31152239del]') assert_equal(r.endoffset, 0) - assert_equal(r.start_g, 104184170) + assert_equal(r.start_g, 31152229) assert_equal(r.startoffset, 0) assert_equal(r.mutationType, 'compound') - assert_equal(r.end_g, 104184183) - assert_equal(r.startmain, 1016) - assert_equal(r.endmain, 1003) + assert_equal(r.end_g, 31152239) + assert_equal(r.startmain, 6981) + assert_equal(r.endmain, 6971) def test_info(self): """ @@ -325,6 +334,7 @@ class TestServicesSoap(): assert_equal(type(r.versionParts.string), list) assert_equal(r.version, mutalyzer.__version__) + @fix(database, cache('AB026906.1', 'AL449423.14', 'NM_003002.2')) def test_getcache(self): """ Running the getCache method should give us the expected number of @@ -334,23 +344,31 @@ class TestServicesSoap(): output = Output(__file__) sync = CacheSync(output) - cache = sync.local_cache(created_since) r = self._call('getCache', created_since) - if len(cache) > 0: - assert_equal(len(r.CacheEntry), len(cache)) + assert_equal(len(r.CacheEntry), 3) def test_getdbsnpdescriptions(self): """ Running getdbSNPDescriptions method should give us the expected HGVS descriptions for the given dbSNP id. """ - r = self._call('getdbSNPDescriptions', 'rs9919552') + # Patch Retriever.snpConvert to return rs9919552. 
+ def mock_efetch(*args, **kwargs): + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'rs9919552.xml.bz2') + return bz2.BZ2File(path) + + with patch.object(Entrez, 'efetch', mock_efetch): + r = self._call('getdbSNPDescriptions', 'rs9919552') + assert 'NC_000011.9:g.111959625C>T' in r.string assert 'NG_012337.2:g.7055C>T' in r.string assert 'NM_003002.3:c.204C>T' in r.string assert 'NP_002993.1:p.Ser68=' in r.string + @fix(database, hg19, hg19_transcript_mappings) def test_gettranscripts(self): """ Running getTranscripts should give a list of transcripts. @@ -358,15 +376,11 @@ class TestServicesSoap(): r = self._call('getTranscripts', build='hg19', chrom='chrX', pos=32237295) assert_equal(type(r.string), list) - for t in ['NM_000109', - 'NM_004006', - 'NM_004007', - 'NM_004009', - 'NM_004010', - 'NM_004011', - 'NM_004012']: + for t in ['NM_004011', + 'NM_004007']: assert t in r.string + @fix(database, hg19, hg19_transcript_mappings) def test_gettranscripts_with_versions(self): """ Running getTranscripts with versions=True should give a list @@ -375,15 +389,11 @@ class TestServicesSoap(): r = self._call('getTranscripts', build='hg19', chrom='chrX', pos=32237295, versions=True) assert_equal(type(r.string), list) - for t in ['NM_000109.3', - 'NM_004006.2', - 'NM_004007.2', - 'NM_004009.3', - 'NM_004010.3', - 'NM_004011.3', - 'NM_004012.3']: + for t in ['NM_004011.3', + 'NM_004007.2']: assert t in r.string + @fix(database, cache('NM_003002.2')) def test_runmutalyzer(self): """ Just a runMutalyzer test. @@ -393,19 +403,33 @@ class TestServicesSoap(): assert_equal(r.genomicDescription, 'NM_003002.2:n.335G>T') assert 'NM_003002.2(SDHD_v001):c.274G>T' in r.transcriptDescriptions.string + @fix(database) def test_runmutalyzer_reference_info_nm(self): """ Get reference info for an NM variant without version. 
""" - r = self._call('runMutalyzer', 'NM_003002:c.274G>T') + # Patch GenBankRetriever.fetch to return the contents of NM_003002.2 + # for NM_003002. + def mock_efetch(*args, **kwargs): + if kwargs.get('id') != 'NM_003002': + return Entrez.efetch(*args, **kwargs) + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'NM_003002.2.gb.bz2') + return bz2.BZ2File(path) + + with patch.object(Entrez, 'efetch', mock_efetch): + r = self._call('runMutalyzer', 'NM_003002:c.274G>T') + assert_equal(r.errors, 0) - assert_equal(r.referenceId, 'NM_003002.3') - assert_equal(r.sourceId, 'NM_003002.3') + assert_equal(r.referenceId, 'NM_003002.2') + assert_equal(r.sourceId, 'NM_003002.2') assert_equal(r.sourceAccession, 'NM_003002') - assert_equal(r.sourceVersion, '3') - assert_equal(r.sourceGi, '452405284') + assert_equal(r.sourceVersion, '2') + assert_equal(r.sourceGi, '222352156') assert_equal(r.molecule, 'n') + @fix(database, cache('NM_003002.2')) def test_runmutalyzer_reference_info_nm_version(self): """ Get reference info for an NM variant with version. @@ -419,23 +443,7 @@ class TestServicesSoap(): assert_equal(r.sourceGi, '222352156') assert_equal(r.molecule, 'n') - def test_runmutalyzer_reference_info_ud(self): - """ - Get reference info for a UD variant after creating it. - - UD_129433404385: NC_000023.10 31135344 33362726 2 NULL 2011-10-04 13:15:04 - """ - ud = str(self._call('sliceChromosome', - 'NC_000023.10', 31135344, 33362726, 2)) - r = self._call('runMutalyzer', ud + ':g.1del') - assert_equal(r.errors, 0) - assert_equal(r.referenceId, ud) - assert_equal(r.sourceId, 'NC_000023.10') - assert_equal(r.sourceAccession, 'NC_000023') - assert_equal(r.sourceVersion, '10') - assert_equal(r.sourceGi, '224589822') - assert_equal(r.molecule, 'g') - + @fix(database, cache('LRG_1')) def test_runmutalyzer_reference_info_lrg(self): """ Get reference info for an LRG variant. 
@@ -446,37 +454,51 @@ class TestServicesSoap(): assert_equal(r.sourceId, 'LRG_1') assert_equal(r.molecule, 'g') + @fix(database, cache('NG_012772.1')) def test_runmutalyzer_reference_info_ng(self): """ Get reference info for an NG variant without version. """ - r = self._call('runMutalyzer', 'NG_012772:g.18964del') + # Patch GenBankRetriever.fetch to return the contents of NG_012772.1 + # for NG_012772. + def mock_efetch(*args, **kwargs): + if kwargs.get('id') != 'NG_012772': + return Entrez.efetch(*args, **kwargs) + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'NG_012772.1.gb.bz2') + return bz2.BZ2File(path) + + with patch.object(Entrez, 'efetch', mock_efetch): + r = self._call('runMutalyzer', 'NG_012772:g.18964del') + assert_equal(r.errors, 0) - assert_equal(r.referenceId, 'NG_012772.3') - assert_equal(r.sourceId, 'NG_012772.3') + assert_equal(r.referenceId, 'NG_012772.1') + assert_equal(r.sourceId, 'NG_012772.1') assert_equal(r.sourceAccession, 'NG_012772') - assert_equal(r.sourceVersion, '3') - assert_equal(r.sourceGi, '388428999') + assert_equal(r.sourceVersion, '1') + assert_equal(r.sourceGi, '256574794') assert_equal(r.molecule, 'g') + @fix(database, cache('NG_009105.1')) def test_runmutalyzer_reference_info_ng_version(self): """ Get reference info for an NG variant with version. 
""" - r = self._call('runMutalyzer', 'NG_012772.3:g.18964del') + r = self._call('runMutalyzer', 'NG_009105.1:g.18964del') assert_equal(r.errors, 0) - assert_equal(r.referenceId, 'NG_012772.3') - assert_equal(r.sourceId, 'NG_012772.3') - assert_equal(r.sourceAccession, 'NG_012772') - assert_equal(r.sourceVersion, '3') - assert_equal(r.sourceGi, '388428999') + assert_equal(r.referenceId, 'NG_009105.1') + assert_equal(r.sourceId, 'NG_009105.1') + assert_equal(r.sourceAccession, 'NG_009105') + assert_equal(r.sourceVersion, '1') + assert_equal(r.sourceGi, '216548283') assert_equal(r.molecule, 'g') + @fix(database, cache('NG_012772.1')) def test_runmutalyzer_reference_info_gi(self): """ Get reference info for a GI variant. """ - self._call('runMutalyzer', 'NG_012772.1:g.1del') # Make sure the server has this reference cached r = self._call('runMutalyzer', 'gi256574794:g.18964del') assert_equal(r.errors, 0) assert_equal(r.referenceId, 'NG_012772.1') @@ -486,85 +508,121 @@ class TestServicesSoap(): assert_equal(r.sourceGi, '256574794') assert_equal(r.molecule, 'g') + @fix(database, cache('NM_000143.3')) def test_runmutalyzer_exons(self): """ Exon table in runMutalyzer output. 
""" - r = self._call('runMutalyzer', 'NM_004959.4:c.630_636del') + r = self._call('runMutalyzer', 'NM_000143.3:c.630_636del') assert_equal(r.errors, 0) - expected_exons = [(1, 172, '-187', '-16'), - (173, 289, '-15', '102'), - (290, 431, '103', '244'), - (432, 1057, '245', '870'), - (1058, 1177, '871', '990'), - (1178, 1325, '991', '1138'), - (1326, 3095, '1139', '*1522')] + expected_exons = [(1, 195, '-63', '132'), + (196, 330, '133', '267'), + (331, 441, '268', '378'), + (442, 618, '379', '555'), + (619, 801, '556', '738'), + (802, 967, '739', '904'), + (968, 1171, '905', '1108'), + (1172, 1299, '1109', '1236'), + (1300, 1453, '1237', '1390'), + (1454, 1867, '1391', '*271')] assert_equal(len(r.exons.ExonInfo), len(expected_exons)) for exon, expected_exon in zip(r.exons.ExonInfo, expected_exons): assert_equal((exon.gStart, exon.gStop, exon.cStart, exon.cStop), expected_exon) - def test_gettranscriptsandinfo_slice(self): - """ - Running getTranscriptsAndInfo on a chromosomal slice should include - chromosomal positions. 
- - slice: 48284003 - 48259456 (COL1A1 with 5001 and 2001 borders) - translation start: 48284003 - 5001 + 1 = 48279003 - translation end: 48259456 + 2001 = 48261457 - """ - ud = str(self._call('sliceChromosomeByGene', - 'COL1A1', 'human', 5000, 2000)) - r = self._call('getTranscriptsAndInfo', ud) - assert_equal(type(r.TranscriptInfo), list) - names = [t.name for t in r.TranscriptInfo] - assert 'COL1A1_v001' in names - for t in r.TranscriptInfo: - if t.name != 'COL1A1_v001': - continue - assert_equal(t.cTransStart, '-129') - assert_equal(t.gTransStart, 5001) - assert_equal(t.chromTransStart, 48279003) - assert_equal(t.cTransEnd, '*1406') - assert_equal(t.gTransEnd, 22547) - assert_equal(t.chromTransEnd, 48261457) - assert_equal(t.sortableTransEnd, 4883) - assert_equal(t.cCDSStart, '1') - assert_equal(t.gCDSStart, 5130) - assert_equal(t.chromCDSStart, 48278874) - assert_equal(t.cCDSStop, '3477') - assert_equal(t.gCDSStop, 21141) - assert_equal(t.chromCDSStop, 48262863) - - @skip # Todo: AL449423.14 no longer contains gene annotations. - @slow + @fix(database, cache('AB026906.1', 'NM_003002.2', 'AL449423.14')) def test_batchjob(self): """ Submit a batch job. 
""" variants = ['AB026906.1(SDHD):g.7872G>T', - 'NM_003002.1:c.3_4insG', + 'NM_003002.2:c.3_4insG', 'AL449423.14(CDKN2A_v002):c.5_400del'] - data = '\n'.join(variants).encode('base64') + data = '\n'.join(variants) + '\n' #.encode('base64') result = self._call('submitBatchJob', data, 'NameChecker') - job_id = int(result) - - for _ in range(50): - try: - result = self._call('getBatchJob', job_id) - break - except Fault: - result = self._call('monitorBatchJob', job_id) - assert int(result) <= len(variants) - time.sleep(1) - else: - assert False + job_id = str(result) + + result = self._call('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + result = self._call('monitorBatchJob', job_id) + assert int(result) == 0 + + result = self._call('getBatchJob', job_id) assert_equal(len(result.decode('base64').strip().split('\n')) - 1, len(variants)) - @slow + @fix(database) + def test_batchjob_newlines_unix(self): + """ + Submit a batch job with UNIX newlines. + """ + variants = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.2:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + data = '\n'.join(variants) + '\n' + + result = self._call('submitBatchJob', data, 'SyntaxChecker') + job_id = str(result) + + result = self._call('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = self._call('monitorBatchJob', job_id) + assert int(result) == 0 + + @fix(database) + def test_batchjob_newlines_mac(self): + """ + Submit a batch job with Mac newlines. 
+ """ + variants = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.2:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + data = '\r'.join(variants) + '\r' + + result = self._call('submitBatchJob', data, 'SyntaxChecker') + job_id = str(result) + + result = self._call('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = self._call('monitorBatchJob', job_id) + assert int(result) == 0 + + @fix(database) + def test_batchjob_newlines_windows(self): + """ + Submit a batch job with Windows newlines. + """ + variants = ['AB026906.1(SDHD):g.7872G>T', + 'NM_003002.2:c.3_4insG', + 'AL449423.14(CDKN2A_v002):c.5_400del'] + data = '\r\n'.join(variants) + '\r\n' + + result = self._call('submitBatchJob', data, 'SyntaxChecker') + job_id = str(result) + + result = self._call('monitorBatchJob', job_id) + assert int(result) == len(variants) + + scheduler = Scheduler.Scheduler() + scheduler.process() + + result = self._call('monitorBatchJob', job_id) + assert int(result) == 0 + + @fix(database) def test_batchjob_toobig(self): """ Submit the batch name checker with a too big input file. @@ -579,8 +637,8 @@ feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi.""" data = seed - # Very crude way of creating something at least 6MB in size - while len(data) < 6000000: + # Very crude way of creating something big. 
+ while len(data) <= settings.MAX_FILE_SIZE: data += data try: diff --git a/tests/test_variantchecker.py b/tests/test_variantchecker.py index a2ac64b847fd73974e8e874172e73b6f8b33860c..7049fc3fa1a93ec03f5377021b575857a1e81e5d 100644 --- a/tests/test_variantchecker.py +++ b/tests/test_variantchecker.py @@ -9,47 +9,33 @@ from nose.tools import * from mutalyzer.output import Output from mutalyzer.Retriever import GenBankRetriever from mutalyzer.variantchecker import check_variant -from mutalyzer.util import slow, skip -import utils +from fixtures import REFERENCES +from fixtures import database, cache, hg19, hg19_transcript_mappings +from utils import MutalyzerTest +from utils import fix -class TestVariantchecker(): +# Todo: We had a test for checking a variant on a CONTIG RefSeq reference +# (NG_005990.1), but instead we should have separate tests for the retriever +# module, including a test for fetching a CONTIG RefSeq reference. + + +class TestVariantchecker(MutalyzerTest): """ Test the variantchecker module. """ + fixtures = (database, ) + def setup(self): """ Initialize test variantchecker module. """ - utils.create_test_environment(database=True) + super(TestVariantchecker, self).setup() self.output = Output(__file__) self.retriever = GenBankRetriever(self.output) - def teardown(self): - utils.destroy_environment() - - def _slice(self, chromosome, start, stop, orientation): - """ - Get a UD slice. - - Orientation: 1 for forward, 2 for reverse. - """ - return self.retriever.retrieveslice(chromosome, start, stop, orientation) - - def _slice_gene(self, gene, organism='human', upstream=5000, downstream=2000): - """ - Get a UD slice for a gene. - """ - return self.retriever.retrievegene(gene, organism, upstream, downstream) - - def _load_record(self, identifier): - """ - Load a record in the database and cache. - """ - return self.retriever.loadrecord(identifier) - - @skip # Todo: AL449423.14 no longer contains gene annotations. 
+ @fix(cache('AL449423.14')) def test_deletion_in_frame(self): """ Simple in-frame deletion should give a simple description on protein @@ -64,7 +50,7 @@ class TestVariantchecker(): in self.output.getOutput('protDescriptions') assert self.output.getOutput('newprotein') - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(cache('AL449423.14')) def test_insertion_in_frame(self): """ Simple in-frame insertion should give a simple description on protein @@ -79,7 +65,7 @@ class TestVariantchecker(): in self.output.getOutput('protDescriptions') assert self.output.getOutput('newprotein') - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(cache('AL449423.14')) def test_deletion_insertion_in_frame(self): """ Simple in-frame deletion/insertion should give a simple description on @@ -95,7 +81,7 @@ class TestVariantchecker(): in self.output.getOutput('protDescriptions') assert self.output.getOutput('newprotein') - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(cache('AL449423.14')) def test_deletion_insertion_in_frame_complete(self): """ Simple in-frame deletion/insertion should give a simple description on @@ -111,6 +97,7 @@ class TestVariantchecker(): in self.output.getOutput('protDescriptions') assert self.output.getOutput('newprotein') + @fix(cache('NM_003002.2')) def test_est_warning_nm_est(self): """ Warning for EST positioning on NM reference. @@ -119,6 +106,7 @@ class TestVariantchecker(): west = self.output.getMessagesWithErrorCode('WEST') assert len(west) == 1 + @fix(cache('NM_003002.2')) def test_no_est_warning_nm_c(self): """ No EST warning for c. positioning on NM reference. @@ -127,6 +115,7 @@ class TestVariantchecker(): west = self.output.getMessagesWithErrorCode('WEST') assert len(west) == 0 + @fix(cache('NM_003002.2')) def test_no_est_warning_nm_n(self): """ No EST warning for n. positioning on NM reference. 
@@ -135,6 +124,7 @@ class TestVariantchecker(): west = self.output.getMessagesWithErrorCode('WEST') assert len(west) == 0 + @fix(cache('NG_012772.1')) def test_est_warning_ng_est(self): """ Warning for EST positioning on NG reference. @@ -143,6 +133,7 @@ class TestVariantchecker(): west = self.output.getMessagesWithErrorCode('WEST') assert len(west) == 1 + @fix(cache('NG_012772.1')) def test_no_est_warning_ng_g(self): """ No EST warning for g. positioning on NG reference. @@ -151,6 +142,7 @@ class TestVariantchecker(): west = self.output.getMessagesWithErrorCode('WEST') assert len(west) == 0 + @fix(cache('AA010203.1')) def test_no_est_warning_est_est(self): """ No warning for EST positioning on EST reference. @@ -159,6 +151,7 @@ class TestVariantchecker(): west = self.output.getMessagesWithErrorCode('WEST') assert len(west) == 0 + @fix(cache('NM_003002.2')) def test_roll(self): """ Just a variant where we should roll. @@ -167,6 +160,7 @@ class TestVariantchecker(): wroll = self.output.getMessagesWithErrorCode('WROLLFORWARD') assert len(wroll) > 0 + @fix(cache('NM_003002.2')) def test_no_roll(self): """ Just a variant where we cannot roll. @@ -175,6 +169,7 @@ class TestVariantchecker(): wroll = self.output.getMessagesWithErrorCode('WROLLFORWARD') assert_equal(len(wroll), 0) + @fix(cache('NM_000088.3')) def test_no_roll_splice(self): """ Here we can roll but should not, because it is over a splice site. @@ -185,6 +180,7 @@ class TestVariantchecker(): wroll = self.output.getMessagesWithErrorCode('WROLLFORWARD') assert_equal(len(wroll), 0) + @fix(cache('NM_000088.3')) def test_partial_roll_splice(self): """ Here we can roll two positions, but should roll only one because @@ -196,6 +192,7 @@ class TestVariantchecker(): wroll = self.output.getMessagesWithErrorCode('WROLLFORWARD') assert len(wroll) > 0 + @fix(cache('NM_000088.3')) def test_roll_after_splice(self): """ Here we can roll and should, we stay in the same exon. 
@@ -204,7 +201,7 @@ class TestVariantchecker(): wroll = self.output.getMessagesWithErrorCode('WROLLFORWARD') assert len(wroll) > 0 - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(cache('AL449423.14')) def test_roll_both_ins(self): """ Insertion that rolls should not use the same inserted sequence in @@ -238,7 +235,7 @@ class TestVariantchecker(): assert_equal ('AL449423.14:g.65471_65472insACT', self.output.getIndexedOutput('genomicDescription', 0, '')) assert_equal(len(self.output.getMessagesWithErrorCode('WROLLFORWARD')), 1) - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(cache('AL449423.14')) def test_roll_reverse_ins(self): """ Insertion that rolls on the reverse strand should not use the same @@ -249,7 +246,7 @@ class TestVariantchecker(): assert_equal ('AL449423.14:g.65471_65472insACT', self.output.getIndexedOutput('genomicDescription', 0, '')) assert_equal(len(self.output.getMessagesWithErrorCode('WROLLFORWARD')), 0) - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(cache('AL449423.14')) def test_roll_message_forward(self): """ Roll warning message should only be shown for currently selected @@ -259,7 +256,7 @@ class TestVariantchecker(): assert_equal(len(self.output.getMessagesWithErrorCode('WROLLFORWARD')), 1) assert_equal(len(self.output.getMessagesWithErrorCode('WROLLREVERSE')), 0) - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(cache('AL449423.14')) def test_roll_message_reverse(self): """ Roll warning message should only be shown for currently selected @@ -269,6 +266,7 @@ class TestVariantchecker(): assert_equal(len(self.output.getMessagesWithErrorCode('WROLLFORWARD')), 0) assert_equal(len(self.output.getMessagesWithErrorCode('WROLLREVERSE')), 1) + @fix(cache('NM_000143.3')) def test_ins_cds_start(self): """ Insertion on CDS start boundary should not be included in CDS. 
@@ -277,6 +275,7 @@ class TestVariantchecker(): assert_equal(self.output.getIndexedOutput("newprotein", 0), None) # Todo: Is this a good test? + @fix(cache('NM_000143.3')) def test_ins_cds_start_after(self): """ Insertion after CDS start boundary should be included in CDS. @@ -285,6 +284,7 @@ class TestVariantchecker(): assert_equal(self.output.getIndexedOutput("newprotein", 0), '?') # Todo: Is this a good test? + @fix(cache('NG_012772.1')) def test_del_splice_site(self): """ Deletion hitting one splice site should not do a protein prediction. @@ -296,6 +296,7 @@ class TestVariantchecker(): # prediction is done. assert not self.output.getOutput('newprotein') + @fix(cache('NG_012772.1')) def test_del_exon(self): """ Deletion of an entire exon should be possible. @@ -307,6 +308,7 @@ class TestVariantchecker(): # prediction is done. assert self.output.getOutput('newprotein') + @fix(cache('NG_012772.1')) def test_del_exon_exact(self): """ Deletion of exactly an exon should be possible. @@ -318,6 +320,7 @@ class TestVariantchecker(): # prediction is done. assert self.output.getOutput('newprotein') + @fix(cache('NG_012772.1')) def test_del_exon_in_frame(self): """ Deletion of an entire exon with length a triplicate should give a @@ -335,6 +338,7 @@ class TestVariantchecker(): assert self.output.getOutput('newprotein') # Todo: assert that protein products indeed have only this difference. + @fix(cache('NG_012772.1')) def test_del_exons(self): """ Deletion of two entire exons should be possible. @@ -346,6 +350,7 @@ class TestVariantchecker(): # prediction is done. assert self.output.getOutput('newprotein') + @fix(cache('NG_012772.1')) def test_del_intron(self): """ Deletion of an entire intron should be possible (fusion of remaining @@ -358,6 +363,7 @@ class TestVariantchecker(): # prediction is done. 
assert self.output.getOutput('newprotein') + @fix(cache('NG_012772.1')) def test_del_intron_exact(self): """ Deletion of exactly an intron should be possible (fusion of flanking @@ -373,6 +379,7 @@ class TestVariantchecker(): assert self.output.getOutput('oldprotein') assert not self.output.getOutput('newprotein') + @fix(cache('NG_012772.1')) def test_del_intron_in_frame(self): """ Deletion of an entire intron should be possible (fusion of remaining @@ -386,6 +393,7 @@ class TestVariantchecker(): assert self.output.getOutput('newprotein') # Todo: assert that protein products indeed have only this difference. + @fix(cache('NG_012772.1')) def test_del_exon_unknown_offsets(self): """ Deletion of an entire exon with unknown offsets should be possible. @@ -406,6 +414,7 @@ class TestVariantchecker(): # Todo: .c notation should still be c.632-?_681+?del, but what about # other transcripts? + @fix(cache('NG_012772.1')) def test_del_exon_unknown_offsets_in_frame(self): """ Deletion of an entire exon with unknown offsets and length a @@ -429,6 +438,7 @@ class TestVariantchecker(): # Todo: .c notation should still be c.632-?_681+?del, but what about # other transcripts? + @fix(cache('NG_012772.1')) def test_del_exon_unknown_offsets_composed(self): """ Deletion of an entire exon with unknown offsets and another composed @@ -449,7 +459,7 @@ class TestVariantchecker(): # Todo: .c notation should still be c.632-?_681+?del, but what about # other transcripts? - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(cache('AL449423.14')) def test_del_exon_unknown_offsets_reverse(self): """ Deletion of an entire exon with unknown offsets should be possible, @@ -470,6 +480,7 @@ class TestVariantchecker(): # Todo: .c notation should still be c.632-?_681+?del, but what about # other transcripts? 
+ @fix(cache('NM_000143.3')) def test_del_exon_transcript_reference(self): """ Deletion of entire exon on a transcript reference should remove the @@ -477,13 +488,15 @@ class TestVariantchecker(): of the flanking exons (as would happen using the mechanism for genomic references). """ - check_variant('NM_018723.3:c.758_890del', self.output) + #check_variant('NM_018723.3:c.758_890del', self.output) + check_variant('NM_000143.3:c.739_904del', self.output) assert_equal(len(self.output.getMessagesWithErrorCode('WOVERSPLICE')), 0) assert_equal(self.output.getOutput('removedSpliceSites'), [2]) # Todo: For now, the following is how to check if protein # prediction is done. assert self.output.getOutput('newprotein') + @fix(cache('AB026906.1')) def test_ins_range(self): """ Insertion of a range is not implemented yet. @@ -491,6 +504,7 @@ class TestVariantchecker(): check_variant('AB026906.1:c.274_275ins262_268', self.output) assert_equal(len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')), 1) + @fix(cache('AB026906.1')) def test_delins_range(self): """ Deletion/insertion of a range is not implemented yet. @@ -498,14 +512,6 @@ class TestVariantchecker(): check_variant('AB026906.1:c.274delins262_268', self.output) assert_equal(len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')), 1) - def test_contig_reference(self): - """ - Variant description on a CONTIG RefSeq reference. - """ - check_variant('NG_005990.1:g.1del', self.output) - assert_equal(self.output.getIndexedOutput('genomicDescription', 0), - 'NG_005990.1:g.1del') - def test_no_reference(self): """ Variant description without a reference. @@ -513,6 +519,7 @@ class TestVariantchecker(): check_variant('g.244355733del', self.output) assert_equal(len(self.output.getMessagesWithErrorCode('ENOREF')), 1) + @fix(cache('NM_003002.2'), hg19, hg19_transcript_mappings) def test_chromosomal_positions(self): """ Variants on transcripts in c. 
notation should have chromosomal positions @@ -522,6 +529,7 @@ class TestVariantchecker(): assert_equal(self.output.getIndexedOutput('rawVariantsChromosomal', 0), ('chr11', '+', [('274G>T', (111959695, 111959695))])) + @fix(cache('NM_002001.2')) def test_ex_notation(self): """ Variant description using EX notation should not crash but deletion of @@ -530,6 +538,7 @@ class TestVariantchecker(): check_variant('NM_002001.2:c.EX1del', self.output) assert_equal(len(self.output.getMessagesWithErrorCode('IDELSPLICE')), 1) + @fix(cache('LRG_1')) def test_lrg_reference(self): """ We should be able to use LRG reference sequence without error. @@ -540,30 +549,11 @@ class TestVariantchecker(): assert_equal(self.output.getIndexedOutput('genomicDescription', 0), 'LRG_1:g.6855G>T') - def test_lrg_reference_new(self): - """ - We should be able to use new LRG reference sequence without error. - - Note that all LRG sequences are now in a new format and essentially - this test is no different from the previous, except that LRG_218 was - not yet in our cache which makes it easier to test the new format. - """ - check_variant('LRG_218:c.1786_1788delAAT', self.output) - error_count, _, _ = self.output.Summary() - assert_equal(error_count, 0) - - def test_non_numeric_locus_tag_ending(self): - """ - Locus tag in NC_002128 does not end in an underscore and three digits - but we should not crash on it. - """ - check_variant('NC_002128(tagA):c.3del', self.output) - + @fix(cache('NM_002001.2')) def test_gi_reference_plain(self): """ Test reference sequence notation with GI number. 
""" - assert self._load_record('NM_002001.2') # Make sure it's in our database check_variant('31317229:c.6del', self.output) error_count, _, _ = self.output.Summary() assert_equal(error_count, 0) @@ -572,11 +562,11 @@ class TestVariantchecker(): assert '31317229(FCER1A_v001):c.6del' \ in self.output.getOutput('descriptions') + @fix(cache('NM_002001.2')) def test_gi_reference_prefix(self): """ Test reference sequence notation with GI number and prefix. """ - assert self._load_record('NM_002001.2') # Make sure it's in our database check_variant('GI31317229:c.6del', self.output) error_count, _, _ = self.output.Summary() assert_equal(error_count, 0) @@ -585,11 +575,11 @@ class TestVariantchecker(): assert '31317229(FCER1A_v001):c.6del' \ in self.output.getOutput('descriptions') + @fix(cache('NM_002001.2')) def test_gi_reference_prefix_colon(self): """ Test reference sequence notation with GI number and prefix with colon. """ - assert self._load_record('NM_002001.2') # Make sure it's in our database check_variant('GI:31317229:c.6del', self.output) error_count, _, _ = self.output.Summary() assert_equal(error_count, 0) @@ -598,6 +588,7 @@ class TestVariantchecker(): assert '31317229(FCER1A_v001):c.6del' \ in self.output.getOutput('descriptions') + @fix(cache('NM_002001.2')) def test_nop_nm(self): """ Variant on NM without effect should be described as '='. @@ -610,11 +601,12 @@ class TestVariantchecker(): assert 'NM_002001.2(FCER1A_v001):c.=' \ in self.output.getOutput('descriptions') + @fix(cache('DMD')) def test_nop_ud(self): """ Variant on UD without effect should be described as '='. 
""" - ud = self._slice_gene('DMD') + ud = REFERENCES['DMD']['accession'] check_variant(ud + ':g.5T>T', self.output) error_count, _, _ = self.output.Summary() assert_equal(error_count, 0) @@ -625,12 +617,13 @@ class TestVariantchecker(): assert ud + '(DMD_v001):c.=' \ in self.output.getOutput('descriptions') + @fix(cache('DPYD')) def test_ud_reverse_sequence(self): """ Variant on UD from reverse strand should have reverse complement sequence. """ - ud = self._slice_gene('DPYD') + ud = REFERENCES['DPYD']['accession'] check_variant(ud + '(DPYD_v1):c.85C>T', self.output) error_count, _, _ = self.output.Summary() assert_equal(error_count, 0) @@ -641,11 +634,12 @@ class TestVariantchecker(): assert ud + '(DPYD_v001):c.85C>T' \ in self.output.getOutput('descriptions') + @fix(cache('MARK1')) def test_ud_forward_sequence(self): """ Variant on UD from forward strand should have forward sequence. """ - ud = self._slice_gene('MARK1') + ud = REFERENCES['MARK1']['accession'] check_variant(ud + '(MARK1_v001):c.400T>C', self.output) error_count, _, _ = self.output.Summary() assert_equal(error_count, 0) @@ -656,12 +650,14 @@ class TestVariantchecker(): assert ud + '(MARK1_v001):c.400T>C' \ in self.output.getOutput('descriptions') + @fix(cache('chr9_reverse')) def test_ud_reverse_range(self): """ Variant on UD from reverse strand should have reversed range positions. """ - ud = self._slice('NC_000009.11', 32922603, 33006639, 2) + # This is just some slice from the reverse strand of hg19 chr9. + ud = REFERENCES['chr9_reverse']['accession'] check_variant(ud + ':g.10624_78132del', self.output) error_count, _, _ = self.output.Summary() assert_equal(error_count, 0) @@ -670,11 +666,12 @@ class TestVariantchecker(): assert_equal(self.output.getIndexedOutput('genomicDescription', 0), ud + ':g.10624_78132del') + @fix(cache('MARK1')) def test_ud_forward_range(self): """ Variant on UD from forward strand should have forward range positions. 
""" - ud = self._slice_gene('MARK1') + ud = REFERENCES['MARK1']['accession'] check_variant(ud + '(MARK1_v001):c.400_415del', self.output) error_count, _, _ = self.output.Summary() assert_equal(error_count, 0) @@ -683,13 +680,15 @@ class TestVariantchecker(): assert_equal(self.output.getIndexedOutput('genomicDescription', 0), ud + ':g.76614_76629del') + @fix(cache('chr9_reverse')) def test_ud_reverse_del_length(self): """ Variant on UD from reverse strand should have reversed range positions, but not reverse complement of first argument (it is not a sequence, but a length). """ - ud = self._slice('NC_000009.11', 32922603, 33006639, 2) + # This is just some slice from the reverse strand of hg19 chr9. + ud = REFERENCES['chr9_reverse']['accession'] check_variant(ud + ':g.10624_78132del67509', self.output) error_count, _, _ = self.output.Summary() assert_equal(error_count, 0) @@ -698,6 +697,7 @@ class TestVariantchecker(): assert_equal(self.output.getIndexedOutput('genomicDescription', 0), ud + ':g.10624_78132del') + @fix(cache('DPYD')) def test_ud_reverse_roll(self): """ Variant on UD from reverse strand should roll the oposite direction. @@ -709,7 +709,7 @@ class TestVariantchecker(): g. 748 749 750 751 752 753 chr g. 868 867 866 865 864 863 """ - ud = self._slice_gene('DPYD') + ud = REFERENCES['DPYD']['accession'] check_variant(ud + '(DPYD_v001):c.104del', self.output) error_count, _, _ = self.output.Summary() assert_equal(error_count, 0) @@ -720,6 +720,7 @@ class TestVariantchecker(): assert ud + '(DPYD_v001):c.105del' \ in self.output.getOutput('descriptions') + @fix(cache('MARK1')) def test_ud_forward_roll(self): """ Variant on UD from forward strand should roll the same. @@ -731,7 +732,7 @@ class TestVariantchecker(): g. 612 613 614 615 616 chr g. 
179 180 181 182 183 """ - ud = self._slice_gene('MARK1') + ud = REFERENCES['MARK1']['accession'] check_variant(ud + '(MARK1_v001):c.400del', self.output) error_count, _, _ = self.output.Summary() assert_equal(error_count, 0) @@ -742,7 +743,7 @@ class TestVariantchecker(): assert ud + '(MARK1_v001):c.401del' \ in self.output.getOutput('descriptions') - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(cache('AL449423.14')) def test_deletion_with_sequence_forward_genomic(self): """ Specify the deleted sequence in a deletion. @@ -753,7 +754,7 @@ class TestVariantchecker(): assert 'AL449423.14(CDKN2A_v001):c.98_99del' \ in self.output.getOutput('descriptions') - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(cache('AL449423.14')) def test_deletion_with_length_forward_genomic(self): """ Specify the deleted sequence length in a deletion. @@ -764,7 +765,7 @@ class TestVariantchecker(): assert 'AL449423.14(CDKN2A_v001):c.98_99del' \ in self.output.getOutput('descriptions') - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(cache('AL449423.14')) def test_deletion_with_sequence_reverse_coding(self): """ Specify the deleted sequence in a deletion on the reverse strand. @@ -775,7 +776,7 @@ class TestVariantchecker(): assert 'AL449423.14(CDKN2A_v001):c.161_163del' \ in self.output.getOutput('descriptions') - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(cache('AL449423.14')) def test_deletion_with_length_reverse_coding(self): """ Specify the deleted sequence length in a deletion on the reverse strand. 
@@ -786,6 +787,7 @@ class TestVariantchecker(): assert 'AL449423.14(CDKN2A_v001):c.161_163del' \ in self.output.getOutput('descriptions') + @fix(cache('NG_008939.1')) def test_deletion_with_sequence_reverse_ng_coding(self): """ Specify the deleted sequence in a deletion on the reverse strand @@ -797,6 +799,7 @@ class TestVariantchecker(): assert 'NG_008939.1(PCCB_v001):c.155_157del' \ in self.output.getOutput('descriptions') + @fix(cache('NG_008939.1')) def test_deletion_with_length_reverse_ng_coding(self): """ Specify the deleted sequence length in a deletion on the reverse strand @@ -808,6 +811,7 @@ class TestVariantchecker(): assert 'NG_008939.1(PCCB_v001):c.155_157del' \ in self.output.getOutput('descriptions') + @fix(cache('AB026906.1')) def test_inversion(self): """ Inversion variant. @@ -818,6 +822,7 @@ class TestVariantchecker(): assert 'AB026906.1(SDHD_v001):c.274_275inv' \ in self.output.getOutput('descriptions') + @fix(cache('NM_000193.2')) def test_delins_with_length(self): """ Delins with explicit length of deleted sequence (bug #108). @@ -825,6 +830,7 @@ class TestVariantchecker(): check_variant('NM_000193.2:c.108_109del2insG', self.output) assert 'NM_000193.2(SHH_i001):p.(Lys38Serfs*2)' in self.output.getOutput('protDescriptions') + @fix(cache('NG_009105.1')) def test_protein_level_description(self): """ Currently protein level descriptions are not implemented. @@ -832,6 +838,7 @@ class TestVariantchecker(): check_variant('NG_009105.1(OPN1LW):p.=', self.output) assert_equal(len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')), 1) + @fix(cache('NP_064445.1')) def test_protein_reference(self): """ Currently protein references are not implemented. 
@@ -839,28 +846,33 @@ class TestVariantchecker(): check_variant('NP_064445.1:p.=', self.output) assert_equal(len(self.output.getMessagesWithErrorCode('ENOTIMPLEMENTED')), 1) + @fix(cache('A1BG')) def test_wnomrna_other(self): """ Warning for no mRNA field on other than currently selected transcript should give WNOMRNA_OTHER warning. """ - ud = self._slice_gene('A1BG') # Contains ZNF497 (v1 and v2) with no mRNA + # Contains ZNF497 (v1 and v2) with no mRNA + ud = REFERENCES['A1BG']['accession'] check_variant(ud + '(A1BG_v001):c.13del', self.output) wnomrna_other = self.output.getMessagesWithErrorCode('WNOMRNA_OTHER') assert len(wnomrna_other) == 3 + @fix(cache('A1BG')) def test_wnomrna(self): """ Warning for no mRNA field on currently selected transcript should give WNOMRNA warning. """ - ud = self._slice_gene('A1BG') # Contains ZNF497 (v1 and v2) with no mRNA + # Contains ZNF497 (v1 and v2) with no mRNA + ud = REFERENCES['A1BG']['accession'] check_variant(ud + '(ZNF497_v001):c.13del', self.output) wnomrna = self.output.getMessagesWithErrorCode('WNOMRNA') wnomrna_other = self.output.getMessagesWithErrorCode('WNOMRNA_OTHER') assert len(wnomrna) == 1 assert len(wnomrna_other) == 2 + @fix(cache('L41870.1')) def test_mrna_ref_adjacent_exons_warn(self): """ Warning for mRNA reference where exons are not adjacent. @@ -871,10 +883,11 @@ class TestVariantchecker(): w_exon_annotation = self.output.getMessagesWithErrorCode('WEXON_ANNOTATION') assert len(w_exon_annotation) == 1 + @fix(cache('NM_003002.2')) def test_mrna_ref_adjacent_exons_no_warn(self): """ No warning for mRNA reference where exons are adjacent. 
""" - check_variant('NM_133378.3:c.1del', self.output) + check_variant('NM_003002.2:c.1del', self.output) w_exon_annotation = self.output.getMessagesWithErrorCode('WEXON_ANNOTATION') assert len(w_exon_annotation) == 0 diff --git a/tests/test_website.py b/tests/test_website.py index 797ef63d9c4c0ee9c1d74120652f300c02d2dc9a..13984378cb66ff7e950d85c5001954b0647df324 100644 --- a/tests/test_website.py +++ b/tests/test_website.py @@ -6,372 +6,236 @@ Tests for the WSGI interface to Mutalyzer. #import logging; logging.basicConfig() +import bz2 +import cgi +import logging +from mock import patch import os import re -import urllib2 +from StringIO import StringIO import time -import web -from nose.tools import * -from webtest import TestApp -import logging import urllib -import cgi +import urllib2 -# Todo: Since the `mutalyzer.website` module accesses the configuration -# settings at import time, we need to pre-populate those. This hack can -# be removed once we moved from web.py to Flask and refactored the web -# application into an application factory (which does not access the -# configuration settings at import time). -from mutalyzer.config import settings -settings.configure(dict( - DEBUG = True, - TESTING = True, - CACHE_DIR = None, - DATABASE_URI = 'sqlite://', - LOG_FILE = None)) +from Bio import Entrez +import lxml.html +from nose.tools import * import mutalyzer -from mutalyzer import website -from mutalyzer.util import slow, skip +from mutalyzer import Scheduler +from mutalyzer.website import create_app -import utils +from fixtures import cache, database, hg19, hg19_transcript_mappings +from utils import MutalyzerTest +from utils import fix BATCH_RESULT_URL = 'http://localhost/mutalyzer/Results_{id}.txt' -class TestWSGI(): +class TestWebsite(MutalyzerTest): """ Test the Mutalyzer WSGI interface. """ def setup(self): - """ - Initialize test application. 
- """ - utils.create_test_environment(database=True) - web.config.debug = False - application = website.app.wsgifunc() - self.app = TestApp(application) - - def teardown(self): - utils.destroy_environment() + super(TestWebsite, self).setup() + self.app = create_app().test_client() - def test_root(self): - """ - Expect the index HTML page. - """ - r = self.app.get('') - assert_equal(r.status, '301 Moved Permanently') - assert r.location.endswith('/') - r = r.follow() - assert_equal(r.status, '200 OK') - # We check for <html> to make sure the menu template is included - r.mustcontain('<html>', - 'Welcome to the Mutalyzer website', - '</html>') - - def test_index(self): + def test_homepage(self): """ Expect the index HTML page. """ r = self.app.get('/') - assert_equal(r.status, '200 OK') - # We check for <html> to make sure the menu template is included - r.mustcontain('<html>', - 'Welcome to the Mutalyzer website', - '</html>') - - def test_index_explicit(self): - """ - Expect the index HTML page. - """ - r = self.app.get('/index') - assert_equal(r.status, '200 OK') - # We check for <html> to make sure the menu template is included - r.mustcontain('<html>', - 'Welcome to the Mutalyzer website', - '</html>') + assert_equal(r.status_code, 200) + assert 'Welcome to the Mutalyzer website' in r.data def test_about(self): """ - See if my name is on the About page ;) + See if people get proper credit. """ r = self.app.get('/about') assert_equal(r.status, '200 OK') - r.mustcontain('Martijn Vermaat') + assert 'Jonathan Vis' in r.data def test_non_existing(self): """ Expect a 404 response. """ - r = self.app.get('/this/doesnotexist', status=404) + r = self.app.get('/this/doesnotexist') + assert_equal(r.status_code, 404) + @fix(database) def test_menu_links(self): """ Test all links in the main menu. 
""" ignore = [] # This could contain relative links we want to skip r = self.app.get('/') - for link in r.lxml.cssselect('#menu a'): + + dom = lxml.html.fromstring(r.data) + + for link in dom.cssselect('#menu a'): href = link.get('href') - if href.startswith('http://') or href.startswith('https://') \ - or href in ignore: + if (href.startswith('http://') or + href.startswith('https://') or + href in ignore): continue if not href.startswith('/'): href = '/' + href - self.app.get(href) + + r = self.app.get(href) + assert_equal(r.status_code, 200) def test_description_extractor(self): """ Submit the variant description extractor. """ - r = self.app.get('/descriptionExtract') - form = r.forms[0] - form['referenceSeq'] = 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA' - form['variantSeq'] = 'ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA' - r = form.submit() - r.mustcontain('g.[5_6insTT;17del;26A>C;35dup]') + r = self.app.get('/description-extractor', query_string={ + 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', + 'variant_sequence': 'ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA'}) + assert 'g.[5_6insTT;17del;26A>C;35dup]' in r.data def test_checksyntax_valid(self): """ Submit the check syntax form with a valid variant. """ - r = self.app.get('/syntaxCheck') - form = r.forms[0] - form['variant'] = 'AB026906.1:c.274G>T' - r = form.submit() - r.mustcontain('The syntax of this variant is OK!') + r = self.app.get('/syntax-checker', + query_string={'description': 'AB026906.1:c.274G>T'}) + assert 'The syntax of this variant is OK!' in r.data def test_checksyntax_invalid(self): """ Submit the check syntax form with an invalid variant. 
""" - r = self.app.get('/syntaxCheck') - form = r.forms[0] - form['variant'] = 'AB026906.1:c.27' - r = form.submit() - r.mustcontain('Fatal', - 'Details of the parse error') + r = self.app.get('/syntax-checker', + query_string={'description': 'AB026906.1:c.27'}) + assert 'Fatal' in r.data + assert 'Details of the parse error' in r.data + @fix(database, cache('NM_002001.2')) def test_check_valid(self): """ Submit the name checker form with a valid variant. Should include form and main HTML layout. """ - r = self.app.get('/check') - form = r.forms[0] - form['name'] = 'NM_002001.2:g.1del' - r = form.submit() - r.mustcontain('0 Errors', - '0 Warnings', - 'Raw variant 1: deletion of 1', - '<a href="#bottom" class="hornav">go to bottom</a>', - '<input type="text" name="name" value="NM_002001.2:g.1del" style="width:100%">') - - def test_check_more_valid(self): - """ - Test the name checker for some more variants. - """ - def check_name(name): - r = self.app.get('/check?name=%s' % name) - r.mustcontain('0 Errors') - names = ['NG_012337.1:g.7055C>T'] - for name in names: - check_name(name) - - def test_check_post(self): - """ - Test the name checker for a POST request. - - We accept POST requests for backwards compatibility. - """ - def check_name(name): - r = self.app.post('/check', {'name': name}) - assert_equal(r.status, '301 Moved Permanently') - assert r.location.endswith('/check?name=%s' % urllib.quote(name)) - names = ['NG_012337.1:g.7055C>T'] - for name in names: - check_name(name) - - def test_check_post_old(self): - """ - Test the name checker for a POST request with the old parameter - name. - - We accept POST requests for backwards compatibility. 
- """ - def check_name(name): - r = self.app.post('/check', {'mutationName': name}) - assert_equal(r.status, '301 Moved Permanently') - assert r.location.endswith('/check?name=%s' % urllib.quote(name)) - names = ['NG_012337.1:g.7055C>T'] - for name in names: - check_name(name) + r = self.app.get('/name-checker', + query_string={'description': 'NM_002001.2:g.1del'}) + assert '0 Errors' in r.data + assert '0 Warnings' in r.data + assert 'Raw variant 1: deletion of 1' in r.data + assert '<a href="#bottom" class="hornav">go to bottom</a>' in r.data + assert '<input type="text" name="description" value="NM_002001.2:g.1del" style="width:100%">' in r.data def test_check_invalid(self): """ Submit the name checker form with an invalid variant. """ - r = self.app.get('/check') - form = r.forms[0] - form['name'] = 'NM_002001.2' - r = form.submit() - r.mustcontain('1 Error', - '0 Warnings', - 'Details of the parse error') + r = self.app.get('/name-checker', + query_string={'description': 'NM_002001.2'}) + assert '1 Error' in r.data + assert '0 Warnings' in r.data + assert 'Details of the parse error' in r.data + @fix(database, cache('NP_064445.1')) def test_check_protein_reference(self): """ Submit the name checker form with a protein reference sequence (not supported). """ - r = self.app.get('/check') - form = r.forms[0] - form['name'] = 'BAA81889.1:c.274G>T' - r = form.submit() - r.mustcontain('1 Error', - '0 Warnings', - 'Protein reference sequences are not supported') + r = self.app.get('/name-checker', + query_string={'description': 'NP_064445.1:c.274G>T'}) + assert '1 Error' in r.data + assert '0 Warnings' in r.data + assert 'Protein reference sequences are not supported' in r.data + @fix(database, cache('NM_002001.2')) def test_check_noninteractive(self): """ Submit the name checker form non-interactively. Should not include form and main layout HTML. 
""" - r = self.app.get('/check?name=NM_002001.2:g.1del&standalone=1') - assert_false('<a href="#bottom" class="hornav">go to bottom</a>' in r) - assert_false('<input type="text" name="name" value="NM_002001.2:g.1del" style="width:100%">' in r) - r.mustcontain('0 Errors', - '0 Warnings', - 'Raw variant 1: deletion of 1', - '<html>', - '</html>') + r = self.app.get('/name-checker', + query_string={'description': 'NM_002001.2:g.1del', + 'standalone': '1'}) + assert '<a href="#bottom" class="hornav">go to bottom</a>' not in r.data + assert '<input type="text" name="description" value="NM_002001.2:g.1del" style="width:100%">' not in r.data + assert '0 Errors' in r.data + assert '0 Warnings' in r.data + assert 'Raw variant 1: deletion of 1' in r.data + @fix(database, cache('NG_012772.1')) def test_check_interactive_links(self): """ Submitting interactively should have links to transcripts also interactive. """ - r = self.app.get('/check?name=%s' % urllib.quote('NG_012337.1:g.7055C>T')) - r.mustcontain('0 Errors') - r.mustcontain('"check?name=%s"' % cgi.escape(urllib.quote('NG_012337.1:g.7055C>T'))) - # Fix for r536: disable the -u and +d convention. - #r.mustcontain('"check?name=%s"' % cgi.escape(urllib.quote('NG_012337.1(TIMM8B_v001):c.-30-u2103G>A'))) - r.mustcontain('"check?name=%s"' % cgi.escape(urllib.quote('NG_012337.1(TIMM8B_v001):c.-2133G>A'))) - r.mustcontain('"check?name=%s"' % cgi.escape(urllib.quote('NG_012337.1(SDHD_v001):c.204C>T'))) - - @skip - def test_check_noninteractive_links(self): - """ - Submitting non-interactively should have links to transcripts also - non-interactive. - - Todo: This is hard to implement in TAL, do this when we move to - another template language. See Trac issue #97. 
- """ - r = self.app.get('/check?name=%s&standalone=1' % urllib.quote('NG_012337.1:g.7055C>T')) - r.mustcontain('0 Errors') - r.mustcontain('"check?name=%s&standalone=1"' % cgi.escape(urllib.quote('NG_012337.1:g.7055C>T'))) - r.mustcontain('"check?name=%s&standalone=1"' % cgi.escape(urllib.quote('NG_012337.1(TIMM8B_v001):c.-30-u2103G>A'))) - r.mustcontain('"check?name=%s&standalone=1"' % cgi.escape(urllib.quote('NG_012337.1(SDHD_v001):c.204C>T'))) - - def test_check_noninteractive_old(self): - """ - Submit the name checker form non-interactively in the old style. - Should redirect to new style. - """ - r = self.app.get('/check?mutationName=NM_002001.2:g.1del') - assert_equal(r.status, '301 Moved Permanently') - assert r.location.endswith('/check?name=%s&standalone=1' % urllib.quote('NM_002001.2:g.1del')) - - def test_check_browser_link(self): - """ - Submit the name checker form with a coding variant on a transcript. - Should include link to UCSC Genome Browser. - """ - r = self.app.get('/check') - form = r.forms[0] - form['name'] = 'NM_003002.2:c.274G>T' - r = form.submit() - # Note: the r.environ does not work in versions higher than webob 1.1.1 - bed_track = urllib.quote(r.environ['wsgi.url_scheme'] + '://' + r.environ['HTTP_HOST'] + '/bed?name=' + urllib.quote('NM_003002.2:c.274G>T')) - r.mustcontain('<a href="http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position=chr11:111959685-111959705&hgt.customText=%s">View original variant in UCSC Genome Browser</a>' % bed_track) - - def test_checkforward(self): - """ - A checkForward request should redirect to the name checker. - - This is for backwards compatibility with old bookmarks. 
- """ - r = self.app.get('/checkForward?mutationName=%s' % urllib.quote('NM_002001.2:g.1del')) - assert_equal(r.status, '301 Moved Permanently') - assert r.location.endswith('/check?name=%s' % urllib.quote('NM_002001.2:g.1del')) - r = r.follow() - r.mustcontain('0 Errors', - '0 Warnings', - 'Raw variant 1: deletion of 1', - '<a href="#bottom" class="hornav">go to bottom</a>', - '<input type="text" name="name" value="NM_002001.2:g.1del" style="width:100%">') + r = self.app.get('/name-checker', + query_string={'description': 'NG_012772.1:g.128del'}) + assert '0 Errors' in r.data + assert 'href="/name-checker?description=NG_012772.1%3Ag.128del"' in r.data + assert 'href="/name-checker?description=NG_012772.1%28BRCA2_v001%29%3Ac.-5100del"' in r.data def test_snp_converter_valid(self): """ Submit the SNP converter form with a valid SNP. """ - r = self.app.get('/snp') - form = r.forms[0] - form['rsId'] = 'rs9919552' - r = form.submit() - r.mustcontain('0 Errors', - '0 Warnings', - 'NC_000011.9:g.111959625C>T', - 'NG_012337.2:g.7055C>T', - 'NM_003002.3:c.204C>T', - 'NP_002993.1:p.Ser68=') + # Patch Retriever.snpConvert to return rs9919552. + def mock_efetch(*args, **kwargs): + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'rs9919552.xml.bz2') + return bz2.BZ2File(path) + + with patch.object(Entrez, 'efetch', mock_efetch): + r = self.app.get('/snp-converter', + query_string={'rs_id': 'rs9919552'}) + assert '0 Errors' in r.data + assert '0 Warnings' in r.data + assert 'NC_000011.9:g.111959625C>T' in r.data + assert 'NG_012337.2:g.7055C>T' in r.data + assert 'NM_003002.3:c.204C>T' in r.data + assert 'NP_002993.1:p.Ser68=' in r.data def test_snp_converter_invalid(self): """ Submit the SNP converter form with an invalid SNP. 
""" - r = self.app.get('/snp') - form = r.forms[0] - form['rsId'] = 'r9919552' - r = form.submit() - r.mustcontain('1 Error', - '0 Warnings', - 'Fatal', - 'This is not a valid dbSNP id') + r = self.app.get('/snp-converter', + query_string={'rs_id': 'r9919552'}) + + assert '1 Error' in r.data + assert '0 Warnings' in r.data + assert 'Fatal' in r.data + assert 'This is not a valid dbSNP id' in r.data + @fix(database, hg19, hg19_transcript_mappings) def test_position_converter_c2g(self): """ Submit the position converter form with a valid variant. """ - r = self.app.get('/positionConverter') - form = r.forms[0] - form['assembly_name_or_alias'] = 'hg19' - form['variant'] = 'NM_003002.2:c.204C>T' - r = form.submit() - r.mustcontain('NC_000011.9:g.111959625C>T') + r = self.app.get('/position-converter', + query_string={'assembly_name_or_alias': 'hg19', + 'description': 'NM_003002.2:c.204C>T'}) + assert 'NC_000011.9:g.111959625C>T' in r.data + @fix(database, hg19, hg19_transcript_mappings) def test_position_converter_g2c(self): """ Submit the position converter form with a valid variant. """ - r = self.app.get('/positionConverter') - form = r.forms[0] - form['assembly_name_or_alias'] = 'hg19' - form['variant'] = 'NC_000011.9:g.111959625C>T' - r = form.submit() - r.mustcontain('NM_003002.2:c.204C>T') + r = self.app.get('/position-converter', + query_string={'assembly_name_or_alias': 'hg19', + 'description': 'NC_000011.9:g.111959625C>T'}) + assert 'NM_003002.2:c.204C>T' in r.data - @slow - def _batch(self, batch_type='NameChecker', arg1=None, file="", size=0, - header='', lines=None): + def _batch(self, job_type='name-checker', assembly_name_or_alias=None, + file="", size=0, header='', lines=None): """ Submit a batch form. - @kwarg batch_type: Type of batch job to test. One of NameChecker, - SyntaxChecker, PositionConverter. - @kwarg arg1: Optional extra argument for the batch job. + @kwarg job_type: Type of batch job to test. 
One of name-checker, + syntax-checker, position-converter. + @kwarg argument: Optional extra argument for the batch job. @kwarg file: String with variants to use as input for the batch job. @kwarg size: Number of variants in input. @kwarg header: Message that must be found in the batch job result. @@ -379,73 +243,71 @@ class TestWSGI(): @return: The batch result document. @rtype: string + """ + data = {'job_type': job_type, + 'email': 'test@test.test', + 'file': (StringIO(file), 'test.txt')} + if assembly_name_or_alias is not None: + data['assembly_name_or_alias'] = assembly_name_or_alias + + r = self.app.post('/batch-jobs', + data=data) + progress_url = '/' + r.location.split('/')[-1] + + r = self.app.get(progress_url) + assert '<div id="if_items_left">' in r.data + assert '<div id="ifnot_items_left" style="display:none">' in r.data + assert ('<span id="items_left">%d</span>' % size) in r.data + + scheduler = Scheduler.Scheduler() + scheduler.process() + + r = self.app.get(progress_url) + assert '<div id="if_items_left" style="display:none">' in r.data + assert '<div id="ifnot_items_left">' in r.data + + dom = lxml.html.fromstring(r.data) + result_url = dom.cssselect('#ifnot_items_left a')[0].attrib['href'] - @note: Since the batch files are processed by a running batch daemon - process, the result gets written to the directory defined by the - system-wide configuration (e.g. /var/mutalyzer/cache), thus - inaccessible for the TestApp instance under our current user. - The 'solution' for this is to download the results via a running - webserver that should be using the same configuration as the batch - daemon. Yes, this is a hack. 
- """ - r = self.app.get('/batch') - form = r.forms[0] - if arg1: - form['arg1'] = arg1 - form['batchType'] = batch_type - form['batchEmail'] = 'test@test.test' - form.set('batchFile', ('test_%s.txt' % batch_type, - file)) - r = form.submit() - id = r.lxml.cssselect('#jobID')[0].get('value') - max_tries = 60 - for i in range(max_tries): - r = self.app.get('/progress?jobID=' + id + '&totalJobs=' + str(size) + '&ajax=1') - assert_equal(r.content_type, 'text/plain') - #print '%s: %s' % (batch_type, r.body) - if r.body == 'OK': break - assert re.match('[0-9]+', r.body) - time.sleep(2) - assert_equal(r.body, 'OK') - # Actually, this only means the last entry was taken from the database - # queue. It might still be processing, in which case we miss some - # expected output. So let's wait a few seconds. - time.sleep(2) - # This is a hack to get to the batch results (see @note above). - response = urllib2.urlopen(BATCH_RESULT_URL.format(id=id)) - assert_equal(response.info().getheader('Content-Type'), 'text/plain') - result = response.read() - assert header in result if not lines: lines = size - assert_equal(len(result.strip().split('\n')) - 1, lines) - return result - @skip # Todo: AL449423.14 no longer contains gene annotations. + r = self.app.get(result_url) + assert 'text/plain' in r.headers['Content-Type'] + assert header in r.data + assert_equal(len(r.data.strip().split('\n')) - 1, lines) + + return r.data + + @fix(database, cache('AB026906.1', 'NM_003002.2', 'AL449423.14')) def test_batch_namechecker(self): """ Submit the batch name checker form. 
""" variants=['AB026906.1(SDHD):g.7872G>T', - 'NM_003002.1:c.3_4insG', + 'NM_003002.2:c.3_4insG', 'AL449423.14(CDKN2A_v002):c.5_400del'] - self._batch('NameChecker', + self._batch('name-checker', file='\n'.join(variants), size=len(variants), - header='Input\tErrors | Messages') + header='Input\tErrors and warnings') + @fix(database) def test_batch_namechecker_extra_tab(self): """ - Submit the batch name checker form with lines ending with tab + Submit the batch syntax checker form with lines ending with tab characters. """ - variants=['AB026906.1(SDHD):g.7872G>T\t'] - self._batch('NameChecker', + variants=['AB026906.1(SDHD):g.7872G>T\t', + 'AB026906.1(SDHD):g.7872G>T\t', + 'AB026906.1(SDHD):g.7872G>T\t'] + self._batch('syntax-checker', file='\n'.join(variants), - size=len(variants), - header='Input\tErrors | Messages') + size=len(variants) * 2, + lines=len(variants), + header='Input\tStatus') - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(database) def test_batch_syntaxchecker(self): """ Submit the batch syntax checker form. @@ -453,67 +315,67 @@ class TestWSGI(): variants = ['AB026906.1(SDHD):g.7872G>T', 'NM_003002.1:c.3_4insG', 'AL449423.14(CDKN2A_v002):c.5_400del'] - self._batch('SyntaxChecker', + self._batch('syntax-checker', file='\n'.join(variants), size=len(variants), header='Input\tStatus') + @fix(database, hg19, hg19_transcript_mappings) def test_batch_positionconverter(self): """ Submit the batch position converter form. """ variants = ['NM_003002.2:c.204C>T', 'NC_000011.9:g.111959625C>T'] - self._batch('PositionConverter', - arg1='hg19', + self._batch('position-converter', + assembly_name_or_alias='hg19', file='\n'.join(variants), size=len(variants), header='Input Variant') - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(database) def test_batch_syntaxchecker_newlines_unix(self): """ - Submit the batch syntax checker form with unix line endings. + Submit batch syntax checker job with Unix line endings. 
""" variants = ['AB026906.1(SDHD):g.7872G>T', 'NM_003002.1:c.3_4insG', 'AL449423.14(CDKN2A_v002):c.5_400del'] - self._batch('SyntaxChecker', + self._batch('syntax-checker', file='\n'.join(variants), size=len(variants), header='Input\tStatus') - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(database) def test_batch_syntaxchecker_newlines_mac(self): """ - Submit the batch syntax checker form with mac line endings. + Submit batch syntax checker job with Mac line endings. """ variants = ['AB026906.1(SDHD):g.7872G>T', 'NM_003002.1:c.3_4insG', 'AL449423.14(CDKN2A_v002):c.5_400del'] - self._batch('SyntaxChecker', + self._batch('syntax-checker', file='\r'.join(variants), size=len(variants), header='Input\tStatus') - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(database) def test_batch_syntaxchecker_newlines_windows(self): """ - Submit the batch syntax checker form with windows line endings. + Submit batch syntax checker job with Windows line endings. """ variants = ['AB026906.1(SDHD):g.7872G>T', 'NM_003002.1:c.3_4insG', 'AL449423.14(CDKN2A_v002):c.5_400del'] - self._batch('SyntaxChecker', + self._batch('syntax-checker', file='\r\n'.join(variants), size=len(variants), header='Input\tStatus') - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(database) def test_batch_syntaxchecker_newlines_big_unix(self): """ - Submit the batch syntax checker form with unix line ending - styles and a big input file. + Submit big batch syntax checker job with Unix line endings. """ samples = ['AB026906.1(SDHD):g.7872G>T', 'NM_003002.1:c.3_4insG', @@ -522,16 +384,15 @@ class TestWSGI(): # Create 240 variants out of 3 samples for i in range(80): variants.extend(samples) - self._batch('SyntaxChecker', + self._batch('syntax-checker', file='\n'.join(variants), size=len(variants), header='Input\tStatus') - @skip # Todo: AL449423.14 no longer contains gene annotations. 
+ @fix(database) def test_batch_syntaxchecker_newlines_big_mac(self): """ - Submit the batch syntax checker form with mac line ending - styles and a big input file. + Submit big batch syntax checker job with Mac line endings. """ samples = ['AB026906.1(SDHD):g.7872G>T', 'NM_003002.1:c.3_4insG', @@ -540,16 +401,15 @@ class TestWSGI(): # Create 240 variants out of 3 samples for i in range(80): variants.extend(samples) - self._batch('SyntaxChecker', + self._batch('syntax-checker', file='\r'.join(variants), size=len(variants), header='Input\tStatus') - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(database) def test_batch_syntaxchecker_newlines_big_windows(self): """ - Submit the batch syntax checker form with windows line ending - styles and a big input file. + Submit big batch syntax checker job with Windows line endings. """ samples = ['AB026906.1(SDHD):g.7872G>T', 'NM_003002.1:c.3_4insG', @@ -558,12 +418,12 @@ class TestWSGI(): # Create 240 variants out of 3 samples for i in range(80): variants.extend(samples) - self._batch('SyntaxChecker', + self._batch('syntax-checker', file='\r\n'.join(variants), size=len(variants), header='Input\tStatus') - @skip # Todo: AL449423.14 no longer contains gene annotations. + @fix(database) def test_batch_syntaxchecker_oldstyle(self): """ Submit the batch syntax checker form with old style input file. @@ -572,60 +432,28 @@ class TestWSGI(): 'AB026906.1\tSDHD\tg.7872G>T', 'NM_003002.1\t\tc.3_4insG', 'AL449423.14\tCDKN2A_v002\tc.5_400del'] - self._batch('SyntaxChecker', + self._batch('syntax-checker', file='\n'.join(variants), size=len(variants)-1, header='Input\tStatus') - @slow + @fix(database, cache('AB026906.1')) def test_batch_namechecker_restriction_sites(self): """ Submit the batch name checker form and see if restriction site effects are added. - - Note that we use the @slow decorator here even though it is already - applied to self._batch. 
The reason is that we use the result from - self._batch, which does not exist if @slow checks are disabled. """ variants=['AB026906.1:c.274G>T', 'AB026906.1:c.[274G>T;143A>G;15G>T]'] - results = self._batch('NameChecker', + results = self._batch('name-checker', file='\n'.join(variants), size=len(variants), - header='Input\tErrors | Messages').strip().split('\n') + header='Input\tErrors and warnings').strip().split('\n') assert 'Restriction Sites Created\tRestriction Sites Deleted' in results[0] assert 'CviQI,RsaI\tBccI' in results[1] assert 'CviQI,RsaI;HhaI,HinP1I;SfcI\tBccI;;BpmI,BsaXI (2),MnlI' in results[2] - @slow - def test_batch_syntaxchecker_toobig(self): - """ - Submit the batch syntax checker with a too big input file. - """ - seed = """ -Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy -nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. Ut wisi -enim ad minim veniam, quis nostrud exerci tation ullamcorper suscipit lobortis -nisl ut aliquip ex ea commodo consequat. Duis autem vel eum iriure dolor in -hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu -feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui -blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla -facilisi.""" - file = seed - # Very crude way of creating something at least 6MB in size - while len(file) < 6000000: - file += file - r = self.app.get('/batch') - form = r.forms[0] - form['batchType'] = 'SyntaxChecker' - form['batchEmail'] = 'm.vermaat.hg@lumc.nl' - form.set('batchFile', ('test_batch_toobig.txt', - file)) - r = form.submit(status=413) - assert_equal(r.content_type, 'text/plain') - - @skip # Todo: AL449423.14 no longer contains gene annotations. - @slow + @fix(database) def test_batch_multicolumn(self): """ Submit the batch syntax checker with a multiple-colums input file. 
@@ -635,7 +463,7 @@ facilisi.""" variants = [('AB026906.1(SDHD):g.7872G>T', 'NM_003002.1:c.3_4insG'), ('NM_003002.1:c.3_4insG', 'AB026906.1(SDHD):g.7872G>T'), ('AL449423.14(CDKN2A_v002):c.5_400del', 'AL449423.14(CDKN2A_v002):c.5_400del')] - result = self._batch('SyntaxChecker', + result = self._batch('syntax-checker', file='\n'.join(['\t'.join(r) for r in variants]), size=len(variants) * 2, header='Input\tStatus', @@ -647,198 +475,219 @@ facilisi.""" """ Download a Python example client for the web service. """ - r = self.app.get('/download/client-suds.py') - assert_equal(r.content_type, 'text/plain') - r.mustcontain('#!/usr/bin/env python') + r = self.app.get('/downloads/client-suds.py') + assert 'text/plain' in r.headers['Content-Type'] + assert '#!/usr/bin/env python' in r.data def test_download_rb(self): """ Download a Ruby example client for the web service. """ - r = self.app.get('/download/client-savon.rb') - assert_equal(r.content_type, 'text/plain') - r.mustcontain('#!/usr/bin/env ruby') + r = self.app.get('/downloads/client-savon.rb') + assert 'text/plain' in r.headers['Content-Type'] + assert '#!/usr/bin/env ruby' in r.data def test_download_cs(self): """ Download a C# example client for the web service. """ - r = self.app.get('/download/client-mono.cs') - assert_equal(r.content_type, 'text/plain') - r.mustcontain('public static void Main(String [] args) {') + r = self.app.get('/downloads/client-mono.cs') + assert_equal(r.headers['Content-Type'], 'text/plain') + assert 'public static void Main(String [] args) {' in r.data def test_download_php(self): """ Download a PHP example client for the web service. """ - r = self.app.get('/download/client-php.php') - assert_equal(r.content_type, 'text/plain') - r.mustcontain('<?php') + r = self.app.get('/downloads/client-php.php') + assert 'text/plain' in r.headers['Content-Type'] + assert '<?php' in r.data def test_downloads_batchtest(self): """ Download the batch test example file. 
""" r = self.app.get('/downloads/batchtestnew.txt') - assert_equal(r.content_type, 'text/plain') - r.mustcontain('NM_003002.1:c.3_4insG') + assert 'text/plain' in r.headers['Content-Type'] + assert 'NM_003002.1:c.3_4insG' in r.data def test_annotated_soap_api(self): """ Test the SOAP documentation generated from the WSDL. """ r = self.app.get('/soap-api') - assert_equal(r.content_type, 'text/html') - r.mustcontain('Web Service: Mutalyzer') + assert 'text/html' in r.headers['Content-Type'] + assert 'Web Service: Mutalyzer' in r.data + @fix(database, cache('NG_012337.1')) def test_getgs(self): """ Test the /getGS interface used by LOVD2. """ - r = self.app.get('/getGS?variantRecord=NM_003002.2&forward=1&mutationName=NG_012337.1:g.7055C%3ET') - r.mustcontain('0 Errors', - '0 Warnings', - 'Raw variant 1: substitution at 7055') - assert_equal(r.body.find('go to bottom'), -1) - assert_equal(r.body.find('<input'), -1) - + r = self.app.get('/getGS', + query_string={'variantRecord': 'NM_003002.2', + 'forward': '1', + 'mutationName': 'NG_012337.1:g.7055C>T'}, + follow_redirects=True) + assert '0 Errors' in r.data + assert '0 Warnings' in r.data + assert 'Raw variant 1: substitution at 7055' in r.data + assert 'go to bottom' not in r.data + assert '<input' not in r.data + + @fix(database, hg19, hg19_transcript_mappings) def test_variantinfo_g2c(self): """ Test the /Variant_info interface used by LOVD2 (g to c). 
""" - r = self.app.get('/Variant_info?LOVD_ver=2.0-29&build=hg19&acc=NM_203473.1&var=g.48374289_48374389del') - assert_equal(r.content_type, 'text/plain') + r = self.app.get('/Variant_info', + query_string={'LOVD_ver': '2.0-29', + 'build': 'hg19', + 'acc': 'NM_203473.1', + 'var': 'g.48374289_48374389del'}) + assert 'text/plain' in r.headers['Content-Type'] expected = '\n'.join(['1020', '0', '1072', '48', '48374289', '48374389', 'del']) - assert_equal(r.body, expected) + assert_equal(r.data, expected) + @fix(database, hg19, hg19_transcript_mappings) def test_variantinfo_c2g(self): """ Test the /Variant_info interface used by LOVD2 (c to g). """ - r = self.app.get('/Variant_info?LOVD_ver=2.0-29&build=hg19&acc=NM_203473.1&var=c.1020_1072%2B48del') - assert_equal(r.content_type, 'text/plain') + r = self.app.get('/Variant_info', + query_string={'LOVD_ver': '2.0-29', + 'build': 'hg19', + 'acc': 'NM_203473.1', + 'var': 'c.1020_1072+48del'}) + assert 'text/plain' in r.headers['Content-Type'] expected = '\n'.join(['1020', '0', '1072', '48', '48374289', '48374389', 'del']) - assert_equal(r.body, expected) + assert_equal(r.data, expected) + @fix(database, hg19, hg19_transcript_mappings) def test_variantinfo_c2g_downstream(self): """ Test the /Variant_info interface used by LOVD2 (c variant downstream notation to g). """ - r = self.app.get('/Variant_info?LOVD_ver=2.0-29&build=hg19&acc=NM_203473.1&var=c.1709%2Bd187del') - assert_equal(r.content_type, 'text/plain') + r = self.app.get('/Variant_info', + query_string={'LOVD_ver': '2.0-29', + 'build': 'hg19', + 'acc': 'NM_203473.1', + 'var': 'c.1709+d187del'}) + assert 'text/plain' in r.headers['Content-Type'] expected = '\n'.join(['1709', '187', '1709', '187', '48379389', '48379389', 'del']) - assert_equal(r.body, expected) + assert_equal(r.data, expected) + @fix(database, hg19, hg19_transcript_mappings) def test_variantinfo_no_variant(self): """ Test the /Variant_info interface used by LOVD2 (without variant). 
""" - r = self.app.get('/Variant_info?LOVD_ver=2.0-32&build=hg19&acc=NM_001083962.1') + r = self.app.get('/Variant_info', + query_string={'LOVD_ver': '2.0-29', + 'build': 'hg19', + 'acc': 'NM_203473.1'}) + assert 'text/plain' in r.headers['Content-Type'] assert_equal(r.content_type, 'text/plain') - expected = '\n'.join(['-612', '7720', '2016']) - assert_equal(r.body, expected) + expected = '\n'.join(['-158', '1709', '1371']) + assert_equal(r.data, expected) + @fix(database, hg19, hg19_transcript_mappings) def test_variantinfo_ivs(self): """ Test the /Variant_info interface used by LOVD2 (with IVS positioning). """ - r = self.app.get('/Variant_info?LOVD_ver=2.0-33&build=hg19&acc=NM_000249.3&var=c.IVS10%2B3A%3EG') - assert_equal(r.content_type, 'text/plain') + r = self.app.get('/Variant_info', + query_string={'LOVD_ver': '2.0-33', + 'build': 'hg19', + 'acc': 'NM_000249.3', + 'var': 'c.IVS10+3A>G'}) + assert 'text/plain' in r.headers['Content-Type'] expected = '\n'.join(['884', '3', '884', '3', '37059093', '37059093', 'subst']) - assert_equal(r.body, expected) + assert_equal(r.data, expected) + @fix(database) def test_upload_local_file(self): """ Test the genbank uploader. + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'AB026906.1.gb.bz2') + r = self.app.post('/reference-loader', + data={'method': 'upload', + 'file': (bz2.BZ2File(path), 'AB026906.1.gb')}) + assert 'Your reference sequence was loaded successfully.' in r.data + + dom = lxml.html.fromstring(r.data) + reference_url = dom.cssselect('#reference_download')[0].attrib['href'] - @todo: Use another genbank file to get a UD number and check that - we can then check variants using that UD number. - @todo: This genbank file location is bogus. The tests directory is not - included with the package installation. 
- """ - test_genbank_file = os.path.join(os.path.realpath(os.path.dirname(__file__)), - 'data', 'AB026906.1.gb') - r = self.app.get('/upload') - form = r.forms[0] - form['invoermethode'] = 'file' - form.set('bestandsveld', ('test_upload.gb', - open(test_genbank_file, 'r').read())) - r = form.submit() - r.mustcontain('Your reference sequence was loaded successfully.') + r = self.app.get(reference_url) + assert_equal(r.data, bz2.BZ2File(path).read()) + @fix(database) def test_upload_local_file_invalid(self): """ Test the genbank uploader with a non-genbank file. - - @note: We add the current time to the file contents to make sure it is - not recognized by its hash. """ - r = self.app.get('/upload') - form = r.forms[0] - form['invoermethode'] = 'file' - form.set('bestandsveld', ('test_upload.gb', - 'this is not a genbank file (%s)\n' % time.ctime())) - r = form.submit() - r.mustcontain('The file could not be parsed.') - print r.body + r = self.app.post('/reference-loader', + data={'method': 'upload', + 'file': (StringIO('this is not a genbank file'), 'AB026906.1.gb')}) + assert 'Your reference sequence was loaded successfully.' not in r.data + assert 'The file could not be parsed.' in r.data + @fix(database, cache('NM_002001.2')) def test_reference(self): """ Test if reference files are cached. 
""" - r = self.app.get('/check') - form = r.forms[0] - form['name'] = 'AB026906.1:c.274G>T' - r = form.submit() - r.mustcontain('0 Errors', - '1 Warning', - 'Raw variant 1: substitution at 7872', - '<a href="#bottom" class="hornav">go to bottom</a>', - '<input type="text" name="name" value="AB026906.1:c.274G>T" style="width:100%">') - r = self.app.get('/Reference/AB026906.1.gb') - assert_equal(r.content_type, 'text/plain') - assert_equal(r.content_length, 26427) - r.mustcontain('ggaaaaagtc tctcaaaaaa cctgctttat') + r = self.app.get('/name-checker', + query_string={'description': 'NM_002001.2:g.1del'}) + assert '0 Errors' in r.data + r = self.app.get('/reference/NM_002001.2.gb') + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', + 'NM_002001.2.gb.bz2') + assert_equal(r.data, bz2.BZ2File(path).read()) + + @fix(database, cache('NM_002001.2')) def test_reference_head(self): """ Test if reference files are cached, by issuing a HEAD request. + """ + r = self.app.get('/name-checker', + query_string={'description': 'NM_002001.2:g.1del'}) + assert '0 Errors' in r.data - Note: The WebTest module also checks that the response to a HEAD - request is empty, as it should be. - """ - r = self.app.get('/check') - form = r.forms[0] - form['name'] = 'AB026906.1:c.274G>T' - r = form.submit() - r.mustcontain('0 Errors', - '1 Warning', - 'Raw variant 1: substitution at 7872', - '<a href="#bottom" class="hornav">go to bottom</a>', - '<input type="text" name="name" value="AB026906.1:c.274G>T" style="width:100%">') - r = self.app.head('/Reference/AB026906.1.gb') - assert_equal(r.content_type, 'text/plain') + r = self.app.head('/reference/NM_002001.2.gb') + assert_equal(r.status_code, 200) + @fix(database) def test_reference_head_none(self): """ Test if non-existing reference files gives a 404 on a HEAD request. 
""" - r = self.app.head('/Reference/AB026906.78.gb', status=404) + r = self.app.head('/reference/NM_002001.2.gb') + assert_equal(r.status_code, 404) + @fix(database, hg19, hg19_transcript_mappings, cache('NM_003002.2')) def test_bed(self): """ BED track for variant. """ - r = self.app.get('/bed?name=NM_003002.2%3Ac.274G%3ET') - assert_equal(r.content_type, 'text/plain') - r.mustcontain('\t'.join(['chr11', '111959694', '111959695', '274G>T', '0', '+'])) + r = self.app.get('/bed', + query_string={'description': 'NM_003002.2:c.274G>T'}) + assert 'text/plain' in r.headers['Content-Type'] + assert '\t'.join(['chr11', '111959694', '111959695', '274G>T', '0', '+']) in r.data + @fix(database, hg19, hg19_transcript_mappings, cache('NM_000132.3')) def test_bed_reverse(self): """ BED track for variant on reverse strand. """ - r = self.app.get('/bed?name=NM_000132.3%3Ac.%5B4374A%3ET%3B4380_4381del%5D') - assert_equal(r.content_type, 'text/plain') - r.mustcontain('\t'.join(['chrX', '154157690', '154157691', '4374A>T', '0', '-'])) - r.mustcontain('\t'.join(['chrX', '154157683', '154157685', '4380_4381del', '0', '-'])) + r = self.app.get('/bed', + query_string={'description': 'NM_000132.3:c.[4374A>T;4380_4381del]'}) + assert 'text/plain' in r.headers['Content-Type'] + assert '\t'.join(['chrX', '154157690', '154157691', '4374A>T', '0', '-']) in r.data + assert '\t'.join(['chrX', '154157683', '154157685', '4380_4381del', '0', '-']) in r.data diff --git a/tests/utils.py b/tests/utils.py index 93c6aab52c3684d40e9316f0411994aaacd07d04..befa5d72859279140211ad412fa2920fce8961d6 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,36 +1,68 @@ +""" +Utilities for unit tests. +""" + + +from functools import wraps import os import shutil import tempfile from mutalyzer.config import settings -from mutalyzer.db import models -# Todo: Refactor creating and destroying environment into a context manager. +class TestEnvironment(object): + """ + Configure Mutalyzer for unit tests. 
All storage is transient and isolated. + """ + def __init__(self, fixtures=None): + fixtures = fixtures or [] + + self.cache_dir = tempfile.mkdtemp() + log_handle, self.log_file = tempfile.mkstemp() + os.close(log_handle) -def create_test_environment(database=False): + settings.configure({'DEBUG': False, + 'TESTING': True, + 'CACHE_DIR': self.cache_dir, + 'REDIS_URI': None, + 'DATABASE_URI': 'sqlite://', + 'LOG_FILE': self.log_file}) + + for fixture in fixtures: + fixture() + + def destroy(self): + """ + Destroy all storage defined in the current environment. + """ + shutil.rmtree(self.cache_dir) + os.unlink(self.log_file) + + +class MutalyzerTest(object): """ - Configure Mutalyzer for unit tests. All storage is transient and isolated. + Test class providing an isolated test environment for each test. """ - log_handle, log_filename = tempfile.mkstemp() - os.close(log_handle) + fixtures = () - settings.configure(dict( - DEBUG = False, - TESTING = True, - CACHE_DIR = tempfile.mkdtemp(), - REDIS_URI = None, - DATABASE_URI = 'sqlite://', - LOG_FILE = log_filename)) + def setup(self): + self.environment = TestEnvironment(fixtures=self.fixtures) - if database: - models.create_all() + def teardown(self): + self.environment.destroy() -def destroy_environment(): +def fix(*fixtures): """ - Destroy all storage defined in the current environment. + Decorator for a unit test setting up the specified fixtures. """ - shutil.rmtree(settings.CACHE_DIR) - os.unlink(settings.LOG_FILE) + def decorator(f): + @wraps(f) + def fixed_f(*args, **kwargs): + for fixture in fixtures: + fixture() + return f(*args, **kwargs) + return fixed_f + return decorator