diff --git a/mutalyzer/services/rpc.py b/mutalyzer/services/rpc.py index 770271128c6012bedb2139eff9cd58ce5e7a6edb..3e3264175fa083ce1b1490c30e11c3fb774705fb 100644 --- a/mutalyzer/services/rpc.py +++ b/mutalyzer/services/rpc.py @@ -1240,6 +1240,11 @@ class MutalyzerService(ServiceBase): output.addMessage(__file__, -1, 'INFO', 'Received request descriptionExtract') + if not settings.TESTING and (len(reference) > 1000 or + len(observed) > 1000): + raise Fault('EMAXSIZE', + 'Input sequences are restricted to 1000bp.') + allele = extractor.describe_dna(reference, observed) result = Allele() diff --git a/mutalyzer/util.py b/mutalyzer/util.py index 4bf06e3effef5fe56f507e7dd0a82f2594aab8b4..4a3ac13c1dc73d165cff23a68571341629e29dfa 100644 --- a/mutalyzer/util.py +++ b/mutalyzer/util.py @@ -29,6 +29,7 @@ import operator import sys import time +from Bio import SeqIO from Bio.SeqUtils import seq3 # NOTE: This is a temporary fix. @@ -352,6 +353,46 @@ def is_dna(s): #is_dna +def guess_file_type(handle): + """ + Guess the file type of an NGS data file from its first character. + + We assume that the stream is rewound before use; after use, the input + stream will be rewound again. + + :arg stream handle: Open readable handle to an NGS data file. + + :returns unicode: Either 'fasta', 'fastq' or 'text'. + """ + token = handle.read(1) + handle.seek(0) + + if token == '>': + return 'fasta' + elif token == '@': + return 'fastq' + return 'text' + + +def read_dna(handle): + """ + Read the first record in an NGS data file. + + If the format is not recognised as FASTA or FASTQ, we assume that the input + is in plain text. In this case, only uppercase A, T, C and G are kept. + + :arg stream handle: Open readable handle to an NGS data file. + + :returns unicode: Content of the first record in the file. 
+ """ + file_format = guess_file_type(handle) + + if file_format != 'text': + return unicode(SeqIO.parse(handle, file_format).next().seq) + + return ''.join(x for x in unicode(handle.read()) if x in 'ATCG') + + def in_frame_description(s1, s2) : """ Give a description of an inframe difference of two proteins. Also give diff --git a/mutalyzer/website/templates/about.html b/mutalyzer/website/templates/about.html index 9ebed3da3a04e3be6c5e162cf7a9043acf57e26c..c8ddf49fb25c29424246c0b6c610562b6513839d 100644 --- a/mutalyzer/website/templates/about.html +++ b/mutalyzer/website/templates/about.html @@ -17,8 +17,8 @@ F.J. Laros, with the following exceptions: written by Gerard C.P. Schaafsma.</li> <li>Current development and maintenance is done by Martijn Vermaat.</li> - <li>Automatic extraction of variant descriptions is implemented by - Jonathan Vis.</li> + <li><a href="https://github.com/mutalyzer/description-extractor">Automatic + extraction of variant descriptions</a> is implemented by Jonathan Vis.</li> </ul> <p> diff --git a/mutalyzer/website/templates/description-extractor.html b/mutalyzer/website/templates/description-extractor.html index d1d8917a4b82ff92d7145e05a2248ecbd0b6ab6b..5927bf3e21152477f18311d5b429012ae47c3dc9 100644 --- a/mutalyzer/website/templates/description-extractor.html +++ b/mutalyzer/website/templates/description-extractor.html @@ -5,7 +5,10 @@ {% block content %} -<p class="alert alert-warning">Note that this is an experimental service.</p +<p class="alert alert-warning"> +Please note that this is an experimental service and we are currently limiting +input sequences to 1000bp. +</p> <p> Extract the HGVS variant description from a reference sequence and an observed @@ -25,20 +28,126 @@ testing phase, we plan to use the underlying algorithm for: </li> </ul> +<p> +The algorithm is implemented in +the <a href="https://github.com/mutalyzer/description-extractor">HGVS variant +description extractor</a>. 
To apply it on longer input sequences than accepted +on this page, you can download that package and run it locally. +</p> + <p> Please supply a reference sequence and an observed sequence. </p> -<form action="{{ url_for('.description_extractor') }}" method="get" class="form"> - <div class="form-group"> - <label for="reference_sequence">Reference sequence</label> - <input type="text" name="reference_sequence" id="reference_sequence" value="{{ reference_sequence }}" class="form-control form-pre example-target" placeholder="Reference sequence"> - <p>Example: <code class="example-input">ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA</code></p> +<form enctype="multipart/form-data" action="{{ url_for('.description_extractor') }}" method="post" class="form" id="invoer"> + <div class="row"> + <h4>Reference input</h4> + <div class="col-md-6"> + <div class="form-group" id="input-methods"> + <div class="radio"> + <label> + <input type="radio" name="reference_method" value="raw_method" class="input-select" data-context="select-form1" data-for="reference_raw_method" {{ 'checked' if reference_method == 'raw_method' or not reference_method }}> + Enter a sequence (FASTA, FASTQ, or plain text). + </label> + </div> + <div class="radio"> + <label> + <input type="radio" name="reference_method" value="file_method" class="input-select" data-context="select-form1" data-for="reference_file_method" {{ 'checked' if reference_method == 'file_method' }}> + Upload a file (FASTA, FASTQ, or plain text). + </label> + </div> + <div class="radio"> + <label> + <input type="radio" name="reference_method" value="refseq_method" class="input-select" data-context="select-form1" data-for="reference_refseq_method" {{ 'checked' if reference_method == 'refseq_method' }}> + Enter a RefSeq accession number. 
+ </label> + </div> + </div> + </div> + + <div id="select-form1"> + <div class="col-md-6"> + <div class="subform" id="reference_raw_method" style="display: {{ '' if reference_method == 'raw_method' or not reference_method else 'none' }}"> + <div class="form-group"> + <label for="reference_sequence">Reference sequence</label> + <textarea name="reference_sequence" id="reference_sequence" class="form-control form-pre example-target" placeholder="Reference sequence">{{ reference_sequence }}</textarea> + <p>Example: <code class="example-input" data-for="reference_sequence">ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA</code></p> + </div> + </div> + </div> + <div class="col-md-6"> + <div class="subform" id="reference_file_method" style="display: {{ 'none' if reference_method != 'file_method' }}"> + <div class="form-group"> + <label for="reference_file">Reference file</label> + <input type="file" name="reference_file" id="reference_file"> + </div> + </div> + </div> + <div class="col-md-6"> + <div class="subform" id="reference_refseq_method" style="display: {{ 'none' if reference_method != 'refseq_method' }}"> + <div class="form-group"> + <label for="reference_accession_number">Reference accession number</label> + <input type="text" name="reference_accession_number" id="reference_accession_number" value="{{ reference_accession_number }}" class="form-control form-pre example-target" placeholder="Reference accession number"> + <p>Example: <code class="example-input" data-for="reference_accession_number">NM_198697.1</code></p> + </div> + </div> + </div> + </div> </div> - <div class="form-group"> - <label for="variant_sequence">Observed sequence</label> - <input type="text" name="variant_sequence" id="variant_sequence" value="{{ variant_sequence }}" class="form-control form-pre example-target-2" placeholder="Observed sequence"> - <p>Example: <code class="example-input-2">ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA</code></p> + + <div class="row"> + <h4>Sample input</h4> + <div 
class="col-md-6"> + <div class="form-group" id="input-methods"> + <div class="radio"> + <label> + <input type="radio" name="sample_method" value="raw_method" class="input-select" data-context="select-form2" data-for="sample_raw_method" {{ 'checked' if sample_method == 'raw_method' or not sample_method }}> + Enter a sequence (FASTA, FASTQ, or plain text). + </label> + </div> + <div class="radio"> + <label> + <input type="radio" name="sample_method" value="file_method" class="input-select" data-context="select-form2" data-for="sample_file_method" {{ 'checked' if sample_method == 'file_method' }}> + Upload a file (FASTA, FASTQ, or plain text). + </label> + </div> + <div class="radio"> + <label> + <input type="radio" name="sample_method" value="refseq_method" class="input-select" data-context="select-form2" data-for="sample_refseq_method" {{ 'checked' if sample_method == 'refseq_method' }}> + Enter a RefSeq accession number. + </label> + </div> + </div> + </div> + + <div id="select-form2"> + <div class="col-md-6"> + <div class="subform" id="sample_raw_method" style="display: {{ '' if sample_method == 'raw_method' or not sample_method else 'none' }}"> + <div class="form-group"> + <label for="sample_sequence">Sample sequence</label> + <textarea name="sample_sequence" id="sample_sequence" class="form-control form-pre example-target-2" placeholder="Sample sequence">{{ sample_sequence }}</textarea> + <p>Example: <code class="example-input" data-for="sample_sequence">ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA</code></p> + </div> + </div> + </div> + <div class="col-md-6"> + <div class="subform" id="sample_file_method" style="display: {{ 'none' if sample_method != 'file_method' }}"> + <div class="form-group"> + <label for="sample_file">Sample file</label> + <input type="file" name="sample_file" id="sample_file"> + </div> + </div> + </div> + <div class="col-md-6"> + <div class="subform" id="sample_refseq_method" style="display: {{ 'none' if sample_method != 'refseq_method' 
}}"> + <div class="form-group"> + <label for="sample_accession_number">Sample accession number</label> + <input type="text" name="sample_accession_number" id="sample_accession_number" value="{{ sample_accession_number }}" class="form-control form-pre example-target-2" placeholder="Sample accession number"> + <p>Example: <code class="example-input" data-for="sample_accession_number">NM_198697.2</code></p> + </div> + </div> + </div> + </div> </div> <div class="form-group"> <input type="submit" class="btn btn-primary" value="Extract variant description"> @@ -46,7 +155,7 @@ Please supply a reference sequence and an observed sequence. </div> </form> -{% if description %} +{% if reference_method and sample_method %} <hr> {% for m in messages %} {% if m.class == "error" %} @@ -69,8 +178,44 @@ Please supply a reference sequence and an observed sequence. {% if not errors %} <hr> - <h4>Genomic description</h4> - <p><code>g.{{ description }}</code></p> + <h4>Input</h4> + <table class="table"> + <thead> + <tr> + <th>Field</th> + <th>Value</th> + </tr> + </thead> + <tbody> + <tr> + <td>Reference input</td> + <td> + {% if reference_method == 'raw_method' %} + <code>{{ reference_sequence|short(40) }}</code> + {% elif reference_method == 'file_method' %} + File upload + {% elif reference_method == 'refseq_method' %} + {{ reference_accession_number }} + {% endif %} + </td> + </tr> + <tr> + <td>Sample input</td> + <td> + {% if sample_method == 'raw_method' %} + <code>{{ sample_sequence|short(40) }}</code> + {% elif sample_method == 'file_method' %} + File upload + {% elif sample_method == 'refseq_method' %} + {{ sample_accession_number }} + {% endif %} + </td> + </tr> + </tbody> + </table> + + <h4>Description</h4> + <p><pre class="description">{{ raw_vars|string }}</pre></p> <h4>Overview of the raw variants</h4> <table class="table"> @@ -86,17 +231,17 @@ Please supply a reference sequence and an observed sequence. 
</tr> </thead> <tbody> - {% for raw_var in raw_vars %} - <tr> - <td>{{ raw_var.start }}</td> - <td>{{ raw_var.end }}</td> - <td>{{ raw_var.type }}</td> - <td>{{ raw_var.deleted }}</td> - <td>{{ raw_var.inserted }}</td> - <td>{{ raw_var.shift }}</td> - <td>{{ raw_var }}</td> - </tr> - {% endfor %} + {% for raw_var in raw_vars %} + <tr> + <td>{{ raw_var.start }}</td> + <td>{{ raw_var.end }}</td> + <td>{{ raw_var.type }}</td> + <td><code>{{ raw_var.deleted|string|short }}</code></td> + <td><code>{{ raw_var.inserted|string|short }}</code></td> + <td>{{ raw_var.shift }}</td> + <td>{% if raw_var|string|length > 20 %}Too long to show{% else %}<code>{{ raw_var|string }}</code>{% endif %}</td> + </tr> + {% endfor %} </tbody> </table> {% endif %}{# not errors #} diff --git a/mutalyzer/website/templates/reference-loader.html b/mutalyzer/website/templates/reference-loader.html index 0e8b56177618607dedc5068f177d97637375c865..d4b4c755adb244875559fc9c38591811f79d4d74 100644 --- a/mutalyzer/website/templates/reference-loader.html +++ b/mutalyzer/website/templates/reference-loader.html @@ -5,14 +5,6 @@ {% block content %} -<script> -var oldLoad = window.onload; -window.onload=function(){ - if (oldLoad) oldLoad(); - updateVisibility(); -} -</script> - <p> The Reference File Loader allows you to use your own reference sequence when no appropriate RefSeq, GenBank or LRG file is available. @@ -22,138 +14,140 @@ Please select one of the options below to upload or retrieve your reference sequence (maximum size is {{ max_file_size }} megabytes). 
</p> -<form name="invoer" enctype="multipart/form-data" action="{{ url_for('.reference_loader') }}" method="post"> +<form name="invoer" enctype="multipart/form-data" action="{{ url_for('.reference_loader') }}" method="post" id="invoer"> <div class="row"> <div class="col-md-6"> <div class="form-group" id="input-methods"> <div class="radio"> <label> - <input type="radio" name="method" value="upload" checked > + <input class="input-select" type="radio" name="method" value="upload_method" data-context="select-form" data-for="upload_method" checked > The reference sequence file is a local file. </label> </div> <div class="radio"> <label> - <input type="radio" name="method" value="url" > + <input class="input-select" type="radio" name="method" value="url_method" data-context="select-form" data-for="url_method" > The reference sequence file can be found at the following URL. </label> </div> <div class="radio"> <label> - <input type="radio" name="method" value="slice_gene" > + <input class="input-select" type="radio" name="method" value="slice_gene_method" data-context="select-form" data-for="slice_gene_method" > Retrieve part of the reference genome for a (HGNC) gene symbol. </label> </div> <div class="radio"> <label> - <input type="radio" name="method" value="slice_accession" > + <input class="input-select" type="radio" name="method" value="slice_accession_method" data-context="select-form" data-for="slice_accession_method" > Retrieve a range of a chromosome by accession number. </label> </div> <div class="radio"> <label> - <input type="radio" name="method" value="slice_chromosome" > + <input class="input-select" type="radio" name="method" value="slice_chromosome_method" data-context="select-form" data-for="slice_chromosome_method" > Retrieve a range of a chromosome by name. 
</label> </div> </div> </div> - <script type="text/javascript"> - $('#input-methods input').on('change', updateVisibility); - </script> - - <div class="col-md-6"> - <div class="form-group" id="upload_label"> - <label for="file">GenBank file</label> - <input type="file" name="file" id="file"> - <p class="help-block">Please select the GenBank file in plain text format.</p> - </div> - - <div class="form-group" id="url_label"> - <label for="url">GenBank file URL</label> - <input type="text" name="url" id="url" class="form-control"> - <p class="help-block">Please enter the URL of the GenBank file in plain text (including http://).</p> - </div> - - <div id="slice_gene_label"> - <div class="form-group"> - <p class="help-block">Please enter the Gene symbol and organism name without spaces - and specify the length of the flanking sequences.</p> - <p class="help-block"><b>Note:</b> This uses - the <a href="http://www.ncbi.nlm.nih.gov/sites/gquery">NCBI - Entrez</a> search engine and is therefore based on the current - Entrez assembly for the given organism (GRCh38/hg38 for human).</p> - <label for="genesymbol">Gene symbol</label> - <input type="text" name="genesymbol" id="genesymbol" class="form-control"> - </div> - <div class="form-group"> - <label for="organism">Organism name</label> - <input type="text" name="organism" id="organism" class="form-control"> - </div> - <div class="form-group"> - <label for="upstream">Number of 5' flanking nucleotides</label> - <input type="text" name="upstream" id="upstream" value="5000" class="form-control"> - </div> - <div class="form-group"> - <label for="downstream"><td>Number of 3' flanking nucleotides</label> - <input type="text" name="downstream" id="downstream" value="2000" class="form-control"> + <div id="select-form"> + <div class="col-md-6"> + <div class="subform" id="upload_method"> + <div class="form-group"> + <label for="file">GenBank file</label> + <input type="file" name="file" id="file"> + <p class="help-block">Please select 
the GenBank file in plain text format.</p> + </div> </div> - </div> - <div id="slice_accession_label"> - <div class="form-group"> - <p class="help-block">Please enter the accession number of the chromosome or contig and specify the range.</p> - <label for="accession">Chromosome accession number</label> - <input type="text" name="accession" id="accession" class="form-control"> - </div> - <div class="form-group"> - <label for="accession_start">Start position</label> - <input type="text" name="accession_start" id="accession_start" class="form-control"> - </div> - <div class="form-group"> - <label for="accession_stop">Stop position</label> - <input type="text" name="accession_stop" id="accession_stop" class="form-control"> + <div class="subform" id="url_method" style="display: none"> + <div class="form-group"> + <label for="url">GenBank file URL</label> + <input type="text" name="url" id="url" class="form-control"> + <p class="help-block">Please enter the URL of the GenBank file in plain text (including http://).</p> + </div> </div> - <div class="form-group"> - <label>Orientation</label> - <div class="radio"><label><input type="radio" name="accession_orientation" value="1" checked> Forward</label></div> - <div class="radio"><label><input type="radio" name="accession_orientation" value="2"> Reverse</label></div> - </div> - </div> - <div id="slice_chromosome_label"> - <div class="form-group"> - <p class="help-block">Please enter the name of the chromosome and specify the range.</p> - <label for="assembly_name_or_alias">Assembly</label> - <select name="assembly_name_or_alias" id="assembly_name_or_alias" class="form-control"> - {% for assembly in assemblies %} - <option value="{{ assembly.name }}"{% if assembly_name_or_alias in (assembly.name, assembly.alias) %} selected="selected"{% endif %}>{{ assembly.taxonomy_common_name }} — {{ assembly.name }}{% if assembly.alias %} ({{assembly.alias }}){% endif %}</option> - {% endfor %} - </select> + <div class="subform" 
id="slice_gene_method" style="display: none"> + <div class="form-group"> + <p class="help-block">Please enter the Gene symbol and organism name without spaces + and specify the length of the flanking sequences.</p> + <p class="help-block"><b>Note:</b> This uses + the <a href="http://www.ncbi.nlm.nih.gov/sites/gquery">NCBI + Entrez</a> search engine and is therefore based on the current + Entrez assembly for the given organism (GRCh38/hg38 for human).</p> + <label for="genesymbol">Gene symbol</label> + <input type="text" name="genesymbol" id="genesymbol" class="form-control"> + </div> + <div class="form-group"> + <label for="organism">Organism name</label> + <input type="text" name="organism" id="organism" class="form-control"> + </div> + <div class="form-group"> + <label for="upstream">Number of 5' flanking nucleotides</label> + <input type="text" name="upstream" id="upstream" value="5000" class="form-control"> + </div> + <div class="form-group"> + <label for="downstream">Number of 3' flanking nucleotides</label> + <input type="text" name="downstream" id="downstream" value="2000" class="form-control"> + </div> </div> - <div class="form-group"> - <label for="chromosome">Chromosome name</label> - <input type="text" name="chromosome" id="chromosome" class="form-control"> - </div> - <div class="form-group"> - <label for="chromosome_start">Start position</label> - <input type="text" name="chromosome_start" id="chromosome_start" class="form-control"> - </div> - <div class="form-group"> - <label for="chromosome_stop">Stop position</label> - <input type="text" name="chromosome_stop" id="chromosome_stop" class="form-control"> + + <div class="subform" id="slice_accession_method" style="display: none"> + <div class="form-group"> + <p class="help-block">Please enter the accession number of the chromosome or contig and specify the range.</p> + <label for="accession">Chromosome accession number</label> + <input type="text" name="accession" id="accession" 
class="form-control"> + </div> + <div class="form-group"> + <label for="accession_start">Start position</label> + <input type="text" name="accession_start" id="accession_start" class="form-control"> + </div> + <div class="form-group"> + <label for="accession_stop">Stop position</label> + <input type="text" name="accession_stop" id="accession_stop" class="form-control"> + </div> + <div class="form-group"> + <label>Orientation</label> + <div class="radio"><label><input type="radio" name="accession_orientation" value="1" checked> Forward</label></div> + <div class="radio"><label><input type="radio" name="accession_orientation" value="2"> Reverse</label></div> + </div> </div> - <div class="form-group"> - <label for="chromosome_orientation">Orientation</label> - <div class="radio"><label><input type="radio" name="chromosome_orientation" value="1" checked> Forward</label></div> - <div class="radio"><label><input type="radio" name="chromosome_orientation" value="2"> Reverse</label></div> + + <div class="subform" id="slice_chromosome_method" style="display: none"> + <div class="form-group"> + <p class="help-block">Please enter the name of the chromosome and specify the range.</p> + <label for="assembly_name_or_alias">Assembly</label> + <select name="assembly_name_or_alias" id="assembly_name_or_alias" class="form-control"> + {% for assembly in assemblies %} + <option value="{{ assembly.name }}"{% if assembly_name_or_alias in (assembly.name, assembly.alias) %} selected="selected"{% endif %}>{{ assembly.taxonomy_common_name }} — {{ assembly.name }}{% if assembly.alias %} ({{assembly.alias }}){% endif %}</option> + {% endfor %} + </select> + </div> + <div class="form-group"> + <label for="chromosome">Chromosome name</label> + <input type="text" name="chromosome" id="chromosome" class="form-control"> + </div> + <div class="form-group"> + <label for="chromosome_start">Start position</label> + <input type="text" name="chromosome_start" id="chromosome_start" class="form-control"> 
+ </div> + <div class="form-group"> + <label for="chromosome_stop">Stop position</label> + <input type="text" name="chromosome_stop" id="chromosome_stop" class="form-control"> + </div> + <div class="form-group"> + <label for="chromosome_orientation">Orientation</label> + <div class="radio"><label><input type="radio" name="chromosome_orientation" value="1" checked> Forward</label></div> + <div class="radio"><label><input type="radio" name="chromosome_orientation" value="2"> Reverse</label></div> + </div> </div> </div> </div> </div> <div class="form-group"> - <input type="submit" value="Load reference file"class="btn btn-primary"> + <input type="submit" value="Load reference file" class="btn btn-primary"> <a href="https://humgenprojects.lumc.nl/trac/mutalyzer/wiki/ReferenceLoader" target="_blank" class="btn btn-default pull-right">Help</a> </div> </form> diff --git a/mutalyzer/website/templates/static/css/style.css b/mutalyzer/website/templates/static/css/style.css index 81552a68939211520f20faa61da6b8d1f6976c0c..b4b391002bc0ac4b7f048d73998c0677796b1465 100644 --- a/mutalyzer/website/templates/static/css/style.css +++ b/mutalyzer/website/templates/static/css/style.css @@ -136,6 +136,10 @@ input[type="text"].form-pre{ font-family: Menlo, Monaco, Consolas, "Courier New", monospace; } +textarea.form-pre{ + font-family: Menlo, Monaco, Consolas, "Courier New", monospace; +} + code { /* color: #0B9B33; background-color: #F2F9F3;*/ @@ -144,9 +148,11 @@ code { white-space: normal; } -pre{ -/* margin: 15px 30px;*/ +pre.description { + white-space: pre-wrap; + max-height: 300px; } + header.main-header { height: 80px; /* width: 649px;*/ @@ -372,12 +378,12 @@ header.main-header { float: right; } -.example-input, .example-input-2 { +.example-input { cursor: pointer; color: #0B9B33; } -.example-input:hover, .example-input-2:hover { +.example-input:hover { color: #F2F9F3; background-color: #5BC779; } diff --git a/mutalyzer/website/templates/static/js/interface.js 
b/mutalyzer/website/templates/static/js/interface.js index 44c4e8c21da49b3ac5343fcc2b96fc1805d2fa22..88897ee4a3a591251089d30ec2b5057e170d5c7f 100644 --- a/mutalyzer/website/templates/static/js/interface.js +++ b/mutalyzer/website/templates/static/js/interface.js @@ -1,70 +1,52 @@ -function updateVisibility() { - document.getElementById('upload_label').style.display = "none"; - document.getElementById('url_label').style.display = "none"; - document.getElementById('slice_gene_label').style.display = "none"; - document.getElementById('slice_accession_label').style.display = "none"; - document.getElementById('slice_chromosome_label').style.display = "none"; - - for (i = 0; i < document.invoer.method.length; i++) { - if (document.invoer.method[i].checked) { - if (document.invoer.method[i].value == 'upload') { - document.getElementById('upload_label').style.display = ""; - } - else if (document.invoer.method[i].value == 'url') { - document.getElementById('url_label').style.display = ""; - } - else if (document.invoer.method[i].value == 'slice_gene') { - document.getElementById('slice_gene_label').style.display = ""; - } - else if (document.invoer.method[i].value == 'slice_accession') { - document.getElementById('slice_accession_label').style.display = ""; - } - else if (document.invoer.method[i].value == 'slice_chromosome') { - document.getElementById('slice_chromosome_label').style.display = ""; - } - }//if - }//for -}//updateVisibility - -//Toggle the build option in the batch.html page +// Toggle the build option in the batch.html page. 
function changeBatch(sel) { - var opt = $(sel).val(); - // var opt = sel.options[sel.selectedIndex].value; - if(opt=='position-converter') { - document.getElementById('assembly_name_or_alias').style.display = ""; - } else { - document.getElementById('assembly_name_or_alias').style.display = "none"; - } + var opt = $(sel).val(); + + if(opt == 'position-converter') { + $('#assembly_name_or_alias').show(); /* assembly selector is only shown for position-converter jobs */ + } + else { + $('#assembly_name_or_alias').hide(); + } } function toggle_visibility(id) { - var e = document.getElementById(id); - if (e.style.display == 'block') { - e.style.display = 'none'; - } else { - e.style.display = 'block'; - } + $(document.getElementById(id)).toggle(); /* flip the visibility of the element with this id */ } function onloadBatch() { - changeBatch($('input[name="job_type"]:checked')); + changeBatch($('input[name="job_type"]:checked')); } function clearField(form, fieldName) { - for (var i = 0; i < form.elements.length; i++) { - if (form.elements[i].name == fieldName) { - form.elements[i].value = ''; - } + var i; + + for (i = 0; i < form.elements.length; i++) { + if (form.elements[i].name == fieldName) { /* blank every form element carrying this name */ + form.elements[i].value = ''; } + } } $(document).ready(function() { - $('.example-input').on('click', function() { - $('.example-target').val($(this).text()); - return false; - }); - $('.example-input-2').on('click', function() { - $('.example-target-2').val($(this).text()); - return false; - }); -}) + $('.example-input').on('click', function() { + var target = document.getElementById($(this).data('for')); /* data-for holds the id of the field to fill */ + + $(target).val($(this).text()); /* copy the clicked example text into that field */ + return false; + }); + + $('.input-select').on('change', function() { + var context = document.getElementById( + $(this).data('context')).getElementsByClassName('subform'), + target = document.getElementById($(this).data('for')), + i; + + for (i = 0; i < context.length; i++) { /* hide every .subform inside the data-context container */ + context[i].style.display = 'none'; + } + target.style.display = ''; /* then reveal the subform named by data-for */ + + return false; + }); +}); diff --git a/mutalyzer/website/views.py b/mutalyzer/website/views.py index 
0136564bf2b3311410cc1caf0d91b5f58058dfff..fffb8bb68e4c3ac1c6a2cdfc41ba61e0349c02f4 100644 --- a/mutalyzer/website/views.py +++ b/mutalyzer/website/views.py @@ -9,6 +9,7 @@ import bz2 import os import pkg_resources import re +import StringIO import urllib from flask import Blueprint @@ -68,6 +69,13 @@ def request_terms(): return terms +@website.app_template_filter('short') +def short_seq_filter(s, max_len=21): # Template filter 'short': abbreviate a long string. + if len(s) > max_len: # Elide the middle; '//' keeps the slice bounds integers. + return s[:max_len // 2 - 1] + '...' + s[-max_len // 2 + 2:] + return s + + @website.context_processor def add_globals(): return global_context() @@ -527,17 +535,17 @@ def reference_loader_submit(): corresponding to values for the `method` field, each requiring some additional fields to be defined.: - `method=upload` + `method=upload_method` The reference sequence file is uploaded from a local file. - `file`: Reference sequence file to upload. - `method=url` + `method=url_method` The reference sequence file can be found at the specified URL. - `url`: URL of reference sequence file to load. - `method=slice_gene` + `method=slice_gene_method` Retrieve part of the reference genome for an HGNC gene symbol. - `genesymbol`: Gene symbol. @@ -545,7 +553,7 @@ def reference_loader_submit(): - `upstream`: Number of 5' flanking nucleotides. - `downstream`: Number of 3' flanking nucleotides. - `method=slice_accession` + `method=slice_accession_method` Retrieve a range of a chromosome by accession number. - `accession`: Chromosome Accession Number. @@ -553,7 +561,7 @@ def reference_loader_submit(): - `accession_stop`: Stop position. - `accession_orientation`: Orientation. - `method=slice_chromosome` + `method=slice_chromosome_method` Retrieve a range of a chromosome by name. - `assembly_name_or_alias`: Genome assembly by name or by alias. 
@@ -587,7 +595,7 @@ def reference_loader_submit(): raise InputException('Expected an integer in field: %s' % field) try: - if method == 'upload': + if method == 'upload_method': # Todo: Non-conforming clients (read: LOVD) might send the form # request urlencoded (and not as the requested multipart/ # form-data). @@ -597,10 +605,10 @@ def reference_loader_submit(): ud = retriever.uploadrecord(file.read()) - elif method == 'url': + elif method == 'url_method': ud = retriever.downloadrecord(request.form.get('url')) - elif method == 'slice_gene': + elif method == 'slice_gene_method': genesymbol = request.form.get('genesymbol') organism = request.form.get('organism') upstream = check_position(request.form.get('upstream', ''), @@ -610,7 +618,7 @@ def reference_loader_submit(): ud = retriever.retrievegene(genesymbol, organism, upstream, downstream) - elif method == 'slice_accession': + elif method == 'slice_accession_method': accession = request.form.get('accession') start = check_position(request.form.get('accession_start', ''), 'Start position') @@ -619,7 +627,7 @@ def reference_loader_submit(): orientation = int(request.form.get('accession_orientation')) ud = retriever.retrieveslice(accession, start, stop, orientation) - elif method == 'slice_chromosome': + elif method == 'slice_chromosome_method': chromosome_name = request.form.get('chromosome') start = check_position(request.form.get('chromosome_start', ''), 'Start position') @@ -672,29 +680,114 @@ def reference_loader_submit(): @website.route('/description-extractor') def description_extractor(): """ - The Variant Description Extractor (experimental service). + Description extractor loader form. 
+ """ + return render_template('description-extractor.html') + + +@website.route('/description-extractor', methods=['POST']) +def description_extractor_submit(): """ - reference_sequence = request.args.get('reference_sequence') - variant_sequence = request.args.get('variant_sequence') + The Variant Description Extractor (experimental service). + + There are multiple ways for the user to provide two sequences, + corresponding to the values for the `reference_method` and `sample_method` + fields, each requiring some additional fields to be defined: + + `raw_method` + The reference and sample sequences are pasted into the form fields. + + - `reference_sequence`: The reference sequence. + - `sample_sequence`: The sample sequence. + + `file_method` + The reference and sample sequences are uploaded. + + - `reference_file`: The reference file. + - `sample_file`: The sample file. - if not (reference_sequence and variant_sequence): - return render_template('description-extractor.html') + `refseq_method` + The reference and sample sequences are given by RefSeq accession numbers. + - `reference_accession_number`: RefSeq accession number for the reference + sequence. + - `sample_accession_number`: RefSeq accession number for the sample + sequence. 
+ """ output = Output(__file__) output.addMessage(__file__, -1, 'INFO', 'Received Description Extract request from %s' % request.remote_addr) + r = s = '' + reference_method = request.form.get('reference_method') + sample_method = request.form.get('sample_method') + reference_sequence = request.form.get('reference_sequence') + sample_sequence = request.form.get('sample_sequence') + reference_file = request.files.get('reference_file') + sample_file = request.files.get('sample_file') + reference_accession_number = request.form.get('reference_accession_number') + sample_accession_number = request.form.get('sample_accession_number') + + if reference_method == 'refseq_method': + if reference_accession_number: + retriever = Retriever.GenBankRetriever(output) + genbank_record = retriever.loadrecord(reference_accession_number) + if genbank_record: + r = unicode(genbank_record.seq) + else: + output.addMessage(__file__, 3, 'EEMPTYFIELD', + 'Reference accession number input fields is empty.') + elif reference_method == 'file_method': + if reference_file: + r = util.read_dna(reference_file) + else: + output.addMessage(__file__, 3, 'EEMPTYFIELD', + 'No reference file provided.') + else: # raw_method + if reference_sequence: + r = util.read_dna(StringIO.StringIO(reference_sequence)) + else: + output.addMessage(__file__, 3, 'EEMPTYFIELD', + 'Reference sequence number input fields is empty.') + + if sample_method == 'refseq_method': + if sample_accession_number: + retriever = Retriever.GenBankRetriever(output) + genbank_record = retriever.loadrecord(sample_accession_number) + if genbank_record: + s = unicode(genbank_record.seq) + else: + output.addMessage(__file__, 3, 'EEMPTYFIELD', + 'Sample accession number input fields is empty.') + elif sample_method == 'file_method': + if sample_file: + s = util.read_dna(sample_file) + else: + output.addMessage(__file__, 3, 'EEMPTYFIELD', + 'No sample file provided.') + else: # raw_method + if sample_sequence: + s = 
util.read_dna(StringIO.StringIO(sample_sequence)) + else: + output.addMessage(__file__, 3, 'EEMPTYFIELD', + 'Sample sequence number input fields is empty.') + # Todo: Move this to the describe module. - if not util.is_dna(reference_sequence): + if not r or not util.is_dna(r): output.addMessage(__file__, 3, 'ENODNA', 'Reference sequence is not DNA.') - if not util.is_dna(variant_sequence): + if not s or not util.is_dna(s): output.addMessage(__file__, 3, 'ENODNA', - 'Variant sequence is not DNA.') + 'Sample sequence is not DNA.') - raw_vars = extractor.describe_dna(reference_sequence, variant_sequence) - description = unicode(raw_vars) #describe.allele_description(raw_vars) + raw_vars = None + if r and s: + if not settings.TESTING and (len(r) > 1000 or len(s) > 1000): + output.addMessage(__file__, 3, 'EMAXSIZE', + 'Input sequences are restricted to 1000bp.') + else: + raw_vars = extractor.describe_dna(r, s) errors, warnings, summary = output.Summary() messages = map(util.message_info, output.getMessages()) @@ -703,13 +796,12 @@ def description_extractor(): 'Finished Description Extract request') return render_template('description-extractor.html', - reference_sequence=reference_sequence, - variant_sequence=variant_sequence, - raw_vars=raw_vars, - description=description, - errors=errors, - summary=summary, - messages=messages) + reference_sequence=reference_sequence or '', + sample_sequence=sample_sequence or '', + reference_accession_number=reference_accession_number or '', + sample_accession_number=sample_accession_number or '', + raw_vars=raw_vars, errors=errors, summary=summary, messages=messages, + reference_method=reference_method, sample_method=sample_method) @website.route('/reference/<string:filename>') diff --git a/tests/data/NM_004006.1.gb.bz2 b/tests/data/NM_004006.1.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..c88783f8789c7fc9b9baeab395ecfa27871e7617 Binary files /dev/null and b/tests/data/NM_004006.1.gb.bz2 differ diff 
--git a/tests/data/NM_004006.2.gb.bz2 b/tests/data/NM_004006.2.gb.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..d6ac307a77f8116c89336c634ac905c8c8c1abf7 Binary files /dev/null and b/tests/data/NM_004006.2.gb.bz2 differ diff --git a/tests/data/extractor_input.fa b/tests/data/extractor_input.fa new file mode 100644 index 0000000000000000000000000000000000000000..95135a9eaa1210c8e3f3374d29b5d5ca65ae1036 --- /dev/null +++ b/tests/data/extractor_input.fa @@ -0,0 +1,2 @@ +>example_input +ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA diff --git a/tests/data/extractor_input.fq b/tests/data/extractor_input.fq new file mode 100644 index 0000000000000000000000000000000000000000..059ed2bdd2d5ae494ba29ebd32a92b9dbab5c4f5 --- /dev/null +++ b/tests/data/extractor_input.fq @@ -0,0 +1,4 @@ +@example_input +ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA ++ +bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb diff --git a/tests/data/extractor_input.txt b/tests/data/extractor_input.txt new file mode 100644 index 0000000000000000000000000000000000000000..7ea837629e790570102c936ba16fb5a3e982b71e --- /dev/null +++ b/tests/data/extractor_input.txt @@ -0,0 +1,2 @@ +ATGATTTGATCAGATACATGTGATACCGGT +AGTTAGGACAA diff --git a/tests/fixtures.py b/tests/fixtures.py index c867dd9774cf59b7d4ed51904471fbbc9224d451..652d2f86f4a1a593601958b756aa0c485fa98939 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -85,6 +85,12 @@ REFERENCES = { 'NM_000132.3': {'filename': 'NM_000132.3.gb.bz2', 'checksum': '94569bee76d7c8b1168e17df4fe1dcb4', 'geninfo_id': '192448441'}, + 'NM_004006.1': {'filename': 'NM_004006.1.gb.bz2', + 'checksum': 'be8fea2905e146bfe096e25fbfda2eef', + 'geninfo_id': '5032282'}, + 'NM_004006.2': {'filename': 'NM_004006.2.gb.bz2', + 'checksum': 'ee2090536af19a13ac1d6faa46d0b12e', + 'geninfo_id': '238018044'}, 'LRG_1': {'filename': 'LRG_1.xml.bz2', 'checksum': '5b8f5a39fcd9e3005688eddffd482746'}, 'DMD': {'accession': 'UD_139262478721', diff --git a/tests/test_website.py 
b/tests/test_website.py index b299722d1f30aa532eaa0542c792f18caba6d02e..bb3ff5afe3aee57617e41891bde28f9d8a5099b5 100644 --- a/tests/test_website.py +++ b/tests/test_website.py @@ -100,14 +100,83 @@ class TestWebsite(MutalyzerTest): assert r.status == '200 OK' assert 'nnouncement' not in r.data - def test_description_extractor(self): + def test_description_extractor_raw(self): """ - Submit the variant description extractor. + Submit two sequences to the variant description extractor. """ - r = self.app.get('/description-extractor', query_string={ - 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', - 'variant_sequence': 'ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA'}) - assert 'g.[5_6insTT;17del;26A>C;35dup]' in r.data + r = self.app.post('/description-extractor', data={ + 'reference_method': 'raw_method', + 'sample_method': 'raw_method', + 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', + 'sample_sequence': 'ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA'}) + assert '[5_6insTT;17del;26A>C;35dup]' in r.data + + def test_description_extractor_raw_fastq(self): + """ + Submit two sequences to the variant description extractor. + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', 'extractor_input.fq') + r = self.app.post('/description-extractor', data={ + 'reference_method': 'raw_method', + 'sample_method': 'raw_method', + 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', + 'sample_sequence': open(path).read()}) + assert '[5_6insTT;17del;26A>C;35dup]' in r.data + + @fix(database, cache('NM_004006.1', 'NM_004006.2')) + def test_description_extractor_refseq(self): + """ + Submit two accession numbers to the variant description extractor. 
+ """ + r = self.app.post('/description-extractor', data={ + 'reference_method': 'refseq_method', + 'sample_method': 'refseq_method', + 'reference_accession_number': 'NM_004006.1', + 'sample_accession_number': 'NM_004006.2'}) + assert '[12749G>A;13729G>A]' in r.data + + def test_description_extractor_file_fasta(self): + """ + Submit a sequence and a FASTA file to the variant description + extractor. + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', 'extractor_input.fa') + r = self.app.post('/description-extractor', data={ + 'reference_method': 'raw_method', + 'sample_method': 'file_method', + 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', + 'sample_file': (open(path), 'extractor_input.fa')}) + assert '[5_6insTT;17del;26A>C;35dup]' in r.data + + def test_description_extractor_file_fastq(self): + """ + Submit a sequence and a FASTQ file to the variant description + extractor. + """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', 'extractor_input.fq') + r = self.app.post('/description-extractor', data={ + 'reference_method': 'raw_method', + 'sample_method': 'file_method', + 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', + 'sample_file': (open(path), 'extractor_input.fq')}) + assert '[5_6insTT;17del;26A>C;35dup]' in r.data + + def test_description_extractor_file_text(self): + """ + Submit a sequence and a text file to the variant description + extractor. 
+ """ + path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'data', 'extractor_input.txt') + r = self.app.post('/description-extractor', data={ + 'reference_method': 'raw_method', + 'sample_method': 'file_method', + 'reference_sequence': 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA', + 'sample_file': (open(path), 'extractor_input.txt')}) + assert '[5_6insTT;17del;26A>C;35dup]' in r.data def test_checksyntax_valid(self): """ @@ -656,7 +725,7 @@ class TestWebsite(MutalyzerTest): 'data', 'AB026906.1.gb.bz2') r = self.app.post('/reference-loader', - data={'method': 'upload', + data={'method': 'upload_method', 'file': (bz2.BZ2File(path), 'AB026906.1.gb')}) assert 'Your reference sequence was loaded successfully.' in r.data @@ -672,7 +741,7 @@ class TestWebsite(MutalyzerTest): Test the genbank uploader with a non-genbank file. """ r = self.app.post('/reference-loader', - data={'method': 'upload', + data={'method': 'upload_method', 'file': (BytesIO('this is not a genbank file'.encode('utf-8')), 'AB026906.1.gb')}) assert 'Your reference sequence was loaded successfully.' not in r.data assert 'The file could not be parsed.' in r.data