Commit 8213ec14 authored by jhoogenboom's avatar jhoogenboom
Browse files

Introducing BGRawVis

* Added new visualisation BGRawVis to the Vis tool. It visualises BGHomRaw output data.
* Now using more reliable linear X axis label formatting in Profilevis.
* Changed filtering operators in Profilevis and Samplevis from > to >=.
parent 20337ba3
......@@ -7,8 +7,7 @@ import argparse
from ..lib import pos_int_arg, add_input_output_args, get_input_output_files,\
add_allele_detection_args, parse_allelelist, parse_library,\
get_sample_data, add_sequence_format_args, adjust_stats,\
add_random_subsampling_args
get_sample_data, add_sequence_format_args
__version__ = "0.1dev"
......
......@@ -117,7 +117,7 @@ def create_visualisation(vistype, infile, outfile, vega, online, tidy,
set_data_formula_transform_value(spec, "yscale", "subgraphoffset", padding)
set_data_formula_transform_value(
spec, "table", "filter_marker", "'" + marker + "'")
if vistype == "sample":
if vistype == "sample" or vistype == "bgraw":
set_data_formula_transform_value(
spec, "table", "amplitude_threshold", min_abs)
set_data_formula_transform_value(
......@@ -168,11 +168,13 @@ def create_visualisation(vistype, infile, outfile, vega, online, tidy,
def add_arguments(parser):
parser.add_argument('type', metavar="TYPE", choices=("sample", "profile"),
parser.add_argument('type', metavar="TYPE",
choices=("sample", "profile", "bgraw"),
help="the type of data to visualise; use 'sample' to visualise "
"sample data files and BGCorrect output; use 'profile' to "
"visualise background noise profiles obtained with BGEstimate, "
"BGHomStats, and BGPredict")
"BGHomStats, and BGPredict; use 'bgraw' to visualise raw "
"background noise data obtained with BGHomRaw")
parser.add_argument('infile', metavar="IN", nargs="?",
help="file containing the data to embed in the visualisation file; if "
"not specified, HTML visualisation files will contain a file "
......@@ -200,31 +202,31 @@ def add_arguments(parser):
"types")
visgroup.add_argument('-n', '--min-abs', metavar="N", type=pos_int_arg,
default=_DEF_THRESHOLD_ABS,
help="[sample] only show sequences with this minimum number of reads "
"(default: %(default)s)")
help="[sample, bgraw] only show sequences with this minimum number of "
"reads (default: %(default)s)")
visgroup.add_argument('-m', '--min-pct', metavar="PCT", type=float,
default=_DEF_THRESHOLD_PCT,
help="[sample, profile] for sample: only show sequences with at least "
"this percentage of the number of reads of the highest allele of "
"a marker; for profile: at least this percentage of the true "
"allele (default: %(default)s)")
help="[sample, profile, bgraw] for sample: only show sequences with "
"at least this percentage of the number of reads of the highest "
"allele of a marker; for profile and bgraw: at least this "
"percentage of the true allele (default: %(default)s)")
visgroup.add_argument('-M', '--marker', metavar="REGEX",
default=_DEF_MARKER_REGEX,
help="[sample, profile] only show graphs for the markers that match "
"the given regular expression; the default value '%(default)s' "
"matches any marker name")
help="[sample, profile, bgraw] only show graphs for the markers that "
"match the given regular expression; the default value "
"'%(default)s' matches any marker name")
visgroup.add_argument('-b', '--bar-width', metavar="N", type=pos_int_arg,
default=_DEF_BAR_WIDTH,
help="[sample, profile] width of the bars in pixels (default: "
help="[sample, profile, bgraw] width of the bars in pixels (default: "
"%(default)s)")
visgroup.add_argument('-p', '--padding', metavar="N", type=pos_int_arg,
default=_DEF_SUBGRAPH_PADDING,
help="[sample, profile] amount of padding (in pixels) between graphs "
"of different markers (default: %(default)s)")
help="[sample, profile, bgraw] amount of padding (in pixels) between "
"graphs of different markers/alleles (default: %(default)s)")
visgroup.add_argument('-w', '--width', metavar="N", type=pos_int_arg,
default=_DEF_WIDTH,
help="[sample, profile] width of the graph area in pixels (default: "
"%(default)s)")
help="[sample, profile, bgraw] width of the graph area in pixels "
"(default: %(default)s)")
#add_arguments
......
{
"width": 600,
"height": 10,
"data": [
{
"name": "table",
"values": "VALUES HERE",
"format": {
"type": "tsv",
"parse": {
"forward": "number",
"reverse": "number",
"total": "number",
"fnoise": "number",
"rnoise": "number",
"tnoise": "number"
}
},
"transform": [
{
"type": "filter",
"test": "datum.allele != datum.sequence"
},
{
"type": "formula",
"field": "amplitude_threshold",
"expr": "5"
},
{
"type": "formula",
"field": "amplitude_pct_threshold",
"expr": "0.5"
},
{
"type":
"formula",
"field": "maxnoise",
"expr": "max(datum.fnoise, datum.rnoise, datum.tnoise)"
},
{
"type": "filter",
"test": "datum.total >= 1 && datum.total >= datum.amplitude_threshold && datum.maxnoise >= datum.amplitude_pct_threshold"
},
{
"type": "formula",
"field": "filter_marker",
"expr": "'.*'"
},
{
"type": "filter",
"test": "test('^' + datum.filter_marker + '$', datum.marker)"
},
{
"type": "formula",
"field": "name",
"expr": "datum.marker + ' ' + datum.allele"
},
{
"type": "sort",
"by": ["name", "-tnoise", "-maxnoise"]
},
{
"type":
"formula",
"field": "minnoise",
"expr": "max(0.001, min(datum.fnoise, datum.rnoise, datum.tnoise))"
}
]
},
{
"name": "barcounts",
"source": "table",
"transform": [
{
"type": "aggregate",
"groupby": ["name"],
"summarize": [{"field": "sequence", "ops": ["distinct"], "as": ["count"]}]
}
]
},
{
"name": "subgraphpadding",
"source": "barcounts",
"transform": [
{
"type": "cross",
"diagonal": false
},
{
"type": "filter",
"test": "datum.b.name < datum.a.name"
},
{
"type": "aggregate",
"groupby": ["a.name"],
"summarize": [{"field": "b.name", "ops": ["distinct"], "as": ["cumulpadding"]}]
},
{
"type": "formula",
"field": "name",
"expr": "datum['a.name']"
}
]
},
{
"name": "subgraphoffsets",
"source": "barcounts",
"transform": [
{
"type": "cross",
"with": "table"
},
{
"type": "filter",
"test": "datum.b.name < datum.a.name"
},
{
"type": "formula",
"field": "namesequence",
"expr": "datum.b.name + datum.b.sequence"
},
{
"type": "aggregate",
"groupby": ["a.name"],
"summarize": [{"field": "namesequence", "ops": ["distinct"], "as": ["cumulcount"]}]
},
{
"type": "formula",
"field": "name",
"expr": "datum['a.name']"
}
]
},
{
"name": "yscale",
"source": "barcounts",
"transform": [
{
"type": "lookup",
"on": "subgraphpadding",
"onKey": "name",
"keys": ["name"],
"as": ["paddingobj"],
"default": {"cumulpadding": 0}
},
{
"type": "lookup",
"on": "subgraphoffsets",
"onKey": "name",
"keys": ["name"],
"as": ["offsetobj"],
"default": {"cumulcount": 0}
},
{
"type": "formula",
"field": "barwidth",
"expr": "15"
},
{
"type": "formula",
"field": "subgraphoffset",
"expr": "70"
},
{
"type": "formula",
"field": "offset",
"expr": "(10+datum.barwidth)*datum.offsetobj.cumulcount + datum.subgraphoffset*datum.paddingobj.cumulpadding"
},
{
"type": "formula",
"field": "end",
"expr": "datum.offset + (10+datum.barwidth)*datum.count"
}
]
}
],
"scales": [
{
"name": "c",
"type": "ordinal",
"range": ["#5e3c99", "#fdb863", "#e66101"],
"domain": ["Total reads", "Forward reads", "Reverse reads"]
}
],
"marks": [
{
"type": "text",
"from": {
"data": "yscale"
},
"properties": {
"enter": {
"x": {"field": {"group": "width"}, "mult": 0.5},
"y": {"field": "offset"},
"fontWeight": {"value": "bold"},
"text": {"field": "name"},
"align": {"value": "center"},
"baseline": {"value": "bottom"},
"fill": {"value": "black"}
}
}
},
{
"type": "group",
"from": {
"data": "table",
"transform": [
{
"type": "facet",
"groupby": ["name"]
},
{
"type": "lookup",
"on": "yscale",
"onKey": "name",
"keys": ["name"],
"as": ["subgraphscale"]
}
]
},
"properties": {
"enter": {
"x": {"value": 0},
"width": {"field": {"group": "width"}},
"y": {"field": "subgraphscale.offset"},
"y2": {"field": "subgraphscale.end"},
"stroke": {"value": "#dddddd"}
}
},
"scales": [
{
"name": "x",
"type": "linear",
"nice": true,
"range": "width",
"domain": {"data": "table", "field": ["minnoise", "maxnoise"]}
},
{
"name": "y",
"type": "ordinal",
"points": true,
"padding": 1,
"range": "height",
"domain": {"field": "sequence"}
}
],
"axes": [
{
"type": "x",
"scale": "x",
"format": "g",
"grid": true,
"layer": "back",
"title": "Noise ratio (%)",
"properties": {
"title": {
"dy": {"value": -5}
}
}
},
{
"type": "y",
"scale": "y",
"grid": true,
"layer": "back"
}
],
"legends": [
{
"fill": "c",
"title": "Noise ratio in:",
"properties": {
"symbols": {
"size": {"value": 100},
"stroke": {"value": "transparent"},
"fillOpacity": {"value": 0.8}
}
}
}
],
"marks": [
{
"type": "symbol",
"from": {
"transform": [
{
"type": "filter",
"test": "datum.forward >= 1 && datum.forward >= datum.amplitude_threshold"
},
{
"type": "filter",
"test": "datum.fnoise >= datum.amplitude_pct_threshold"
}
]
},
"properties": {
"enter": {
"x": {"scale": "x", "field": "fnoise"},
"y": {"scale": "y", "field": "sequence", "offset": -5},
"fill": {"scale": "c", "value": "Forward reads"},
"fillOpacity": {"value": 0.8}
},
"update": {
"size": {"value": 100},
"stroke": {"value": "transparent"}
},
"hover": {
"size": {"value": 300},
"stroke": {"value": "white"}
}
}
},
{
"type": "symbol",
"from": {
"transform": [
{
"type": "filter",
"test": "datum.reverse >= 1 && datum.reverse >= datum.amplitude_threshold"
},
{
"type": "filter",
"test": "datum.rnoise >= datum.amplitude_pct_threshold"
}
]
},
"properties": {
"enter": {
"x": {"scale": "x", "field": "rnoise"},
"y": {"scale": "y", "field": "sequence", "offset": 5},
"fill": {"scale": "c", "value": "Reverse reads"},
"fillOpacity": {"value": 0.8}
},
"update": {
"size": {"value": 100},
"stroke": {"value": "transparent"}
},
"hover": {
"size": {"value": 300},
"stroke": {"value": "white"}
}
}
},
{
"type": "symbol",
"from": {
"transform": [
{
"type": "filter",
"test": "datum.total >= 1 && datum.total >= datum.amplitude_threshold"
},
{
"type": "filter",
"test": "datum.tnoise >= datum.amplitude_pct_threshold"
}
]
},
"properties": {
"enter": {
"x": {"scale": "x", "field": "tnoise"},
"y": {"scale": "y", "field": "sequence"},
"fill": {"scale": "c", "value": "Total reads"},
"fillOpacity": {"value": 0.8}
},
"update": {
"size": {"value": 100},
"stroke": {"value": "transparent"}
},
"hover": {
"size": {"value": 300},
"stroke": {"value": "white"}
}
}
}
]
}
]
}
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Background Noise Visualisation - FDSTools</title>
<!-- BEGIN_LIBRARIES -->
<script src="http://vega.github.io/vega-editor/vendor/d3.min.js"></script>
<script src="http://vega.github.io/vega/vega.min.js"></script>
<!-- END_LIBRARIES -->
<style>
* {
font-family: Helvetica Neue, Helvetica, Arial, sans-serif;
}
body {
margin: 0px
}
div.options {
position: absolute;
margin: 5px;
background-color: rgba(128, 128, 255, 0.5);
z-index: 10;
}
#optionsheader{
cursor: pointer;
}
#optionsheader:hover{
border-bottom: 1px dashed black;
}
div#vis {
position: absolute;
overflow: auto;
bottom: 0px;
top: 0px;
right: 0px;
left: 0px;
text-align: right;
}
</style>
</head>
<body>
<div class="options">
<strong id="optionsheader">Options</strong><br>
<div id="options">
<span id="fileselectspan" style="display: none">
Open raw background noise data file (or drag a file onto this page): <input id="fileselect" type="file"><br>
</span>
Display options: graph width <input type="text" value="600" id="graphwidth" size="3">px;
sequence spacing <input type="text" value="15" id="barwidth" size="2">px;
subgraph spacing <input type="text" value="70" id="subgraphoffset" size="3">px<br>
Filtering: require at least <input type="text" value="15" id="minN" size="3"> reads and at least <input type="text" value="0.5" id="minP" size="3">% of the true allele<br>
Filtering: marker <input type="text" id="markerFilter" size="10" title="Supports regular expression syntax: e.g., use '.*' to match anything."><br>
Axis scale: <input type="radio" name="scale" value="linear" id="scaleLinear" checked> Linear
<input type="radio" name="scale" value="log" id="scaleLog"> Logarithmic<br>
Render as: <input type="radio" name="renderer" value="canvas" id="renderCanvas"> Canvas
<input type="radio" name="renderer" value="svg" id="renderSVG" checked> SVG
<br>
<a id="saveLink" href="javascript:void(saveImage())" style="display: none">Save image</a>
</div>
</div>
<div id="vis"></div>
<script type="text/javascript">
var graph = false;
/**
 * Parse the global graph_spec with Vega and render the resulting chart.
 *
 * The chart is drawn into the "#vis" element with the renderer chosen by the
 * "renderSVG" radio button ("svg" when checked, "canvas" otherwise) and the
 * resulting view is stored in the global `graph`.
 */
function parse(){
    function renderChart(chart){
        var svgSelected = document.getElementById("renderSVG").checked;
        graph = chart({el: "#vis", renderer: svgSelected? "svg" : "canvas"});
        graph.update();

        // A rendered graph exists now, so the save link can be shown.
        document.getElementById("saveLink").style.display = "inline";

        // Scroll to the right; the graph is more interesting than the long labels.
        var container = document.getElementById("vis");
        container.scrollLeft = container.scrollWidth;
    }

    vg.parse.spec(graph_spec, renderChart);
}
/**
 * Switch the X scale of every subgraph between linear and logarithmic.
 *
 * Walks the top-level marks of the global graph_spec and, for each scale
 * named "x", stores the new scale type together with a matching number
 * format. If a graph has already been rendered, the spec is parsed again so
 * the change becomes visible.
 *
 * @param {string} value - Scale type to apply; "log" selects the ".1r"
 *     format, any other value selects "g".
 */
function setScale(value){
    if(!graph_spec)
        return;
    var format = (value == "log"? ".1r" : "g");

    // Declared, indexed loop counters: the previous `for(i in ...)` loops
    // created implicit globals `i` and `j` (a ReferenceError in strict mode)
    // and enumerated array keys instead of iterating over the elements.
    var marks = graph_spec["marks"] || [];
    for(var i = 0; i < marks.length; i++){
        // Marks without their own scales (e.g. the text labels) are skipped.
        var scales = marks[i]["scales"] || [];
        for(var j = 0; j < scales.length; j++){
            if(scales[j]["name"] === "x"){
                scales[j]["type"] = value;
                // NOTE(review): the spec declares "format" on the x axis, not
                // on the scale, so this write may have no visible effect;
                // confirm whether the axis format should be updated instead.
                scales[j]["format"] = format;
            }
        }
    }

    if(graph)
        parse();
}
/**
 * Overwrite the expression of a formula transform in the global graph_spec.
 *
 * Looks through the data sets named `dataname` for the first transform of
 * type "formula" whose "field" equals `fieldname` and replaces its "expr"
 * with the string form of `value`.
 *
 * @param {string} dataname - Name of the data set to search.
 * @param {string} fieldname - Field written by the formula transform.
 * @param {*} value - New expression; converted to a string before storing.
 * @returns {boolean} true if a matching transform was updated, false if
 *     there is no spec or no matching transform.
 */
function setDataFormulaTransformValue(dataname, fieldname, value){
    if(!graph_spec)
        return false;

    // Declared, indexed loop counters: the previous `for(i in ...)` loops
    // created implicit globals `i` and `j` (a ReferenceError in strict mode)
    // and enumerated array keys instead of iterating over the elements.
    var datasets = graph_spec["data"] || [];
    for(var i = 0; i < datasets.length; i++){
        if(datasets[i]["name"] !== dataname)
            continue;
        // Data sets are allowed to have no transform list at all.
        var transforms = datasets[i]["transform"] || [];
        for(var j = 0; j < transforms.length; j++){
            if(transforms[j]["type"] === "formula" && transforms[j]["field"] === fieldname){
                transforms[j]["expr"] = "" + value;
                return true;
            }
        }
    }
    return false;
}
/**
 * Read the expression of a formula transform from the global graph_spec.
 *
 * Looks through the data sets named `dataname` for the first transform of
 * type "formula" whose "field" equals `fieldname`.
 *
 * @param {string} dataname - Name of the data set to search.
 * @param {string} fieldname - Field written by the formula transform.
 * @returns {*} The transform's "expr" value, or false if there is no spec or
 *     no matching transform.
 */
function getDataFormulaTransformValue(dataname, fieldname){
    if(!graph_spec)
        return false;

    // Declared, indexed loop counters: the previous `for(i in ...)` loops
    // created implicit globals `i` and `j` (a ReferenceError in strict mode)
    // and enumerated array keys instead of iterating over the elements.
    var datasets = graph_spec["data"] || [];
    for(var i = 0; i < datasets.length; i++){
        if(datasets[i]["name"] !== dataname)
            continue;
        // Data sets are allowed to have no transform list at all.
        var transforms = datasets[i]["transform"] || [];
        for(var j = 0; j < transforms.length; j++){
            if(transforms[j]["type"] === "formula" && transforms[j]["field"] === fieldname)
                return transforms[j]["expr"];
        }
    }
    return false;
}
/**
 * Pass the requested renderer on to the current graph, if one exists.
 *
 * @param {string} value - Renderer name handed to graph.renderer().
 */
function setRenderer(value){
    // Nothing has been rendered yet, so there is nothing to switch.
    if(!graph)
        return;
    graph.renderer(value);
}
//Load the data (input is a fileList object; only the first file is loaded).
function loadDataset(fileList){
if(!graph_spec)
return;
var reader = new FileReader();
reader.onload = function(theFile){
graph_spec["data"][0]["values"] = reader.result;
parse();
};