Commit 0065d2cb authored by Hoogenboom, Jerry's avatar Hoogenboom, Jerry

PctRecovery relative to number of reads after correction

Changed:
* The PctRecovery as used for automatic allele selection in Samplevis
  HTML visualisations as well as Samplestats is now computed w.r.t. the
  number of reads after correction, instead of the number of reads
  before correction.

Added:
* Added X_recovery columns to the output of the Samplestats tool. The
  value is equal to X_add / X_corrected * 100.
parent 9565c837
......@@ -27,6 +27,8 @@ X_correction_pct: The difference between the values of X_corrected and
X, as a percentage of the value of X.
X_removed_pct: The value of X_noise, as a percentage of the value of X.
X_added_pct: The value of X_add, as a percentage of the value of X.
X_recovery_pct: The value of X_add, as a percentage of the value of
X_corrected.
"""
import sys
......@@ -61,7 +63,7 @@ _DEF_MIN_PCT_OF_SUM = 3.
_DEF_MIN_CORRECTION = 0
# Default minimum number of recovered reads as a percentage of the
# original number of reads to mark as allele.
# number of reads after correction to mark as allele.
# This value can be overridden by the -r command line option.
_DEF_MIN_RECOVERY = 0
......@@ -88,7 +90,7 @@ _DEF_MIN_PCT_OF_SUM_FILT = 0.
_DEF_MIN_CORRECTION_FILT = 0
# Default minimum number of recovered reads as a percentage of the
# original number of reads for filtering.
# number of reads after correction for filtering.
# This value can be overridden by the -R command line option.
_DEF_MIN_RECOVERY_FILT = 0
......@@ -97,15 +99,18 @@ COLUMN_ORDER = [
"total_corrected_mp_sum",
"total_corrected_mp_max",
"total_correction_pct",
"total_recovery_pct",
"forward_corrected_pct",
"forward_corrected",
"forward_corrected_mp_sum",
"forward_corrected_mp_max",
"forward_correction_pct",
"forward_recovery_pct",
"reverse_corrected",
"reverse_corrected_mp_sum",
"reverse_corrected_mp_max",
"reverse_correction_pct",
"reverse_recovery_pct",
"total",
"total_mp_sum",
......@@ -167,14 +172,20 @@ def compute_stats(infile, outfile, min_reads,
column_names.append("total_correction_pct")
if "forward_corrected" in column_names:
column_names.append("forward_corrected_pct")
if "total_add" in column_names:
column_names.append("total_recovery")
if "forward_corrected" in column_names:
column_names.append("forward_corrected_mp_sum")
column_names.append("forward_corrected_mp_max")
column_names.append("forward_correction_pct")
if "forward_add" in column_names:
column_names.append("forward_recovery")
if "reverse_corrected" in column_names:
column_names.append("reverse_corrected_mp_sum")
column_names.append("reverse_corrected_mp_max")
column_names.append("reverse_correction_pct")
if "reverse_add" in column_names:
column_names.append("reverse_recovery")
column_names.append("total_mp_sum")
column_names.append("total_mp_max")
column_names.append("forward_pct")
......@@ -308,6 +319,10 @@ def compute_stats(infile, outfile, min_reads,
row[ci["forward_corrected"]]/row[ci["total_corrected"]]
if row[ci["total_corrected"]]
else row[ci["forward_corrected"]] > 0))
if "total_add" in ci:
row.append(100.*row[ci["total_add"]] /
row[ci["total_corrected"]]
if row[ci["total_corrected"]] else 0)
if "forward_corrected" in ci:
row.append(100.*row[ci["forward_corrected"]] /
marker_forward_corrected_sum
......@@ -320,6 +335,10 @@ def compute_stats(infile, outfile, min_reads,
if row[ci["forward"]]
else ((row[ci["forward_corrected"]]>0)*200-100
if row[ci["forward_corrected"]] else 0))
if "forward_add" in ci:
row.append(100.*row[ci["forward_add"]] /
row[ci["forward_corrected"]]
if row[ci["forward_corrected"]] else 0)
if "reverse_corrected" in ci:
row.append(100.*row[ci["reverse_corrected"]] /
marker_reverse_corrected_sum
......@@ -332,6 +351,10 @@ def compute_stats(infile, outfile, min_reads,
if row[ci["reverse"]]
else ((row[ci["reverse_corrected"]]>0)*200-100
if row[ci["reverse_corrected"]] else 0))
if "reverse_add" in ci:
row.append(100.*row[ci["reverse_add"]] /
row[ci["reverse_corrected"]]
if row[ci["reverse_corrected"]] else 0)
row.append(100.*row[ci["total"]]/marker_total_sum
if marker_total_sum else 0)
row.append(100.*row[ci["total"]]/marker_total_max
......@@ -417,8 +440,8 @@ def compute_stats(infile, outfile, min_reads,
not in ci else row[ci["total_corrected_mp_max"]]
correction = 0 if "total_correction_pct" not in ci \
else row[ci["total_correction_pct"]]
recovery = 0 if "total_added_pct" not in ci \
else row[ci["total_added_pct"]]
recovery = 0 if "total_recovery" not in ci \
else row[ci["total_recovery"]]
strands = [
row[ci["forward"]] if "forward_corrected" not in ci
else row[ci["forward_corrected"]],
......@@ -656,7 +679,7 @@ def add_arguments(parser):
type=float, default=_DEF_MIN_RECOVERY,
help="the minimum number of reads that was recovered thanks to "
"noise correction (by e.g., bgcorrect), as a percentage of the "
"original number of reads (default: %(default)s)")
"total number of reads after correction (default: %(default)s)")
filtergroup = parser.add_argument_group("filtering options",
"sequences that match the -C or -Y option (or both) and all of the "
"other settings are retained, all others are filtered")
......@@ -693,7 +716,7 @@ def add_arguments(parser):
type=float, default=_DEF_MIN_RECOVERY_FILT,
help="the minimum number of reads that was recovered thanks to "
"noise correction (by e.g., bgcorrect), as a percentage of the "
"original number of reads (default: %(default)s)")
"total number of reads after correction (default: %(default)s)")
#add_arguments
......
......@@ -749,7 +749,7 @@ function autoSelectAlleles(graph){
datum.pct_of_max >= minPa &&
datum.pct_of_sum >= minTa &&
((datum.total_added/datum.total*100-100) >= minCa ||
(datum.total_add/datum.total*100) >= minAa) &&
(datum.total_add/datum.total_added*100) >= minAa) &&
Math.min(datum.forward_added, datum.reverse_added) >= minOa){
if(autoSelected.indexOf(datum) == -1)
autoSelected.push(datum);
......@@ -968,7 +968,7 @@ function saveTable(){
datum.pct_of_max.toFixed(2),
datum.pct_of_sum.toFixed(2),
(datum.total_added/datum.total*100-100).toFixed(2),
(datum.total_add/datum.total*100).toFixed(2),
(datum.total_add/datum.total_added*100).toFixed(2),
datum.forwardpct.toFixed(2),
notes.join(",")].join("\t");
}).join(linebreak)
......@@ -1027,7 +1027,7 @@ function updateTable(i){
row.insertCell().appendChild(document.createTextNode(datum.pct_of_sum.toFixed(2) + "%")).parentNode.setAttribute("class", "num");
var corrAmount = datum.total_added/datum.total*100-100;
row.insertCell().appendChild(document.createTextNode((corrAmount < 0? "" : "+") + corrAmount.toFixed(2) + "%")).parentNode.setAttribute("class", "num");
row.insertCell().appendChild(document.createTextNode((datum.total_add/datum.total*100).toFixed(2) + "%")).parentNode.setAttribute("class", "num");
row.insertCell().appendChild(document.createTextNode((datum.total_add/datum.total_added*100).toFixed(2) + "%")).parentNode.setAttribute("class", "num");
var cell = row.insertCell();
cell.appendChild(document.createTextNode(datum.forwardpct.toFixed(2) + "%"));
cell.setAttribute("class", "num");
......
To-do:
* Samplevis:
* Add absolute number of reads before correction, noise reads, and recovered
reads to the tables in HTML visualisations.
* Option to choose complete table download (all columns, not all rows).
* Option to freely adjust the sorting (currently CE length toggle only).
* When we have them, add default values to table filtering (for reference).
......@@ -12,6 +14,8 @@ To-do:
input events may get scheduled between the chunks.
* Add 'Save page' button that also saves the alleles clicked by the user.
* Add options to set the Table filtering options in the Vis tool.
* Add ability to mark sequences that have a '*' in the 'new_allele' column
(if that column exists at all). Used together with FindNewAlleles.
* Additions needed for publication:
* Check whether there is a difference between filtering short artefacts in
TSSV vs having BGEstimate/BGCorrect filter them.
......@@ -25,6 +29,10 @@ To-do:
(maybe also additional value for confidence interval).
* Visualisation to display highest remaining background (positive and
negative) in known samples after BGCorrect analysis.
* And similarly for PctRecovery...
* Add section to the library file where genomic positions of known pathogenic
variants are specified. The TSSV tool should always output the reference base
at these positions to comply with ethical regulations.
* Add options to Libconvert to generate a template for STR or non-STR markers.
* Add options to Samplevis, Samplestats (and possibly other relevant tools) to
filter alleles by sequence length. The TSSV tool already supports this.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment