Skip to content
Snippets Groups Projects
Commit 5c258648 authored by Martin Larralde's avatar Martin Larralde
Browse files

First check if block maximum is high enough before running `Pipeline::threshold` in `Scanner`

parent 45099ad0
Branches
Tags
No related merge requests found
...@@ -91,6 +91,7 @@ impl Ord for Hit { ...@@ -91,6 +91,7 @@ impl Ord for Hit {
} }
} }
/// A scanner for iterating over scoring matrix hits in a sequence.
#[derive(Debug)] #[derive(Debug)]
pub struct Scanner<'a, A: Alphabet, C: StrictlyPositive = DefaultColumns> { pub struct Scanner<'a, A: Alphabet, C: StrictlyPositive = DefaultColumns> {
pssm: &'a ScoringMatrix<A>, pssm: &'a ScoringMatrix<A>,
...@@ -157,45 +158,72 @@ where ...@@ -157,45 +158,72 @@ where
// score the row slice // score the row slice
self.pipeline self.pipeline
.score_rows_into(&self.dm, &self.seq, self.row..end, &mut self.dscores); .score_rows_into(&self.dm, &self.seq, self.row..end, &mut self.dscores);
// scan through the positions above discrete threshold and recompute // check if any position is higher than the discrete threshold.
// scores in floating-point to see if they pass the real threshold. if self.pipeline.max(&self.dscores).unwrap() >= t {
for c in self.pipeline.threshold(&self.dscores, t) { // scan through the positions above discrete threshold and recompute
let index = c.col * (self.seq.matrix().rows() - self.seq.wrap()) + self.row + c.row; // scores in floating-point to see if they pass the real threshold.
let score = self.pssm.score_position(&self.seq, index); for c in self.pipeline.threshold(&self.dscores, t) {
if score >= self.threshold { let index =
self.hits.push(Hit::new(index, score)); c.col * (self.seq.matrix().rows() - self.seq.wrap()) + self.row + c.row;
let score = self.pssm.score_position(&self.seq, index);
if score >= self.threshold {
self.hits.push(Hit::new(index, score));
}
} }
} }
// Proceed to the next block.
self.row += self.block_size; self.row += self.block_size;
} }
self.hits.pop() self.hits.pop()
} }
fn max(mut self) -> Option<Self::Item> { fn max(mut self) -> Option<Self::Item> {
// Compute the score of the best hit not yet returned, and translate
// the `f32` score threshold into a discrete, under-estimate `u8`
// threshold.
let mut best = std::mem::take(&mut self.hits) let mut best = std::mem::take(&mut self.hits)
.into_iter() .into_iter()
.max_by(|x, y| x.score.partial_cmp(&y.score).unwrap()) .filter(|hit| hit.score >= self.threshold)
.unwrap_or(Hit::new(0, f32::MIN)); .max_by(|x, y| x.score.partial_cmp(&y.score).unwrap());
let mut best_discrete = self.dm.scale(best.score.max(self.threshold)); let mut best_discrete = match &best {
Some(hit) => self.dm.scale(hit.score),
None => self.dm.scale(self.threshold),
};
// Cache th number of sequence rows in the striped sequence matrix.
let sequence_rows = self.seq.matrix().rows() - self.seq.wrap();
// Process all rows of the sequence and record the local
while self.row < self.seq.matrix().rows() { while self.row < self.seq.matrix().rows() {
let end = (self.row + self.block_size).min(self.seq.matrix().rows() - self.seq.wrap()); // Score the rows of the current block.
let end = (self.row + self.block_size).min(sequence_rows);
self.pipeline self.pipeline
.score_rows_into(&self.dm, &self.seq, self.row..end, &mut self.dscores); .score_rows_into(&self.dm, &self.seq, self.row..end, &mut self.dscores);
if let Some(c) = self.pipeline.argmax(&self.dscores) { // Check if the highest score in the block is high enough to be
let dscore = self.dscores.matrix()[c]; // a new global maximum
if dscore >= best_discrete { if self.pipeline.max(&self.dscores).unwrap() >= best_discrete {
let index = // Iterate over candidate position in `u8` scores and recalculate
c.col * (self.seq.matrix().rows() - self.seq.wrap()) + self.row + c.row; // scores for candidates passing the threshold.
let score = self.pssm.score_position(&self.seq, index); for c in self.pipeline.threshold(&self.dscores, best_discrete) {
if (score > best.score) | (score == best.score && index > best.position) { let dscore = self.dscores.matrix()[c];
best = Hit::new(index, score); if dscore >= best_discrete {
best_discrete = dscore; let index = c.col * sequence_rows + self.row + c.row;
let score = self.pssm.score_position(&self.seq, index);
if let Some(hit) = &best {
if (score > hit.score) | (score == hit.score && index > hit.position) {
best = Some(Hit::new(index, score));
best_discrete = dscore;
}
} else {
best = Some(Hit::new(index, score))
}
} }
} }
} }
// Proceed to the next block.
self.row += self.block_size; self.row += self.block_size;
} }
Some(best) best
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment