Skip to content
Snippets Groups Projects
Commit 9086f036 authored by Martin Larralde's avatar Martin Larralde
Browse files

Make `TfmPvalue` generic over the reference to the `ScoringMatrix`

parent 4d975710
No related branches found
No related tags found
No related merge requests found
...@@ -64,14 +64,13 @@ let pssm = counts.to_freq(0.1).to_scoring(None); ...@@ -64,14 +64,13 @@ let pssm = counts.to_freq(0.1).to_scoring(None);
/// Create a pipeline to run tasks with platform acceleration /// Create a pipeline to run tasks with platform acceleration
let pli = Pipeline::dispatch(); let pli = Pipeline::dispatch();
// Encode the target sequence into a striped matrix // Use the pipeline to encode the target sequence into a striped matrix
let seq = "ATGTCCCAACAACGATACCCCGAGCCCATCGCCGTCATCGGCTCGGCATGCAGATTCCCAGGCG"; let seq = "ATGTCCCAACAACGATACCCCGAGCCCATCGCCGTCATCGGCTCGGCATGCAGATTCCCAGGCG";
let encoded = pli.encode(seq).unwrap(); let encoded = pli.encode(seq).unwrap();
let mut striped = pli.stripe(encoded); let mut striped = pli.stripe(encoded);
striped.configure(&pssm);
// Use a pipeline to compute scores for every position of the matrix.
// Use the pipeline to compute scores for every position of the matrix.
striped.configure(&pssm);
let scores = pli.score(&striped, &pssm); let scores = pli.score(&striped, &pssm);
// Scores can be extracted into a Vec<f32>, or indexed directly. // Scores can be extracted into a Vec<f32>, or indexed directly.
...@@ -85,8 +84,8 @@ assert_eq!(best, 18); ...@@ -85,8 +84,8 @@ assert_eq!(best, 18);
``` ```
This example uses a dynamic dispatch pipeline, which selects the best available This example uses a dynamic dispatch pipeline, which selects the best available
backend (AVX2, SSE2, NEON, or a generic implementation) depending on backend (AVX2, SSE2, NEON, or a generic implementation) depending on the local
the local platform. platform.
## ⏱️ Benchmarks ## ⏱️ Benchmarks
......
...@@ -20,9 +20,9 @@ type Map<K, V> = HashMap<K, V>; ...@@ -20,9 +20,9 @@ type Map<K, V> = HashMap<K, V>;
/// The TFMPvalue algorithm. /// The TFMPvalue algorithm.
#[derive(Debug)] #[derive(Debug)]
pub struct TfmPvalue<'pssm, A: Alphabet> { pub struct TfmPvalue<A: Alphabet, M: AsRef<ScoringMatrix<A>>> {
/// A reference to the original scoring matrix. /// A reference to the original scoring matrix.
matrix: &'pssm ScoringMatrix<A>, matrix: M,
/// The granularity with which the round matrix has been built. /// The granularity with which the round matrix has been built.
granularity: f64, granularity: f64,
/// The round matrix offsets. /// The round matrix offsets.
...@@ -38,10 +38,11 @@ pub struct TfmPvalue<'pssm, A: Alphabet> { ...@@ -38,10 +38,11 @@ pub struct TfmPvalue<'pssm, A: Alphabet> {
} }
#[allow(non_snake_case)] #[allow(non_snake_case)]
impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> { impl<A: Alphabet, M: AsRef<ScoringMatrix<A>>> TfmPvalue<A, M> {
/// Initialize the TFM-Pvalue algorithm for the given scoring matrix. /// Initialize the TFM-Pvalue algorithm for the given scoring matrix.
pub fn new(matrix: &'pssm ScoringMatrix<A>) -> Self { pub fn new(matrix: M) -> Self {
let M = matrix.len(); let m = matrix.as_ref();
let M = m.len();
Self { Self {
granularity: f64::NAN, granularity: f64::NAN,
matrix, matrix,
...@@ -56,7 +57,8 @@ impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> { ...@@ -56,7 +57,8 @@ impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> {
/// Compute the approximate score matrix with the given granularity. /// Compute the approximate score matrix with the given granularity.
fn recompute(&mut self, granularity: f64) { fn recompute(&mut self, granularity: f64) {
assert!(granularity < 1.0); assert!(granularity < 1.0);
let M: usize = self.matrix.len(); let matrix = self.matrix.as_ref();
let M: usize = matrix.len();
let K: usize = <A as Alphabet>::K::USIZE; let K: usize = <A as Alphabet>::K::USIZE;
// compute effective granularity // compute effective granularity
...@@ -65,22 +67,17 @@ impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> { ...@@ -65,22 +67,17 @@ impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> {
// compute integer matrix // compute integer matrix
for i in 0..M { for i in 0..M {
for j in 0..K - 1 { for j in 0..K - 1 {
self.int_matrix[i][j] = self.int_matrix[i][j] = (matrix[i][j] as f64 / self.granularity).floor() as i64;
(self.matrix[i][j] as f64 / self.granularity).floor() as i64;
} }
} }
// compute maximum error by summing max error at each row // compute maximum error by summing max error at each row
self.error_max = 0.0; self.error_max = 0.0;
for i in 1..M { for i in 1..M {
let mut max_e = let mut max_e = matrix[i][0] as f64 / self.granularity - self.int_matrix[i][0] as f64;
self.matrix[i][0] as f64 / self.granularity - self.int_matrix[i][0] as f64;
for j in 0..K - 1 { for j in 0..K - 1 {
if max_e if max_e < matrix[i][j] as f64 / self.granularity - self.int_matrix[i][j] as f64 {
< self.matrix[i][j] as f64 / self.granularity - self.int_matrix[i][j] as f64 max_e = matrix[i][j] as f64 / self.granularity - self.int_matrix[i][j] as f64;
{
max_e =
self.matrix[i][j] as f64 / self.granularity - self.int_matrix[i][j] as f64;
} }
} }
self.error_max += max_e; self.error_max += max_e;
...@@ -108,11 +105,12 @@ impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> { ...@@ -108,11 +105,12 @@ impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> {
/// Compute the score distribution between `min` and `max`. /// Compute the score distribution between `min` and `max`.
fn distribution(&self, min: i64, max: i64) -> Vec<Map<i64, f64>> { fn distribution(&self, min: i64, max: i64) -> Vec<Map<i64, f64>> {
let M: usize = self.matrix.len(); let matrix = self.matrix.as_ref();
let M: usize = matrix.len();
let K: usize = <A as Alphabet>::K::USIZE; let K: usize = <A as Alphabet>::K::USIZE;
// background frequencies // background frequencies
let bg = self.matrix.background().frequencies(); let bg = matrix.background().frequencies();
// maps for each steps of the computation // maps for each steps of the computation
let mut qvalues = vec![Map::default(); M + 1]; let mut qvalues = vec![Map::default(); M + 1];
...@@ -159,7 +157,8 @@ impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> { ...@@ -159,7 +157,8 @@ impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> {
/// Search the p-value range for the given score. /// Search the p-value range for the given score.
fn lookup_pvalue(&self, score: f64) -> RangeInclusive<f64> { fn lookup_pvalue(&self, score: f64) -> RangeInclusive<f64> {
assert!(!self.granularity.is_nan()); assert!(!self.granularity.is_nan());
let M: usize = self.matrix.len(); let matrix = self.matrix.as_ref();
let M: usize = matrix.len();
// Compute the integer score range from the given score. // Compute the integer score range from the given score.
let scaled = score / self.granularity + self.offsets.iter().sum::<i64>() as f64; let scaled = score / self.granularity + self.offsets.iter().sum::<i64>() as f64;
...@@ -201,7 +200,8 @@ impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> { ...@@ -201,7 +200,8 @@ impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> {
/// Search the score and p-value range for a given p-value. /// Search the score and p-value range for a given p-value.
fn lookup_score(&self, pvalue: f64, range: RangeInclusive<i64>) -> (i64, RangeInclusive<f64>) { fn lookup_score(&self, pvalue: f64, range: RangeInclusive<i64>) -> (i64, RangeInclusive<f64>) {
assert!(!self.granularity.is_nan()); assert!(!self.granularity.is_nan());
let M: usize = self.matrix.len(); let matrix = self.matrix.as_ref();
let M: usize = matrix.len();
// compute score range for target pvalue // compute score range for target pvalue
let min = *range.start(); let min = *range.start();
...@@ -268,7 +268,7 @@ impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> { ...@@ -268,7 +268,7 @@ impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> {
} }
/// Iterate with decreasing granularity to compute an approximate P-value for a score. /// Iterate with decreasing granularity to compute an approximate P-value for a score.
pub fn approximate_pvalue(&mut self, score: f64) -> PvaluesIterator<'pssm, '_, A> { pub fn approximate_pvalue(&mut self, score: f64) -> PvaluesIterator<'_, A, M> {
PvaluesIterator { PvaluesIterator {
tfmp: self, tfmp: self,
score, score,
...@@ -293,7 +293,7 @@ impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> { ...@@ -293,7 +293,7 @@ impl<'pssm, A: Alphabet> TfmPvalue<'pssm, A> {
} }
/// Iterate with decreasing granularity to compute an approximate score for a P-value. /// Iterate with decreasing granularity to compute an approximate score for a P-value.
pub fn approximate_score(&mut self, pvalue: f64) -> ScoresIterator<'pssm, '_, A> { pub fn approximate_score(&mut self, pvalue: f64) -> ScoresIterator<'_, A, M> {
self.recompute(0.1); self.recompute(0.1);
ScoresIterator { ScoresIterator {
min: self.min_score_rows.iter().sum(), min: self.min_score_rows.iter().sum(),
...@@ -325,8 +325,8 @@ pub struct Iteration { ...@@ -325,8 +325,8 @@ pub struct Iteration {
} }
#[derive(Debug)] #[derive(Debug)]
pub struct PvaluesIterator<'pssm, 'tfmp, A: Alphabet> { pub struct PvaluesIterator<'tfmp, A: Alphabet, M: AsRef<ScoringMatrix<A>>> {
tfmp: &'tfmp mut TfmPvalue<'pssm, A>, tfmp: &'tfmp mut TfmPvalue<A, M>,
score: f64, score: f64,
decay: f64, decay: f64,
granularity: f64, granularity: f64,
...@@ -334,7 +334,7 @@ pub struct PvaluesIterator<'pssm, 'tfmp, A: Alphabet> { ...@@ -334,7 +334,7 @@ pub struct PvaluesIterator<'pssm, 'tfmp, A: Alphabet> {
converged: bool, converged: bool,
} }
impl<'pssm, 'tfmp, A: Alphabet> Iterator for PvaluesIterator<'pssm, 'tfmp, A> { impl<'tfmp, A: Alphabet, M: AsRef<ScoringMatrix<A>>> Iterator for PvaluesIterator<'tfmp, A, M> {
type Item = Iteration; type Item = Iteration;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if self.converged || self.granularity <= self.target { if self.converged || self.granularity <= self.target {
...@@ -361,8 +361,8 @@ impl<'pssm, 'tfmp, A: Alphabet> Iterator for PvaluesIterator<'pssm, 'tfmp, A> { ...@@ -361,8 +361,8 @@ impl<'pssm, 'tfmp, A: Alphabet> Iterator for PvaluesIterator<'pssm, 'tfmp, A> {
} }
#[derive(Debug)] #[derive(Debug)]
pub struct ScoresIterator<'pssm, 'tfmp, A: Alphabet> { pub struct ScoresIterator<'tfmp, A: Alphabet, M: AsRef<ScoringMatrix<A>>> {
tfmp: &'tfmp mut TfmPvalue<'pssm, A>, tfmp: &'tfmp mut TfmPvalue<A, M>,
pvalue: f64, pvalue: f64,
decay: f64, decay: f64,
granularity: f64, granularity: f64,
...@@ -372,7 +372,7 @@ pub struct ScoresIterator<'pssm, 'tfmp, A: Alphabet> { ...@@ -372,7 +372,7 @@ pub struct ScoresIterator<'pssm, 'tfmp, A: Alphabet> {
max: i64, max: i64,
} }
impl<'pssm, 'tfmp, A: Alphabet> Iterator for ScoresIterator<'pssm, 'tfmp, A> { impl<'tfmp, A: Alphabet, M: AsRef<ScoringMatrix<A>>> Iterator for ScoresIterator<'tfmp, A, M> {
type Item = Iteration; type Item = Iteration;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if self.converged || self.granularity <= self.target { if self.converged || self.granularity <= self.target {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment