Skip to content
Snippets Groups Projects
Commit b9504e91 authored by Martin Larralde's avatar Martin Larralde
Browse files

Rename `DenseMatrix::columns_effective` to `DenseMatrix::stride`

parent 9fe5d5ae
No related branches found
No related tags found
No related merge requests found
...@@ -14,7 +14,6 @@ use lightmotif::pli::Score; ...@@ -14,7 +14,6 @@ use lightmotif::pli::Score;
use pyo3::exceptions::PyBufferError; use pyo3::exceptions::PyBufferError;
use pyo3::exceptions::PyIndexError; use pyo3::exceptions::PyIndexError;
use pyo3::exceptions::PyKeyError;
use pyo3::exceptions::PyTypeError; use pyo3::exceptions::PyTypeError;
use pyo3::exceptions::PyValueError; use pyo3::exceptions::PyValueError;
use pyo3::ffi::Py_ssize_t; use pyo3::ffi::Py_ssize_t;
...@@ -390,7 +389,7 @@ impl From<lightmotif::pli::StripedScores<C>> for StripedScores { ...@@ -390,7 +389,7 @@ impl From<lightmotif::pli::StripedScores<C>> for StripedScores {
let shape = [cols as Py_ssize_t, rows as Py_ssize_t]; let shape = [cols as Py_ssize_t, rows as Py_ssize_t];
let strides = [ let strides = [
std::mem::size_of::<f32>() as Py_ssize_t, std::mem::size_of::<f32>() as Py_ssize_t,
(cols.next_power_of_two() * std::mem::size_of::<f32>()) as Py_ssize_t, (scores.matrix().stride() * std::mem::size_of::<f32>()) as Py_ssize_t,
]; ];
// mask the remaining positions that are outside the sequence length // mask the remaining positions that are outside the sequence length
let length = scores.len(); let length = scores.len();
......
...@@ -21,7 +21,7 @@ pub type DefaultAlignment = _DefaultAlignment; ...@@ -21,7 +21,7 @@ pub type DefaultAlignment = _DefaultAlignment;
// --- DenseMatrix ------------------------------------------------------------- // --- DenseMatrix -------------------------------------------------------------
/// An aligned dense matrix of with a constant number of columns. /// A memory-aligned dense matrix with a constant number of columns.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct DenseMatrix<T: Default + Copy, C: Unsigned, A: Unsigned = DefaultAlignment> { pub struct DenseMatrix<T: Default + Copy, C: Unsigned, A: Unsigned = DefaultAlignment> {
data: Vec<T>, data: Vec<T>,
...@@ -101,9 +101,21 @@ impl<T: Default + Copy, C: Unsigned, A: Unsigned> DenseMatrix<T, C, A> { ...@@ -101,9 +101,21 @@ impl<T: Default + Copy, C: Unsigned, A: Unsigned> DenseMatrix<T, C, A> {
C::USIZE C::USIZE
} }
/// The effective number of columns in the matrix, counting alignment. /// The stride of the matrix, as a number of elements.
///
/// This may be different from the number of columns to account for memory
/// alignment constraints. Multiply by `std::mem::size_of::<T>()` to obtain
/// the stride in bytes.
///
/// # Example
/// ```rust
/// # use typenum::{U43, U32};
/// # use lightmotif::dense::DenseMatrix;
/// let d = DenseMatrix::<u8, U43, U32>::new(0);
/// assert_eq!(d.stride(), 64);
/// ```
#[inline] #[inline]
pub const fn columns_effective(&self) -> usize { pub const fn stride(&self) -> usize {
let x = std::mem::size_of::<T>(); let x = std::mem::size_of::<T>();
let c = C::USIZE * x; let c = C::USIZE * x;
let b = c + (A::USIZE - c % A::USIZE) * ((c % A::USIZE) > 0) as usize; let b = c + (A::USIZE - c % A::USIZE) * ((c % A::USIZE) > 0) as usize;
...@@ -119,7 +131,7 @@ impl<T: Default + Copy, C: Unsigned, A: Unsigned> DenseMatrix<T, C, A> { ...@@ -119,7 +131,7 @@ impl<T: Default + Copy, C: Unsigned, A: Unsigned> DenseMatrix<T, C, A> {
/// Change the number of rows of the matrix. /// Change the number of rows of the matrix.
pub fn resize(&mut self, rows: usize) { pub fn resize(&mut self, rows: usize) {
// Always over-allocate columns to avoid alignment issues. // Always over-allocate columns to avoid alignment issues.
let c: usize = self.columns_effective(); let c: usize = self.stride();
// Cache previous dimensions // Cache previous dimensions
let previous_rows = self.rows; let previous_rows = self.rows;
...@@ -167,7 +179,7 @@ impl<T: Default + Copy, C: Unsigned, A: Unsigned> Index<usize> for DenseMatrix<T ...@@ -167,7 +179,7 @@ impl<T: Default + Copy, C: Unsigned, A: Unsigned> Index<usize> for DenseMatrix<T
type Output = [T]; type Output = [T];
#[inline] #[inline]
fn index(&self, index: usize) -> &Self::Output { fn index(&self, index: usize) -> &Self::Output {
let c = self.columns_effective(); let c = self.stride();
let row = self.offset + c * index; let row = self.offset + c * index;
&self.data[row..row + C::USIZE] &self.data[row..row + C::USIZE]
} }
...@@ -176,7 +188,7 @@ impl<T: Default + Copy, C: Unsigned, A: Unsigned> Index<usize> for DenseMatrix<T ...@@ -176,7 +188,7 @@ impl<T: Default + Copy, C: Unsigned, A: Unsigned> Index<usize> for DenseMatrix<T
impl<T: Default + Copy, C: Unsigned, A: Unsigned> IndexMut<usize> for DenseMatrix<T, C, A> { impl<T: Default + Copy, C: Unsigned, A: Unsigned> IndexMut<usize> for DenseMatrix<T, C, A> {
#[inline] #[inline]
fn index_mut(&mut self, index: usize) -> &mut Self::Output { fn index_mut(&mut self, index: usize) -> &mut Self::Output {
let c = self.columns_effective(); let c = self.stride();
let row = self.offset + c * index; let row = self.offset + c * index;
&mut self.data[row..row + C::USIZE] &mut self.data[row..row + C::USIZE]
} }
...@@ -233,7 +245,7 @@ where ...@@ -233,7 +245,7 @@ where
#[inline] #[inline]
fn get(&mut self, i: usize) -> &'a [T] { fn get(&mut self, i: usize) -> &'a [T] {
let c = self.matrix.columns_effective(); let c = self.matrix.stride();
let row = self.matrix.offset + c * i; let row = self.matrix.offset + c * i;
unsafe { std::slice::from_raw_parts(self.data.as_ptr().add(row), C::USIZE) } unsafe { std::slice::from_raw_parts(self.data.as_ptr().add(row), C::USIZE) }
} }
...@@ -270,7 +282,7 @@ where ...@@ -270,7 +282,7 @@ where
#[inline] #[inline]
fn get(&mut self, i: usize) -> &'a mut [T] { fn get(&mut self, i: usize) -> &'a mut [T] {
let c = self.matrix.columns_effective(); let c = self.matrix.stride();
let row = self.matrix.offset + c * i; let row = self.matrix.offset + c * i;
unsafe { std::slice::from_raw_parts_mut(self.data.as_ptr().add(row), C::USIZE) } unsafe { std::slice::from_raw_parts_mut(self.data.as_ptr().add(row), C::USIZE) }
} }
...@@ -338,27 +350,27 @@ mod test { ...@@ -338,27 +350,27 @@ mod test {
use super::*; use super::*;
#[test] #[test]
fn test_columns_effective() { fn test_stride() {
let d1 = DenseMatrix::<u8, U32, U32>::new(0); let d1 = DenseMatrix::<u8, U32, U32>::new(0);
assert_eq!(d1.columns_effective(), 32); assert_eq!(d1.stride(), 32);
let d2 = DenseMatrix::<u8, U16, U32>::new(0); let d2 = DenseMatrix::<u8, U16, U32>::new(0);
assert_eq!(d2.columns_effective(), 32); assert_eq!(d2.stride(), 32);
let d3 = DenseMatrix::<u32, U32, U32>::new(0); let d3 = DenseMatrix::<u32, U32, U32>::new(0);
assert_eq!(d3.columns_effective(), 32); assert_eq!(d3.stride(), 32);
let d4 = DenseMatrix::<u32, U8, U32>::new(0); let d4 = DenseMatrix::<u32, U8, U32>::new(0);
assert_eq!(d4.columns_effective(), 8); assert_eq!(d4.stride(), 8);
let d5 = DenseMatrix::<u32, U16, U32>::new(0); let d5 = DenseMatrix::<u32, U16, U32>::new(0);
assert_eq!(d5.columns_effective(), 16); assert_eq!(d5.stride(), 16);
let d6 = DenseMatrix::<u32, U3, U16>::new(0); let d6 = DenseMatrix::<u32, U3, U16>::new(0);
assert_eq!(d6.columns_effective(), 4); assert_eq!(d6.stride(), 4);
let d7 = DenseMatrix::<u8, U15, U8>::new(0); let d7 = DenseMatrix::<u8, U15, U8>::new(0);
assert_eq!(d7.columns_effective(), 16); assert_eq!(d7.stride(), 16);
} }
#[test] #[test]
......
...@@ -98,8 +98,8 @@ unsafe fn score_avx2( ...@@ -98,8 +98,8 @@ unsafe fn score_avx2(
s3 = _mm256_add_ps(s3, b3); s3 = _mm256_add_ps(s3, b3);
s4 = _mm256_add_ps(s4, b4); s4 = _mm256_add_ps(s4, b4);
// advance to next row in PSSM and sequence matrices // advance to next row in PSSM and sequence matrices
seqptr = seqptr.add(seq.data.columns_effective()); seqptr = seqptr.add(seq.data.stride());
pssmptr = pssmptr.add(pssm.weights().columns_effective()); pssmptr = pssmptr.add(pssm.weights().stride());
} }
// permute lanes so that scores are in the right order // permute lanes so that scores are in the right order
let r1 = _mm256_permute2f128_ps(s1, s2, 0x20); let r1 = _mm256_permute2f128_ps(s1, s2, 0x20);
...@@ -166,7 +166,7 @@ unsafe fn best_position_avx2(scores: &StripedScores<<Avx2 as Backend>::LANES>) - ...@@ -166,7 +166,7 @@ unsafe fn best_position_avx2(scores: &StripedScores<<Avx2 as Backend>::LANES>) -
s3 = _mm256_blendv_ps(s3, r3, c3); s3 = _mm256_blendv_ps(s3, r3, c3);
s4 = _mm256_blendv_ps(s4, r4, c4); s4 = _mm256_blendv_ps(s4, r4, c4);
// advance to next row // advance to next row
dataptr = dataptr.add(data.columns_effective()); dataptr = dataptr.add(data.stride());
} }
// find the global maximum across all columns // find the global maximum across all columns
let mut x: [u32; 32] = [0; 32]; let mut x: [u32; 32] = [0; 32];
...@@ -275,7 +275,7 @@ unsafe fn threshold_avx2( ...@@ -275,7 +275,7 @@ unsafe fn threshold_avx2(
x3 = _mm256_add_epi32(x3, ones); x3 = _mm256_add_epi32(x3, ones);
x4 = _mm256_add_epi32(x4, ones); x4 = _mm256_add_epi32(x4, ones);
// Advance data pointer to next row // Advance data pointer to next row
dataptr = dataptr.add(data.columns_effective()); dataptr = dataptr.add(data.stride());
} }
} }
......
...@@ -78,8 +78,8 @@ unsafe fn score_neon<A, C>( ...@@ -78,8 +78,8 @@ unsafe fn score_neon<A, C>(
s.3 = vaddq_f32(s.3, vreinterpretq_f32_u32(vandq_u32(lut, p4))); s.3 = vaddq_f32(s.3, vreinterpretq_f32_u32(vandq_u32(lut, p4)));
} }
// advance to next row in sequence and PSSM matrices // advance to next row in sequence and PSSM matrices
dataptr = dataptr.add(seq.data.columns_effective()); dataptr = dataptr.add(seq.data.stride());
pssmptr = pssmptr.add(pssm.weights().columns_effective()); pssmptr = pssmptr.add(pssm.weights().stride());
} }
// record the score for the current position // record the score for the current position
let row = &mut data[i]; let row = &mut data[i];
......
...@@ -77,8 +77,8 @@ unsafe fn score_sse2<A, C>( ...@@ -77,8 +77,8 @@ unsafe fn score_sse2<A, C>(
s4 = _mm_add_ps(s4, _mm_and_ps(lut, p4)); s4 = _mm_add_ps(s4, _mm_and_ps(lut, p4));
} }
// advance to next row in sequence and PSSM matrices // advance to next row in sequence and PSSM matrices
dataptr = dataptr.add(seq.data.columns_effective()); dataptr = dataptr.add(seq.data.stride());
pssmptr = pssmptr.add(pssm.weights().columns_effective()); pssmptr = pssmptr.add(pssm.weights().stride());
} }
// record the score for the current position // record the score for the current position
let row = &mut data[i]; let row = &mut data[i];
...@@ -154,7 +154,7 @@ where ...@@ -154,7 +154,7 @@ where
s3 = _mm_or_ps(_mm_andnot_ps(c3, s3), _mm_and_ps(r3, c3)); s3 = _mm_or_ps(_mm_andnot_ps(c3, s3), _mm_and_ps(r3, c3));
s4 = _mm_or_ps(_mm_andnot_ps(c4, s4), _mm_and_ps(r4, c4)); s4 = _mm_or_ps(_mm_andnot_ps(c4, s4), _mm_and_ps(r4, c4));
// advance to next row // advance to next row
dataptr = dataptr.add(data.columns_effective()); dataptr = dataptr.add(data.stride());
} }
// find the global maximum across all columns // find the global maximum across all columns
_mm_storeu_si128( _mm_storeu_si128(
...@@ -274,7 +274,7 @@ where ...@@ -274,7 +274,7 @@ where
x3 = _mm_add_epi32(x3, ones); x3 = _mm_add_epi32(x3, ones);
x4 = _mm_add_epi32(x4, ones); x4 = _mm_add_epi32(x4, ones);
// Advance data pointer to next row // Advance data pointer to next row
dataptr = dataptr.add(data.columns_effective()); dataptr = dataptr.add(data.stride());
} }
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment