From 72f427051aa6ccd6764d08eb577a7d8a5191a6a5 Mon Sep 17 00:00:00 2001 From: Martin Larralde <martin.larralde@embl.de> Date: Sun, 16 Jun 2024 18:35:09 +0200 Subject: [PATCH] Fix default alignment used for x86-64 and Aarch64 --- lightmotif/src/dense.rs | 33 +++++++++++++++-------------- lightmotif/src/pli/platform/avx2.rs | 3 +++ 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/lightmotif/src/dense.rs b/lightmotif/src/dense.rs index f16ef3a..edae74d 100644 --- a/lightmotif/src/dense.rs +++ b/lightmotif/src/dense.rs @@ -15,11 +15,16 @@ use crate::num::Unsigned; // --- DefaultAlignment -------------------------------------------------------- #[cfg(target_arch = "x86_64")] -type _DefaultAlignment = typenum::consts::U64; -#[cfg(target_arch = "x86")] type _DefaultAlignment = typenum::consts::U32; -#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] +#[cfg(any(target_arch = "x86", target_arch = "arm", target_arch = "aarch64"))] type _DefaultAlignment = typenum::consts::U16; +#[cfg(not(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" +)))] +type _DefaultAlignment = typenum::consts::U1; /// The default alignment used in dense matrices. pub type DefaultAlignment = _DefaultAlignment; @@ -70,28 +75,24 @@ impl<T: Default + Copy, C: Unsigned, A: Unsigned + PowerOfTwo> DenseMatrix<T, C, /// Create a new *uninitialized* matrix with the given number of rows. pub unsafe fn uninitialized(rows: usize) -> Self { // Always over-allocate columns to avoid alignment issues. - let c = C::USIZE + (A::USIZE - C::USIZE % A::USIZE) * (C::USIZE % A::USIZE > 0) as usize; + let mut m = Self::new(0); + let c = m.stride(); // NOTE: this is unsafe but given that we require `T` to be // copy, this should be fine, as `Copy` prevents the // type to be `Drop` as well. // reserve the vector without initializing the data - let mut data = Vec::with_capacity((rows + 1) * c); - data.set_len((rows + 1) * c); + m.data.reserve((rows + 1) * c); + m.data.set_len((rows + 1) * c); // compute offset to aligned memory - let mut offset = 0; - while data[offset..].as_ptr() as usize % c > 0 { - offset += 1 + m.offset = 0; + while m.data[m.offset..].as_ptr() as usize % c > 0 { + m.offset += 1 } - Self { - data, - offset, - rows, - _columns: std::marker::PhantomData, - _alignment: std::marker::PhantomData, - } + m.rows = rows; + m } /// Create a new dense matrix from an iterable of rows. diff --git a/lightmotif/src/pli/platform/avx2.rs b/lightmotif/src/pli/platform/avx2.rs index 7fddb07..01e4cee 100644 --- a/lightmotif/src/pli/platform/avx2.rs +++ b/lightmotif/src/pli/platform/avx2.rs @@ -145,12 +145,14 @@ unsafe fn score_avx2_permute<A>( // advance position in the position weight matrix for _ in 0..pssm.len() { // load sequence row and broadcast to f32 + debug_assert_eq!(seqptr as usize & 0x1f, 0); let x = _mm256_load_si256(seqptr as *const __m256i); let x1 = _mm256_shuffle_epi8(x, m1); let x2 = _mm256_shuffle_epi8(x, m2); let x3 = _mm256_shuffle_epi8(x, m3); let x4 = _mm256_shuffle_epi8(x, m4); // load row for current weight matrix position + // debug_assert_eq!(pssmptr as usize & 0x1f, 0); let t = _mm256_loadu_ps(pssmptr); // index A/T/G/C/N lookup table with the bases let b1 = _mm256_permutevar8x32_ps(t, x1); @@ -227,6 +229,7 @@ unsafe fn score_avx2_gather<A>( // advance position in the position weight matrix for _ in 0..pssm.len() { // load sequence row and broadcast to f32 + debug_assert_eq!(seqptr as usize & 0x1f, 0); let x = _mm256_load_si256(seqptr as *const __m256i); let x1 = _mm256_shuffle_epi8(x, m1); let x2 = _mm256_shuffle_epi8(x, m2); -- GitLab