Skip to content
Snippets Groups Projects
Commit 72f42705 authored by Martin Larralde
Browse files

Fix default alignment used for x86-64 and Aarch64

parent 8aab22de
No related branches found
No related tags found
No related merge requests found
......@@ -15,11 +15,16 @@ use crate::num::Unsigned;
// --- DefaultAlignment --------------------------------------------------------
#[cfg(target_arch = "x86_64")]
type _DefaultAlignment = typenum::consts::U64;
#[cfg(target_arch = "x86")]
type _DefaultAlignment = typenum::consts::U32;
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
#[cfg(any(target_arch = "x86", target_arch = "arm", target_arch = "aarch64"))]
type _DefaultAlignment = typenum::consts::U16;
#[cfg(not(any(
target_arch = "x86",
target_arch = "x86_64",
target_arch = "arm",
target_arch = "aarch64"
)))]
type _DefaultAlignment = typenum::consts::U1;
/// The default alignment used in dense matrices.
pub type DefaultAlignment = _DefaultAlignment;
......@@ -70,28 +75,24 @@ impl<T: Default + Copy, C: Unsigned, A: Unsigned + PowerOfTwo> DenseMatrix<T, C,
/// Create a new *uninitialized* matrix with the given number of rows.
pub unsafe fn uninitialized(rows: usize) -> Self {
// Always over-allocate columns to avoid alignment issues.
let c = C::USIZE + (A::USIZE - C::USIZE % A::USIZE) * (C::USIZE % A::USIZE > 0) as usize;
let mut m = Self::new(0);
let c = m.stride();
// NOTE: this is unsafe but given that we require `T` to be
// copy, this should be fine, as `Copy` prevents the
// type to be `Drop` as well.
// reserve the vector without initializing the data
let mut data = Vec::with_capacity((rows + 1) * c);
data.set_len((rows + 1) * c);
m.data.reserve((rows + 1) * c);
m.data.set_len((rows + 1) * c);
// compute offset to aligned memory
let mut offset = 0;
while data[offset..].as_ptr() as usize % c > 0 {
offset += 1
m.offset = 0;
while m.data[m.offset..].as_ptr() as usize % c > 0 {
m.offset += 1
}
Self {
data,
offset,
rows,
_columns: std::marker::PhantomData,
_alignment: std::marker::PhantomData,
}
m.rows = rows;
m
}
/// Create a new dense matrix from an iterable of rows.
......
......@@ -145,12 +145,14 @@ unsafe fn score_avx2_permute<A>(
// advance position in the position weight matrix
for _ in 0..pssm.len() {
// load sequence row and broadcast to f32
debug_assert_eq!(seqptr as usize & 0x1f, 0);
let x = _mm256_load_si256(seqptr as *const __m256i);
let x1 = _mm256_shuffle_epi8(x, m1);
let x2 = _mm256_shuffle_epi8(x, m2);
let x3 = _mm256_shuffle_epi8(x, m3);
let x4 = _mm256_shuffle_epi8(x, m4);
// load row for current weight matrix position
// debug_assert_eq!(pssmptr as usize & 0x1f, 0);
let t = _mm256_loadu_ps(pssmptr);
// index A/T/G/C/N lookup table with the bases
let b1 = _mm256_permutevar8x32_ps(t, x1);
......@@ -227,6 +229,7 @@ unsafe fn score_avx2_gather<A>(
// advance position in the position weight matrix
for _ in 0..pssm.len() {
// load sequence row and broadcast to f32
debug_assert_eq!(seqptr as usize & 0x1f, 0);
let x = _mm256_load_si256(seqptr as *const __m256i);
let x1 = _mm256_shuffle_epi8(x, m1);
let x2 = _mm256_shuffle_epi8(x, m2);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment