From ff811f371547ed700872b79a9b5eec5e5db89ede Mon Sep 17 00:00:00 2001 From: Martin Larralde <martin.larralde@embl.de> Date: Sat, 15 Jun 2024 09:21:05 +0200 Subject: [PATCH] Use `_mm256_permutevar8x32_ps` in AVX2 `Score` to avoid special case for default Dna symbol --- lightmotif/src/dense.rs | 4 +++- lightmotif/src/pli/platform/avx2.rs | 23 ++++++----------------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/lightmotif/src/dense.rs b/lightmotif/src/dense.rs index 4fe699c..7487b2a 100644 --- a/lightmotif/src/dense.rs +++ b/lightmotif/src/dense.rs @@ -14,7 +14,9 @@ use crate::num::Unsigned; // --- DefaultAlignment -------------------------------------------------------- -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[cfg(target_arch = "x86_64")] +type _DefaultAlignment = typenum::consts::U64; +#[cfg(target_arch = "x86")] type _DefaultAlignment = typenum::consts::U32; #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] type _DefaultAlignment = typenum::consts::U16; diff --git a/lightmotif/src/pli/platform/avx2.rs b/lightmotif/src/pli/platform/avx2.rs index 367c502..a856458 100644 --- a/lightmotif/src/pli/platform/avx2.rs +++ b/lightmotif/src/pli/platform/avx2.rs @@ -150,23 +150,12 @@ unsafe fn score_avx2_permute<A>( let x3 = _mm256_shuffle_epi8(x, m3); let x4 = _mm256_shuffle_epi8(x, m4); // load row for current weight matrix position - let t = _mm256_broadcast_ps(&*(pssmptr as *const __m128)); - let u = _mm256_broadcast_ss(&*(pssmptr.add(<A as Alphabet>::K::USIZE - 1))); - // check which bases from the sequence are unknown - let unk1 = _mm256_castsi256_ps(_mm256_cmpeq_epi32(x1, n)); - let unk2 = _mm256_castsi256_ps(_mm256_cmpeq_epi32(x2, n)); - let unk3 = _mm256_castsi256_ps(_mm256_cmpeq_epi32(x3, n)); - let unk4 = _mm256_castsi256_ps(_mm256_cmpeq_epi32(x4, n)); - // index A/T/G/C lookup table with the bases - let p1 = _mm256_permutevar_ps(t, x1); - let p2 = _mm256_permutevar_ps(t, x2); - let p3 = _mm256_permutevar_ps(t, x3); - let p4 = _mm256_permutevar_ps(t, x4); - // blend together known and unknown scores - let b1 = _mm256_blendv_ps(p1, u, unk1); - let b2 = _mm256_blendv_ps(p2, u, unk2); - let b3 = _mm256_blendv_ps(p3, u, unk3); - let b4 = _mm256_blendv_ps(p4, u, unk4); + let t = _mm256_load_ps(pssmptr); + // index A/T/G/C/N lookup table with the bases + let b1 = _mm256_permutevar8x32_ps(t, x1); + let b2 = _mm256_permutevar8x32_ps(t, x2); + let b3 = _mm256_permutevar8x32_ps(t, x3); + let b4 = _mm256_permutevar8x32_ps(t, x4); // add log odds to the running sum s1 = _mm256_add_ps(s1, b1); s2 = _mm256_add_ps(s2, b2); -- GitLab