From 0dee6009ba67ab93b4b08d5911f3d7c162728874 Mon Sep 17 00:00:00 2001
From: Martin Larralde <martin.larralde@embl.de>
Date: Tue, 16 May 2023 13:04:35 +0200
Subject: [PATCH] Reorganize pipeline code into several submodules

---
 README.md                                 |   7 +-
 lightmotif/benches/ecoli.rs               |  21 +-
 lightmotif/src/lib.rs                     |  14 +-
 lightmotif/src/pli/mod.rs                 | 326 +++++++---------------
 lightmotif/src/pli/{ => platform}/avx2.rs |  77 +++--
 lightmotif/src/pli/platform/generic.rs    |  28 ++
 lightmotif/src/pli/platform/mod.rs        |  19 ++
 lightmotif/src/pli/{ => platform}/sse2.rs |  82 ++++--
 lightmotif/src/pli/scores.rs              | 156 +++++++++++
 lightmotif/src/pli/vector.rs              |  49 ++++
 lightmotif/src/seq.rs                     |   2 +
 lightmotif/tests/dna.rs                   |  28 +-
 12 files changed, 495 insertions(+), 314 deletions(-)
 rename lightmotif/src/pli/{ => platform}/avx2.rs (77%)
 create mode 100644 lightmotif/src/pli/platform/generic.rs
 create mode 100644 lightmotif/src/pli/platform/mod.rs
 rename lightmotif/src/pli/{ => platform}/sse2.rs (71%)
 create mode 100644 lightmotif/src/pli/scores.rs
 create mode 100644 lightmotif/src/pli/vector.rs

diff --git a/README.md b/README.md
index c7b471d..269d6fb 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ use lightmotif::*;
 use typenum::U32;
 
 // Create a count matrix from an iterable of motif sequences
-let counts = CountMatrix::from_sequences(&[
+let counts = CountMatrix::<Dna>::from_sequences(&[
     EncodedSequence::encode("GTTGACCTTATCAAC").unwrap(),
     EncodedSequence::encode("GTTGATCCAGTCAAC").unwrap(),
 ]).unwrap();
@@ -62,7 +62,8 @@ let mut striped = encoded.to_striped::<U32>();
 striped.configure(&pssm);
 
 // Use a pipeline to compute scores for every position of the matrix.
-let scores = Pipeline::<Dna>::score(&striped, &pssm);
+let pli = Pipeline::generic();
+let scores = pli.score(&striped, &pssm);
 
 // Scores can be extracted into a Vec<f32>, or indexed directly.
 let v = scores.to_vec();
@@ -70,7 +71,7 @@ assert_eq!(scores[0], -23.07094);
 assert_eq!(v[0], -23.07094);
 
 // The highest scoring position can be searched with a pipeline as well.
-let best = Pipeline::<Dna>::best_position(&scores).unwrap();
+let best = pli.best_position(&scores).unwrap();
 assert_eq!(best, 18);
 
 ```
diff --git a/lightmotif/benches/ecoli.rs b/lightmotif/benches/ecoli.rs
index f2cdb68..e076bbf 100644
--- a/lightmotif/benches/ecoli.rs
+++ b/lightmotif/benches/ecoli.rs
@@ -35,9 +35,10 @@ fn bench_generic(bencher: &mut test::Bencher) {
     let pssm = pbm.to_scoring(None);
 
     striped.configure(&pssm);
+    let pli = Pipeline::generic();
     let mut scores = StripedScores::new_for(&striped, &pssm);
     bencher.bytes = SEQUENCE.len() as u64;
-    bencher.iter(|| test::black_box(Pipeline::<_, u8>::score_into(&striped, &pssm, &mut scores)));
+    bencher.iter(|| test::black_box(pli.score_into(&striped, &pssm, &mut scores)));
 }
 
 #[cfg(target_feature = "sse2")]
@@ -55,15 +56,10 @@ fn bench_sse2(bencher: &mut test::Bencher) {
     let pssm = pbm.to_scoring(None);
 
     striped.configure(&pssm);
+    let pli = Pipeline::sse2().unwrap();
     let mut scores = StripedScores::new_for(&striped, &pssm);
     bencher.bytes = SEQUENCE.len() as u64;
-    bencher.iter(|| {
-        test::black_box(Pipeline::<_, __m128i>::score_into(
-            &striped,
-            &pssm,
-            &mut scores,
-        ))
-    });
+    bencher.iter(|| test::black_box(pli.score_into(&striped, &pssm, &mut scores)));
 }
 
 #[cfg(target_feature = "avx2")]
@@ -81,13 +77,8 @@ fn bench_avx2(bencher: &mut test::Bencher) {
     let pssm = pbm.to_scoring(None);
 
     striped.configure(&pssm);
+    let pli = Pipeline::avx2().unwrap();
     let mut scores = StripedScores::new_for(&striped, &pssm);
     bencher.bytes = SEQUENCE.len() as u64;
-    bencher.iter(|| {
-        test::black_box(Pipeline::<_, __m256i>::score_into(
-            &striped,
-            &pssm,
-            &mut scores,
-        ))
-    });
+    bencher.iter(|| test::black_box(pli.score_into(&striped, &pssm, &mut scores)));
 }
diff --git a/lightmotif/src/lib.rs b/lightmotif/src/lib.rs
index 87c49fd..7339ff8 100644
--- a/lightmotif/src/lib.rs
+++ b/lightmotif/src/lib.rs
@@ -3,12 +3,12 @@
 extern crate generic_array;
 extern crate typenum;
 
-mod abc;
-mod dense;
-mod err;
-mod pli;
-mod pwm;
-mod seq;
+pub mod abc;
+pub mod dense;
+pub mod err;
+pub mod pli;
+pub mod pwm;
+pub mod seq;
 
 pub use abc::Alphabet;
 pub use abc::AminoAcid;
@@ -22,10 +22,10 @@ pub use abc::Pseudocounts;
 pub use abc::Symbol;
 pub use dense::DenseMatrix;
 pub use err::InvalidSymbol;
+pub use pli::BestPosition;
 pub use pli::Pipeline;
 pub use pli::Score;
 pub use pli::StripedScores;
-pub use pli::Vector;
 pub use pwm::CountMatrix;
 pub use pwm::FrequencyMatrix;
 pub use pwm::ScoringMatrix;
diff --git a/lightmotif/src/pli/mod.rs b/lightmotif/src/pli/mod.rs
index 7336f11..702443b 100644
--- a/lightmotif/src/pli/mod.rs
+++ b/lightmotif/src/pli/mod.rs
@@ -5,8 +5,14 @@ use std::ops::Range;
 use typenum::marker_traits::NonZero;
 use typenum::marker_traits::Unsigned;
 
-pub use self::vector::Vector;
-
+pub use self::platform::Avx2;
+pub use self::platform::Backend;
+pub use self::platform::Generic;
+pub use self::platform::Sse2;
+pub use self::platform::UnsupportedBackend;
+pub use self::scores::StripedScores;
+
+use self::vector::Vector;
 use super::abc::Alphabet;
 use super::abc::Dna;
 use super::abc::Symbol;
@@ -14,75 +20,43 @@ use super::dense::DenseMatrix;
 use super::pwm::ScoringMatrix;
 use super::seq::StripedSequence;
 
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-mod avx2;
-#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-mod sse2;
-
-// --- Vector ------------------------------------------------------------------
-
-mod vector {
-    use typenum::consts::U1;
-    use typenum::consts::U16;
-    use typenum::consts::U32;
-    use typenum::marker_traits::NonZero;
-    use typenum::marker_traits::Unsigned;
-
-    mod seal {
-        pub trait Sealed {}
-
-        impl Sealed for u8 {}
-
-        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-        impl Sealed for std::arch::x86_64::__m128i {}
-
-        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-        impl Sealed for std::arch::x86_64::__m256i {}
-    }
-
-    /// Sealed trait for concrete vector implementations.
-    ///
-    /// The trait is defined for the loading vector type, which has `LANES`
-    /// lanes of `u8` values. These values are then splat into 4 vectors with
-    /// `f32` values to actually compute the scores.
-    pub trait Vector: seal::Sealed {
-        type LANES: Unsigned + NonZero;
-    }
-
-    impl Vector for u8 {
-        type LANES = U1;
-    }
-
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    impl Vector for std::arch::x86_64::__m128i {
-        type LANES = U16;
-    }
-
-    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    impl Vector for std::arch::x86_64::__m256i {
-        type LANES = U32;
-    }
-
-    #[cfg(target_feature = "avx2")]
-    pub type Best = std::arch::x86_64::__m256i;
-    #[cfg(all(not(target_feature = "avx2"), target_feature = "ssse3"))]
-    pub type Best = std::arch::x86_64::__m128i;
-    #[cfg(all(not(target_feature = "avx2"), not(target_feature = "ssse3")))]
-    pub type Best = u8;
-}
+mod platform;
+mod scores;
+mod vector;
 
 // --- Score -------------------------------------------------------------------
 
 /// Generic trait for computing sequence scores with a PSSM.
-pub trait Score<A: Alphabet, V: Vector, C: NonZero + Unsigned = <V as Vector>::LANES> {
+pub trait Score<A: Alphabet, C: NonZero + Unsigned> {
     /// Compute the PSSM scores into the given buffer.
-    fn score_into<S, M>(seq: S, pssm: M, scores: &mut StripedScores<C>)
+    fn score_into<S, M>(&self, seq: S, pssm: M, scores: &mut StripedScores<C>)
     where
         S: AsRef<StripedSequence<A, C>>,
-        M: AsRef<ScoringMatrix<A>>;
+        M: AsRef<ScoringMatrix<A>>,
+    {
+        let seq = seq.as_ref();
+        let pssm = pssm.as_ref();
+
+        let seq_rows = seq.data.rows() - seq.wrap;
+        scores.resize(seq.length - pssm.len() + 1, seq_rows);
+
+        let result = scores.matrix_mut();
+        for i in 0..seq.length - pssm.len() + 1 {
+            let mut score = 0.0;
+            for j in 0..pssm.len() {
+                let offset = i + j;
+                let col = offset / seq_rows;
+                let row = offset % seq_rows;
+                score += pssm.weights()[j][seq.data[row][col].as_index()];
+            }
+            let col = i / result.rows();
+            let row = i % result.rows();
+            result[row][col] = score;
+        }
+    }
 
     /// Compute the PSSM scores for every sequence positions.
-    fn score<S, M>(seq: S, pssm: M) -> StripedScores<C>
+    fn score<S, M>(&self, seq: S, pssm: M) -> StripedScores<C>
     where
         S: AsRef<StripedSequence<A, C>>,
         M: AsRef<ScoringMatrix<A>>,
@@ -91,24 +65,27 @@ pub trait Score<A: Alphabet, V: Vector, C: NonZero + Unsigned = <V as Vector>::L
         let pssm = pssm.as_ref();
         let data = unsafe { DenseMatrix::uninitialized(seq.data.rows() - seq.wrap) };
         let length = seq.length - pssm.len() + 1;
-        let mut scores = StripedScores { length, data };
-        Self::score_into(seq, pssm, &mut scores);
+        let mut scores = StripedScores::new(length, data);
+        self.score_into(seq, pssm, &mut scores);
         scores
     }
+}
 
+pub trait BestPosition<C: NonZero + Unsigned> {
     /// Find the sequence position with the highest score.
-    fn best_position(scores: &StripedScores<C>) -> Option<usize> {
-        if scores.length == 0 {
+    fn best_position(&self, scores: &StripedScores<C>) -> Option<usize> {
+        if scores.len() == 0 {
             return None;
         }
 
+        let data = scores.matrix();
         let mut best_pos = 0;
-        let mut best_score = scores.data[0][0];
-        for i in 0..scores.length {
-            let col = i / scores.data.rows();
-            let row = i % scores.data.rows();
-            if scores.data[row][col] > best_score {
-                best_score = scores.data[row][col];
+        let mut best_score = data[0][0];
+        for i in 0..scores.len() {
+            let col = i / data.rows();
+            let row = i % data.rows();
+            if data[row][col] > best_score {
+                best_score = data[row][col];
                 best_pos = i;
             }
         }
@@ -121,183 +98,92 @@ pub trait Score<A: Alphabet, V: Vector, C: NonZero + Unsigned = <V as Vector>::L
 
 /// Wrapper implementing score computation for different platforms.
 #[derive(Debug, Default, Clone)]
-pub struct Pipeline<A: Alphabet, V: Vector = vector::Best> {
+pub struct Pipeline<A: Alphabet, B: Backend> {
     alphabet: std::marker::PhantomData<A>,
-    vector: std::marker::PhantomData<V>,
+    backend: std::marker::PhantomData<B>,
 }
 
-/// Scalar scoring implementation.
-impl<A: Alphabet, C: NonZero + Unsigned> Score<A, u8, C> for Pipeline<A, u8> {
-    fn score_into<S, M>(seq: S, pssm: M, scores: &mut StripedScores<C>)
-    where
-        S: AsRef<StripedSequence<A, C>>,
-        M: AsRef<ScoringMatrix<A>>,
-    {
-        let seq = seq.as_ref();
-        let pssm = pssm.as_ref();
+// --- Generic pipeline --------------------------------------------------------
 
-        let seq_rows = seq.data.rows() - seq.wrap;
-        let result = &mut scores.data;
-        if result.rows() < seq_rows {
-            panic!("not enough rows for scores: {}", pssm.len());
-        }
-
-        for i in 0..seq.length - pssm.len() + 1 {
-            let mut score = 0.0;
-            for j in 0..pssm.len() {
-                let offset = i + j;
-                let col = offset / seq_rows;
-                let row = offset % seq_rows;
-                score += pssm.weights()[j][seq.data[row][col].as_index()];
-            }
-            let col = i / result.rows();
-            let row = i % result.rows();
-            result[row][col] = score;
+impl<A: Alphabet> Pipeline<A, Generic> {
+    /// Create a new generic pipeline.
+    pub const fn generic() -> Self {
+        Self {
+            alphabet: std::marker::PhantomData,
+            backend: std::marker::PhantomData,
         }
     }
 }
 
-// --- SSSE3 -------------------------------------------------------------------
+impl<A: Alphabet, C: NonZero + Unsigned> Score<A, C> for Pipeline<A, Generic> {}
 
-// --- StripedScores -----------------------------------------------------------
+impl<A: Alphabet, C: NonZero + Unsigned> BestPosition<C> for Pipeline<A, Generic> {}
 
-#[derive(Clone, Debug)]
-pub struct StripedScores<C: Unsigned + NonZero> {
-    data: DenseMatrix<f32, C>,
-    length: usize,
-}
+// --- SSE2 pipeline -----------------------------------------------------------
 
-impl<C: Unsigned + NonZero> StripedScores<C> {
-    /// Create a new striped score matrix with the given length and rows.
-    pub fn new(length: usize, rows: usize) -> Self {
-        Self {
-            length,
-            data: DenseMatrix::new(rows),
+impl<A: Alphabet> Pipeline<A, Sse2> {
+    /// Attempt to create a new SSE2-accelerated pipeline.
+    pub fn sse2() -> Result<Self, UnsupportedBackend> {
+        #[cfg(target_arch = "x86")]
+        if std::is_x86_feature_detected!("sse2") {
+            return Ok(Self::default());
         }
+        #[cfg(target_arch = "x86_64")]
+        return Ok(Self::default());
+        #[allow(unreachable_code)]
+        Err(UnsupportedBackend)
     }
+}
 
-    /// Return the number of scored positions.
-    pub fn len(&self) -> usize {
-        self.length
-    }
-
-    /// Return a reference to the striped matrix storing the scores.
-    pub fn matrix(&self) -> &DenseMatrix<f32, C> {
-        &self.data
-    }
-
-    /// Return a mutable reference to the striped matrix storing the scores.
-    pub fn matrix_mut(&mut self) -> &mut DenseMatrix<f32, C> {
-        &mut self.data
-    }
-
-    /// Create a new matrix large enough to store the scores of `pssm` applied to `seq`.
-    pub fn new_for<S, M, A>(seq: S, pssm: M) -> Self
-    where
-        A: Alphabet,
-        S: AsRef<StripedSequence<A, C>>,
-        M: AsRef<ScoringMatrix<A>>,
-    {
-        let seq = seq.as_ref();
-        let pssm = pssm.as_ref();
-        Self::new(seq.length - pssm.len() + 1, seq.data.rows() - seq.wrap)
-    }
-
-    /// Resize the striped scores storage to the given length and number of rows.
-    pub fn resize(&mut self, length: usize, rows: usize) {
-        self.length = length;
-        self.data.resize(rows);
-    }
-
-    /// Resize the striped scores storage to store the scores of `pssm` applied to `seq`.
-    pub fn resize_for<S, M, A>(&mut self, seq: S, pssm: M)
-    where
-        A: Alphabet,
-        S: AsRef<StripedSequence<A, C>>,
+impl<A: Alphabet> Score<A, <Sse2 as Backend>::LANES> for Pipeline<A, Sse2> {
+    fn score_into<S, M>(
+        &self,
+        seq: S,
+        pssm: M,
+        scores: &mut StripedScores<<Sse2 as Backend>::LANES>,
+    ) where
+        S: AsRef<StripedSequence<A, <Sse2 as Backend>::LANES>>,
         M: AsRef<ScoringMatrix<A>>,
     {
-        let seq = seq.as_ref();
-        let pssm = pssm.as_ref();
-        self.resize(seq.length - pssm.len() + 1, seq.data.rows() - seq.wrap);
-    }
-
-    /// Iterate over scores of individual sequence positions.
-    pub fn iter(&self) -> Iter<'_, C> {
-        Iter::new(&self)
-    }
-
-    /// Convert the striped scores to a vector of scores.
-    pub fn to_vec(&self) -> Vec<f32> {
-        self.iter().cloned().collect()
+        Sse2::score_into(seq, pssm, scores)
     }
 }
 
-impl<C: Unsigned + NonZero> AsRef<DenseMatrix<f32, C>> for StripedScores<C> {
-    fn as_ref(&self) -> &DenseMatrix<f32, C> {
-        self.matrix()
+impl<A: Alphabet> BestPosition<<Sse2 as Backend>::LANES> for Pipeline<A, Sse2> {
+    fn best_position(&self, scores: &StripedScores<<Sse2 as Backend>::LANES>) -> Option<usize> {
+        Sse2::best_position(scores)
     }
 }
 
-impl<C: Unsigned + NonZero> AsMut<DenseMatrix<f32, C>> for StripedScores<C> {
-    fn as_mut(&mut self) -> &mut DenseMatrix<f32, C> {
-        self.matrix_mut()
-    }
-}
+// --- AVX2 pipeline -----------------------------------------------------------
 
-impl<C: Unsigned + NonZero> Index<usize> for StripedScores<C> {
-    type Output = f32;
-    fn index(&self, index: usize) -> &f32 {
-        let col = index / self.data.rows();
-        let row = index % self.data.rows();
-        &self.data[row][col]
-    }
-}
-
-impl<C: Unsigned + NonZero> From<StripedScores<C>> for Vec<f32> {
-    fn from(scores: StripedScores<C>) -> Self {
-        scores.iter().cloned().collect()
-    }
-}
-
-// --- Iter --------------------------------------------------------------------
-
-pub struct Iter<'a, C: Unsigned + NonZero> {
-    scores: &'a StripedScores<C>,
-    indices: Range<usize>,
-}
-
-impl<'a, C: Unsigned + NonZero> Iter<'a, C> {
-    fn new(scores: &'a StripedScores<C>) -> Self {
-        Self {
-            scores,
-            indices: 0..scores.length,
+impl<A: Alphabet> Pipeline<A, Avx2> {
+    /// Attempt to create a new AVX2-accelerated pipeline.
+    pub fn avx2() -> Result<Self, UnsupportedBackend> {
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        if std::is_x86_feature_detected!("avx2") {
+            return Ok(Self::default());
         }
-    }
-
-    fn get(&self, i: usize) -> &'a f32 {
-        let col = i / self.scores.data.rows();
-        let row = i % self.scores.data.rows();
-        &self.scores.data[row][col]
-    }
-}
-
-impl<'a, C: Unsigned + NonZero> Iterator for Iter<'a, C> {
-    type Item = &'a f32;
-    fn next(&mut self) -> Option<Self::Item> {
-        self.indices.next().map(|i| self.get(i))
+        Err(UnsupportedBackend)
     }
 }
 
-impl<'a, C: Unsigned + NonZero> ExactSizeIterator for Iter<'a, C> {
-    fn len(&self) -> usize {
-        self.indices.len()
+impl Score<Dna, <Avx2 as Backend>::LANES> for Pipeline<Dna, Avx2> {
+    fn score_into<S, M>(
+        &self,
+        seq: S,
+        pssm: M,
+        scores: &mut StripedScores<<Avx2 as Backend>::LANES>,
+    ) where
+        S: AsRef<StripedSequence<Dna, <Avx2 as Backend>::LANES>>,
+        M: AsRef<ScoringMatrix<Dna>>,
+    {
+        Avx2::score_into(seq, pssm, scores)
     }
 }
 
-impl<'a, C: Unsigned + NonZero> FusedIterator for Iter<'a, C> {}
-
-impl<'a, C: Unsigned + NonZero> DoubleEndedIterator for Iter<'a, C> {
-    fn next_back(&mut self) -> Option<Self::Item> {
-        self.indices.next_back().map(|i| self.get(i))
+impl<A: Alphabet> BestPosition<<Avx2 as Backend>::LANES> for Pipeline<A, Avx2> {
+    fn best_position(&self, scores: &StripedScores<<Avx2 as Backend>::LANES>) -> Option<usize> {
+        Avx2::best_position(scores)
     }
 }
diff --git a/lightmotif/src/pli/avx2.rs b/lightmotif/src/pli/platform/avx2.rs
similarity index 77%
rename from lightmotif/src/pli/avx2.rs
rename to lightmotif/src/pli/platform/avx2.rs
index 2b3aefd..cdf042a 100644
--- a/lightmotif/src/pli/avx2.rs
+++ b/lightmotif/src/pli/platform/avx2.rs
@@ -1,24 +1,35 @@
 use std::arch::x86_64::*;
 
-use typenum::Unsigned;
+use typenum::consts::U32;
+use typenum::marker_traits::Unsigned;
 
-use super::Pipeline;
-use super::Score;
-use super::StripedScores;
-use super::Vector;
+use super::Backend;
 use crate::abc::Alphabet;
 use crate::abc::Dna;
 use crate::abc::Nucleotide;
 use crate::abc::Symbol;
+use crate::pli::scores::StripedScores;
+use crate::pli::Pipeline;
+use crate::pli::Vector;
 use crate::pwm::ScoringMatrix;
 use crate::seq::StripedSequence;
 
+/// A marker type for the AVX2 implementation of the pipeline.
+#[derive(Clone, Debug, Default)]
+pub struct Avx2;
+
+impl Backend for Avx2 {
+    type LANES = U32;
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 #[target_feature(enable = "avx2")]
 unsafe fn score_avx2(
-    seq: &StripedSequence<Dna, <__m256i as Vector>::LANES>,
+    seq: &StripedSequence<Dna, <Avx2 as Backend>::LANES>,
     pssm: &ScoringMatrix<Dna>,
-    scores: &mut StripedScores<<__m256i as Vector>::LANES>,
+    scores: &mut StripedScores<<Avx2 as Backend>::LANES>,
 ) {
+    let data = scores.matrix_mut();
     // constant vector for comparing unknown bases
     let n = _mm256_set1_epi8(Nucleotide::N as i8);
     // mask vectors for broadcasting uint8x32_t to uint32x8_t to floatx8_t
@@ -110,7 +121,7 @@ unsafe fn score_avx2(
         let r3 = _mm256_permute2f128_ps(s1, s2, 0x31);
         let r4 = _mm256_permute2f128_ps(s3, s4, 0x31);
         // record the score for the current position
-        let row = &mut scores.data[i];
+        let row = &mut data[i];
         _mm256_store_ps(row[0x00..].as_mut_ptr(), r1);
         _mm256_store_ps(row[0x08..].as_mut_ptr(), r2);
         _mm256_store_ps(row[0x10..].as_mut_ptr(), r3);
@@ -118,11 +129,13 @@ unsafe fn score_avx2(
     }
 }
 
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 #[target_feature(enable = "avx2")]
-unsafe fn best_position_avx2(scores: &StripedScores<<__m256i as Vector>::LANES>) -> Option<usize> {
-    if scores.length == 0 {
+unsafe fn best_position_avx2(scores: &StripedScores<<Avx2 as Backend>::LANES>) -> Option<usize> {
+    if scores.len() == 0 {
         None
     } else {
+        let data = scores.matrix();
         unsafe {
             // the row index for the best score in each column
             // (these are 32-bit integers but for use with `_mm256_blendv_ps`
@@ -132,12 +145,12 @@ unsafe fn best_position_avx2(scores: &StripedScores<<__m256i as Vector>::LANES>)
             let mut p3 = _mm256_setzero_ps();
             let mut p4 = _mm256_setzero_ps();
             // store the best scores for each column
-            let mut s1 = _mm256_load_ps(scores.data[0][0x00..].as_ptr());
-            let mut s2 = _mm256_load_ps(scores.data[0][0x08..].as_ptr());
-            let mut s3 = _mm256_load_ps(scores.data[0][0x10..].as_ptr());
-            let mut s4 = _mm256_load_ps(scores.data[0][0x18..].as_ptr());
+            let mut s1 = _mm256_load_ps(data[0][0x00..].as_ptr());
+            let mut s2 = _mm256_load_ps(data[0][0x08..].as_ptr());
+            let mut s3 = _mm256_load_ps(data[0][0x10..].as_ptr());
+            let mut s4 = _mm256_load_ps(data[0][0x18..].as_ptr());
             // process all rows iteratively
-            for (i, row) in scores.data.iter().enumerate() {
+            for (i, row) in data.iter().enumerate() {
                 // record the current row index
                 let index = _mm256_castsi256_ps(_mm256_set1_epi32(i as i32));
                 // load scores for the current row
@@ -170,9 +183,9 @@ unsafe fn best_position_avx2(scores: &StripedScores<<__m256i as Vector>::LANES>)
             let mut best_pos = 0;
             let mut best_score = -f32::INFINITY;
             for (col, &row) in x.iter().enumerate() {
-                if scores.data[row as usize][col] > best_score {
-                    best_score = scores.data[row as usize][col];
-                    best_pos = col * scores.data.rows() + row as usize;
+                if data[row as usize][col] > best_score {
+                    best_score = data[row as usize][col];
+                    best_pos = col * data.rows() + row as usize;
                 }
             }
             Some(best_pos)
@@ -181,15 +194,14 @@ unsafe fn best_position_avx2(scores: &StripedScores<<__m256i as Vector>::LANES>)
 }
 
 /// Intel 256-bit vector implementation, for 32 elements column width.
-impl Score<Dna, __m256i> for Pipeline<Dna, __m256i> {
-    fn score_into<S, M>(seq: S, pssm: M, scores: &mut StripedScores<<__m256i as Vector>::LANES>)
+impl Avx2 {
+    pub fn score_into<S, M>(seq: S, pssm: M, scores: &mut StripedScores<<Avx2 as Backend>::LANES>)
     where
-        S: AsRef<StripedSequence<Dna, <__m256i as Vector>::LANES>>,
+        S: AsRef<StripedSequence<Dna, <Avx2 as Backend>::LANES>>,
         M: AsRef<ScoringMatrix<Dna>>,
     {
         let seq = seq.as_ref();
         let pssm = pssm.as_ref();
-        let result = &mut scores.data;
 
         if seq.wrap < pssm.len() - 1 {
             panic!(
@@ -197,17 +209,22 @@ impl Score<Dna, __m256i> for Pipeline<Dna, __m256i> {
                 pssm.len()
             );
         }
-        if result.rows() < (seq.data.rows() - seq.wrap) {
-            panic!("not enough rows for scores: {}", pssm.len());
-        }
 
-        scores.length = seq.length - pssm.len() + 1;
+        scores.resize(seq.length - pssm.len() + 1, seq.data.rows() - seq.wrap);
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
         unsafe {
-            score_avx2(seq, pssm, scores);
-        }
+            score_avx2(seq, pssm, scores)
+        };
+        #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+        panic!("attempting to run AVX2 code on a non-x86 host")
     }
 
-    fn best_position(scores: &StripedScores<<__m256i as Vector>::LANES>) -> Option<usize> {
-        unsafe { best_position_avx2(scores) }
+    pub fn best_position(scores: &StripedScores<<Avx2 as Backend>::LANES>) -> Option<usize> {
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        unsafe {
+            best_position_avx2(scores)
+        }
+        #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+        panic!("attempting to run AVX2 code on a non-x86 host")
     }
 }
diff --git a/lightmotif/src/pli/platform/generic.rs b/lightmotif/src/pli/platform/generic.rs
new file mode 100644
index 0000000..50beef4
--- /dev/null
+++ b/lightmotif/src/pli/platform/generic.rs
@@ -0,0 +1,28 @@
+use typenum::consts::U1;
+use typenum::marker_traits::NonZero;
+use typenum::marker_traits::Unsigned;
+
+use super::Backend;
+use crate::abc::Alphabet;
+use crate::abc::Dna;
+use crate::abc::Nucleotide;
+use crate::abc::Symbol;
+use crate::pli::scores::StripedScores;
+use crate::pli::BestPosition;
+use crate::pli::Pipeline;
+use crate::pli::Score;
+use crate::pli::Vector;
+use crate::pwm::ScoringMatrix;
+use crate::seq::StripedSequence;
+
+/// A marker type for the generic implementation of the pipeline.
+#[derive(Clone, Debug, Default)]
+pub struct Generic;
+
+impl Backend for Generic {
+    type LANES = U1;
+}
+
+impl<A: Alphabet, C: NonZero + Unsigned> Score<A, C> for Generic {}
+
+impl<C: NonZero + Unsigned> BestPosition<C> for Generic {}
diff --git a/lightmotif/src/pli/platform/mod.rs b/lightmotif/src/pli/platform/mod.rs
new file mode 100644
index 0000000..30e3112
--- /dev/null
+++ b/lightmotif/src/pli/platform/mod.rs
@@ -0,0 +1,19 @@
+mod avx2;
+mod generic;
+mod sse2;
+
+pub use self::avx2::Avx2;
+pub use self::generic::Generic;
+pub use self::sse2::Sse2;
+
+use typenum::marker_traits::NonZero;
+use typenum::marker_traits::Unsigned;
+
+/// A marker trait for backends.
+pub trait Backend {
+    type LANES: Unsigned + NonZero;
+}
+
+/// An error marker when a pipeline backend is unsupported on the host platform.
+#[derive(Debug, Clone)]
+pub struct UnsupportedBackend;
diff --git a/lightmotif/src/pli/sse2.rs b/lightmotif/src/pli/platform/sse2.rs
similarity index 71%
rename from lightmotif/src/pli/sse2.rs
rename to lightmotif/src/pli/platform/sse2.rs
index 78838ef..3ac41d7 100644
--- a/lightmotif/src/pli/sse2.rs
+++ b/lightmotif/src/pli/platform/sse2.rs
@@ -1,24 +1,38 @@
+//! Intel 128-bit vector implementation, for 16 elements column width.
 use std::arch::x86_64::*;
 
-use typenum::Unsigned;
+use typenum::consts::U16;
+use typenum::marker_traits::Unsigned;
 
-use super::Pipeline;
-use super::Score;
-use super::StripedScores;
-use super::Vector;
+use super::Backend;
 use crate::abc::Alphabet;
 use crate::abc::Dna;
 use crate::abc::Nucleotide;
 use crate::abc::Symbol;
+use crate::pli::scores::StripedScores;
+use crate::pli::BestPosition;
+use crate::pli::Pipeline;
+use crate::pli::Score;
+use crate::pli::Vector;
 use crate::pwm::ScoringMatrix;
 use crate::seq::StripedSequence;
 
+/// A marker type for the SSE2 implementation of the pipeline.
+#[derive(Clone, Debug, Default)]
+pub struct Sse2;
+
+impl Backend for Sse2 {
+    type LANES = U16;
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 #[target_feature(enable = "sse2")]
 unsafe fn score_sse2<A: Alphabet>(
-    seq: &StripedSequence<A, <__m128i as Vector>::LANES>,
+    seq: &StripedSequence<A, <Sse2 as Backend>::LANES>,
     pssm: &ScoringMatrix<A>,
-    scores: &mut StripedScores<<__m128i as Vector>::LANES>,
+    scores: &mut StripedScores<<Sse2 as Backend>::LANES>,
 ) {
+    let data = scores.matrix_mut();
     // mask vectors for broadcasting uint8x16_t to uint32x4_t to floatx4_t
     let zero = _mm_setzero_si128();
     // process every position of the sequence data
@@ -55,7 +69,7 @@ unsafe fn score_sse2<A: Alphabet>(
             }
         }
         // record the score for the current position
-        let row = &mut scores.data[i];
+        let row = &mut data[i];
         _mm_storeu_ps(row[0..].as_mut_ptr(), s1);
         _mm_storeu_ps(row[4..].as_mut_ptr(), s2);
         _mm_storeu_ps(row[8..].as_mut_ptr(), s3);
@@ -63,11 +77,13 @@ unsafe fn score_sse2<A: Alphabet>(
     }
 }
 
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 #[target_feature(enable = "sse2")]
-unsafe fn best_position_sse2(scores: &StripedScores<<__m128i as Vector>::LANES>) -> Option<usize> {
-    if scores.length == 0 {
+unsafe fn best_position_sse2(scores: &StripedScores<<Sse2 as Backend>::LANES>) -> Option<usize> {
+    if scores.len() == 0 {
         None
     } else {
+        let data = scores.matrix();
         unsafe {
             // the row index for the best score in each column
             // (these are 32-bit integers but for use with `_mm256_blendv_ps`
@@ -77,12 +93,12 @@ unsafe fn best_position_sse2(scores: &StripedScores<<__m128i as Vector>::LANES>)
             let mut p3 = _mm_setzero_ps();
             let mut p4 = _mm_setzero_ps();
             // store the best scores for each column
-            let mut s1 = _mm_load_ps(scores.data[0][0x00..].as_ptr());
-            let mut s2 = _mm_load_ps(scores.data[0][0x04..].as_ptr());
-            let mut s3 = _mm_load_ps(scores.data[0][0x08..].as_ptr());
-            let mut s4 = _mm_load_ps(scores.data[0][0x0c..].as_ptr());
+            let mut s1 = _mm_load_ps(data[0][0x00..].as_ptr());
+            let mut s2 = _mm_load_ps(data[0][0x04..].as_ptr());
+            let mut s3 = _mm_load_ps(data[0][0x08..].as_ptr());
+            let mut s4 = _mm_load_ps(data[0][0x0c..].as_ptr());
             // process all rows iteratively
-            for (i, row) in scores.data.iter().enumerate() {
+            for (i, row) in data.iter().enumerate() {
                 // record the current row index
                 let index = _mm_castsi128_ps(_mm_set1_epi32(i as i32));
                 // load scores for the current row
@@ -119,9 +135,9 @@ unsafe fn best_position_sse2(scores: &StripedScores<<__m128i as Vector>::LANES>)
             let mut best_pos = 0;
             let mut best_score = -f32::INFINITY;
             for (col, &row) in x.iter().enumerate() {
-                if scores.data[row as usize][col] > best_score {
-                    best_score = scores.data[row as usize][col];
-                    best_pos = col * scores.data.rows() + row as usize;
+                if data[row as usize][col] > best_score {
+                    best_score = data[row as usize][col];
+                    best_pos = col * data.rows() + row as usize;
                 }
             }
             Some(best_pos)
@@ -129,11 +145,14 @@ unsafe fn best_position_sse2(scores: &StripedScores<<__m128i as Vector>::LANES>)
     }
 }
 
-/// Intel 128-bit vector implementation, for 16 elements column width.
-impl<A: Alphabet> Score<A, __m128i> for Pipeline<A, __m128i> {
-    fn score_into<S, M>(seq: S, pssm: M, scores: &mut StripedScores<<__m128i as Vector>::LANES>)
-    where
-        S: AsRef<StripedSequence<A, <__m128i as Vector>::LANES>>,
+impl Sse2 {
+    pub fn score_into<A, S, M>(
+        seq: S,
+        pssm: M,
+        scores: &mut StripedScores<<Sse2 as Backend>::LANES>,
+    ) where
+        A: Alphabet,
+        S: AsRef<StripedSequence<A, <Sse2 as Backend>::LANES>>,
         M: AsRef<ScoringMatrix<A>>,
     {
         let seq = seq.as_ref();
@@ -145,17 +164,22 @@ impl<A: Alphabet> Score<A, __m128i> for Pipeline<A, __m128i> {
                 pssm.len()
             );
         }
-        if scores.data.rows() < (seq.data.rows() - seq.wrap) {
-            panic!("not enough rows for scores: {}", pssm.len());
-        }
 
-        scores.length = seq.length - pssm.len() + 1;
+        scores.resize(seq.length - pssm.len() + 1, seq.data.rows() - seq.wrap);
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
         unsafe {
             score_sse2(seq, pssm, scores);
         }
+        #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+        panic!("attempting to run SSE2 code on a non-x86 host")
     }
 
-    fn best_position(scores: &StripedScores<<__m128i as Vector>::LANES>) -> Option<usize> {
-        unsafe { best_position_sse2(scores) }
+    pub fn best_position(scores: &StripedScores<<Sse2 as Backend>::LANES>) -> Option<usize> {
+        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+        unsafe {
+            best_position_sse2(scores)
+        }
+        #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+        panic!("attempting to run SSE2 code on a non-x86 host")
     }
 }
diff --git a/lightmotif/src/pli/scores.rs b/lightmotif/src/pli/scores.rs
new file mode 100644
index 0000000..134290a
--- /dev/null
+++ b/lightmotif/src/pli/scores.rs
@@ -0,0 +1,156 @@
+use std::iter::DoubleEndedIterator;
+use std::iter::ExactSizeIterator;
+use std::iter::FusedIterator;
+use std::ops::Index;
+use std::ops::Range;
+
+use typenum::marker_traits::NonZero;
+use typenum::marker_traits::Unsigned;
+
+use crate::abc::Alphabet;
+use crate::dense::DenseMatrix;
+use crate::pwm::ScoringMatrix;
+use crate::seq::StripedSequence;
+
+#[derive(Clone, Debug)]
+pub struct StripedScores<C: Unsigned + NonZero> {
+    data: DenseMatrix<f32, C>,
+    length: usize,
+}
+
+impl<C: Unsigned + NonZero> StripedScores<C> {
+    /// Create a new striped score matrix with the given length and data.
+    pub fn new(length: usize, data: DenseMatrix<f32, C>) -> Self {
+        Self { length, data }
+    }
+
+    /// Create an empty score matrix with the given length and row count.
+    pub fn empty(length: usize, rows: usize) -> Self {
+        Self::new(length, DenseMatrix::new(rows))
+    }
+
+    /// Return the number of scored positions.
+    pub fn len(&self) -> usize {
+        self.length
+    }
+
+    /// Return a reference to the striped matrix storing the scores.
+    pub fn matrix(&self) -> &DenseMatrix<f32, C> {
+        &self.data
+    }
+
+    /// Return a mutable reference to the striped matrix storing the scores.
+    pub fn matrix_mut(&mut self) -> &mut DenseMatrix<f32, C> {
+        &mut self.data
+    }
+
+    /// Create a new matrix large enough to store the scores of `pssm` applied to `seq`.
+    pub fn new_for<S, M, A>(seq: S, pssm: M) -> Self
+    where
+        A: Alphabet,
+        S: AsRef<StripedSequence<A, C>>,
+        M: AsRef<ScoringMatrix<A>>,
+    {
+        let seq = seq.as_ref();
+        let pssm = pssm.as_ref();
+        Self::empty(seq.length - pssm.len() + 1, seq.data.rows() - seq.wrap)
+    }
+
+    /// Resize the striped scores storage to the given length and number of rows.
+    pub fn resize(&mut self, length: usize, rows: usize) {
+        self.length = length;
+        self.data.resize(rows);
+    }
+
+    /// Resize the striped scores storage to store the scores of `pssm` applied to `seq`.
+    pub fn resize_for<S, M, A>(&mut self, seq: S, pssm: M)
+    where
+        A: Alphabet,
+        S: AsRef<StripedSequence<A, C>>,
+        M: AsRef<ScoringMatrix<A>>,
+    {
+        let seq = seq.as_ref();
+        let pssm = pssm.as_ref();
+        self.resize(seq.length - pssm.len() + 1, seq.data.rows() - seq.wrap);
+    }
+
+    /// Iterate over scores of individual sequence positions.
+    pub fn iter(&self) -> Iter<'_, C> {
+        Iter::new(&self)
+    }
+
+    /// Convert the striped scores to a vector of scores.
+    pub fn to_vec(&self) -> Vec<f32> {
+        self.iter().cloned().collect()
+    }
+}
+
+impl<C: Unsigned + NonZero> AsRef<DenseMatrix<f32, C>> for StripedScores<C> {
+    fn as_ref(&self) -> &DenseMatrix<f32, C> {
+        self.matrix()
+    }
+}
+
+impl<C: Unsigned + NonZero> AsMut<DenseMatrix<f32, C>> for StripedScores<C> {
+    fn as_mut(&mut self) -> &mut DenseMatrix<f32, C> {
+        self.matrix_mut()
+    }
+}
+
+impl<C: Unsigned + NonZero> Index<usize> for StripedScores<C> {
+    type Output = f32;
+    fn index(&self, index: usize) -> &f32 {
+        let col = index / self.data.rows();
+        let row = index % self.data.rows();
+        &self.data[row][col]
+    }
+}
+
+impl<C: Unsigned + NonZero> From<StripedScores<C>> for Vec<f32> {
+    fn from(scores: StripedScores<C>) -> Self {
+        scores.iter().cloned().collect()
+    }
+}
+
+// --- Iter --------------------------------------------------------------------
+
+pub struct Iter<'a, C: Unsigned + NonZero> {
+    scores: &'a StripedScores<C>,
+    indices: Range<usize>,
+}
+
+impl<'a, C: Unsigned + NonZero> Iter<'a, C> {
+    fn new(scores: &'a StripedScores<C>) -> Self {
+        Self {
+            scores,
+            indices: 0..scores.length,
+        }
+    }
+
+    fn get(&self, i: usize) -> &'a f32 {
+        let col = i / self.scores.data.rows();
+        let row = i % self.scores.data.rows();
+        &self.scores.data[row][col]
+    }
+}
+
+impl<'a, C: Unsigned + NonZero> Iterator for Iter<'a, C> {
+    type Item = &'a f32;
+    fn next(&mut self) -> Option<Self::Item> {
+        self.indices.next().map(|i| self.get(i))
+    }
+}
+
+impl<'a, C: Unsigned + NonZero> ExactSizeIterator for Iter<'a, C> {
+    fn len(&self) -> usize {
+        self.indices.len()
+    }
+}
+
+impl<'a, C: Unsigned + NonZero> FusedIterator for Iter<'a, C> {}
+
+impl<'a, C: Unsigned + NonZero> DoubleEndedIterator for Iter<'a, C> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.indices.next_back().map(|i| self.get(i))
+    }
+}
diff --git a/lightmotif/src/pli/vector.rs b/lightmotif/src/pli/vector.rs
new file mode 100644
index 0000000..dc5f16e
--- /dev/null
+++ b/lightmotif/src/pli/vector.rs
@@ -0,0 +1,49 @@
+use typenum::consts::U1;
+use typenum::consts::U16;
+use typenum::consts::U32;
+use typenum::consts::U4;
+use typenum::marker_traits::NonZero;
+use typenum::marker_traits::Unsigned;
+
+/// Trait for concrete vector implementations.
+///
+/// The trait is defined for the loading vector type, which has `LANES`
+/// lanes of `Item` values.
+pub trait Vector {
+    type Item;
+    type LANES: Unsigned + NonZero;
+}
+
+impl Vector for u8 {
+    type Item = u8;
+    type LANES = U1;
+}
+
+impl Vector for f32 {
+    type Item = f32;
+    type LANES = U1;
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+impl Vector for std::arch::x86_64::__m128i {
+    type Item = u8;
+    type LANES = U16;
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+impl Vector for std::arch::x86_64::__m256i {
+    type Item = u8;
+    type LANES = U32;
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+impl Vector for std::arch::x86_64::__m128 {
+    type Item = f32;
+    type LANES = U4;
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+impl Vector for std::arch::x86_64::__m256 {
+    type Item = f32;
+    type LANES = U4;
+}
diff --git a/lightmotif/src/seq.rs b/lightmotif/src/seq.rs
index 970ed6b..fdd47f8 100644
--- a/lightmotif/src/seq.rs
+++ b/lightmotif/src/seq.rs
@@ -38,11 +38,13 @@ impl<A: Alphabet> EncodedSequence<A> {
     }
 
     /// Return the number of symbols in the sequence.
+    #[inline]
     pub fn len(&self) -> usize {
         self.data.len()
     }
 
     /// Iterate over the symbols in the sequence.
+    #[inline]
     pub fn iter(&self) -> impl IntoIterator<Item = &A::Symbol> {
         self.data.iter()
     }
diff --git a/lightmotif/tests/dna.rs b/lightmotif/tests/dna.rs
index 25d6d88..1809132 100644
--- a/lightmotif/tests/dna.rs
+++ b/lightmotif/tests/dna.rs
@@ -7,11 +7,13 @@ use std::arch::x86_64::__m128i;
 use std::arch::x86_64::__m256i;
 use std::str::FromStr;
 
+use lightmotif::pli::BestPosition;
+use lightmotif::pli::Generic;
+use lightmotif::pli::Score;
 use lightmotif::CountMatrix;
 use lightmotif::Dna;
 use lightmotif::EncodedSequence;
 use lightmotif::Pipeline;
-use lightmotif::Score;
 
 use typenum::U32;
 
@@ -52,7 +54,8 @@ fn test_score_generic() {
     let pssm = pwm.into();
 
     striped.configure(&pssm);
-    let result = Pipeline::<Dna, u8>::score(&striped, &pssm);
+    let pli = Pipeline::generic();
+    let result = pli.score(&striped, &pssm);
     let scores = result.to_vec();
 
     assert_eq!(scores.len(), EXPECTED.len());
@@ -83,8 +86,9 @@ fn test_best_position_generic() {
     let pssm = pwm.into();
 
     striped.configure(&pssm);
-    let result = Pipeline::<Dna, u8>::score(&striped, &pssm);
-    assert_eq!(Pipeline::<Dna, u8>::best_position(&result), Some(18));
+    let pli = Pipeline::generic();
+    let result = pli.score(&striped, &pssm);
+    assert_eq!(pli.best_position(&result), Some(18));
 }
 
 #[cfg(target_feature = "sse2")]
@@ -104,7 +108,8 @@ fn test_score_sse2() {
     let pssm = pwm.into();
 
     striped.configure(&pssm);
-    let result = Pipeline::<Dna, __m128i>::score(&striped, &pssm);
+    let pli = Pipeline::sse2().unwrap();
+    let result = pli.score(&striped, &pssm);
 
     // for i in 0..result.data.rows() {
     //     println!("i={} {:?}", i, &result.data[i]);
@@ -140,8 +145,9 @@ fn test_best_position_sse2() {
     let pssm = pwm.into();
 
     striped.configure(&pssm);
-    let result = Pipeline::<Dna, __m128i>::score(&striped, &pssm);
-    assert_eq!(Pipeline::<Dna, __m128i>::best_position(&result), Some(18));
+    let pli = Pipeline::sse2().unwrap();
+    let result = pli.score(&striped, &pssm);
+    assert_eq!(pli.best_position(&result), Some(18));
 }
 
 #[cfg(target_feature = "avx2")]
@@ -161,7 +167,8 @@ fn test_score_avx2() {
     let pssm = pwm.into();
 
     striped.configure(&pssm);
-    let result = Pipeline::<_, __m256i>::score(&striped, &pssm);
+    let pli = Pipeline::avx2().unwrap();
+    let result = pli.score(&striped, &pssm);
     let scores = result.to_vec();
 
     assert_eq!(scores.len(), EXPECTED.len());
@@ -193,6 +200,7 @@ fn test_best_position_avx2() {
     let pssm = pwm.into();
 
     striped.configure(&pssm);
-    let result = Pipeline::<Dna, __m256i>::score(&striped, &pssm);
-    assert_eq!(Pipeline::<Dna, __m256i>::best_position(&result), Some(18));
+    let pli = Pipeline::avx2().unwrap();
+    let result = pli.score(&striped, &pssm);
+    assert_eq!(pli.best_position(&result), Some(18));
 }
-- 
GitLab