From b5ac289658f523b97418c9a2314e50a0ec7231e1 Mon Sep 17 00:00:00 2001 From: Martin Larralde <martin.larralde@embl.de> Date: Fri, 15 Dec 2023 14:10:39 +0100 Subject: [PATCH] Add trait to count the number of symbol occurrences in an arbitrary collection --- lightmotif/src/seq.rs | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/lightmotif/src/seq.rs b/lightmotif/src/seq.rs index 984b794..8647365 100644 --- a/lightmotif/src/seq.rs +++ b/lightmotif/src/seq.rs @@ -18,6 +18,19 @@ use super::err::InvalidSymbol; use super::num::StrictlyPositive; use super::pwm::ScoringMatrix; +// --- SymbolCount ------------------------------------------------------------- + +/// A trait for counting the number of occurrences of a symbol in a sequence. +pub trait SymbolCount<A: Alphabet> { + fn count_symbol(&self, symbol: <A as Alphabet>::Symbol) -> usize; +} + +impl<'a, A: Alphabet, T: IntoIterator<Item = &'a A::Symbol> + Copy> SymbolCount<A> for T { + fn count_symbol(&self, symbol: <A as Alphabet>::Symbol) -> usize { + self.into_iter().filter(|&&c| c == symbol).count() + } +} + // --- EncodedSequence --------------------------------------------------------- /// A biological sequence encoded with an alphabet. @@ -53,8 +66,8 @@ impl<A: Alphabet> EncodedSequence<A> { /// Iterate over the symbols in the sequence. #[inline] - pub fn iter(&self) -> impl IntoIterator<Item = &A::Symbol> { - self.data.iter() + pub fn iter(&self) -> <&[A::Symbol] as IntoIterator>::IntoIter { + self.data.as_slice().into_iter() } /// Convert the encoded sequence to a striped matrix.
@@ -233,7 +246,7 @@ impl<A: Alphabet, C: StrictlyPositive> AsRef<StripedSequence<A, C>> for StripedS } } -impl<A: Alphabet, C: Unsigned + NonZero> Index<usize> for StripedSequence<A, C> { +impl<A: Alphabet, C: StrictlyPositive> Index<usize> for StripedSequence<A, C> { type Output = <A as Alphabet>::Symbol; #[inline] fn index(&self, index: usize) -> &Self::Output { @@ -244,6 +257,15 @@ impl<A: Alphabet, C: Unsigned + NonZero> Index<usize> for StripedSequence<A, C> } } +impl<A: Alphabet, C: StrictlyPositive> SymbolCount<A> for &StripedSequence<A, C> { + fn count_symbol(&self, symbol: <A as Alphabet>::Symbol) -> usize { + self.data + .iter() + .map(|row| SymbolCount::<A>::count_symbol(&row, symbol)) + .sum() + } +} + #[cfg(test)] mod test { use typenum::consts::U2; -- GitLab