Skip to content
Snippets Groups Projects
Commit 6097e4e1 authored by Martin Larralde's avatar Martin Larralde
Browse files

Add sequence and alphabet interfaces

parent 43e05da5
No related branches found
No related tags found
No related merge requests found
use std::convert::TryFrom;
use std::fmt::Debug;
/// Error reported when a character cannot be converted into an alphabet symbol.
///
/// The wrapped `char` is the character that was rejected.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct InvalidSymbol(char);

impl std::fmt::Display for InvalidSymbol {
    /// Writes a short human-readable message quoting the rejected character.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "invalid symbol: {:?}", self.0)
    }
}

// Lets the error be boxed as `dyn Error` and propagated with `?` by callers.
impl std::error::Error for InvalidSymbol {}
/// Interface shared by every biological alphabet.
///
/// An implementor must be `Copy`, `Default` and `Debug`, and names the
/// symbol type used for the letters of that alphabet.
pub trait Alphabet: Copy + Default + Debug {
    /// Symbol type of the alphabet.
    ///
    /// It must be copyable, provide a default value, and be fallibly
    /// convertible from a `char`.
    type Symbol: Copy + Default + TryFrom<char> + Sized;
}
/// A single symbol of the DNA alphabet.
///
/// The enum is `#[repr(u8)]` with explicit discriminants, so `symbol as u8`
/// yields the number written next to each variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum DnaSymbol {
    /// Adenine.
    A = 0,
    /// Cytosine.
    C = 1,
    /// Thymine.
    T = 2,
    /// Guanine.
    G = 3,
    /// The `N` symbol (presumably "any/unknown nucleotide"); this is the
    /// value returned by `DnaSymbol::default()`.
    N = 4,
}
/// Fallible conversion from a character to a DNA symbol.
impl TryFrom<char> for DnaSymbol {
    type Error = InvalidSymbol;

    /// Maps the uppercase letters `A`, `C`, `T`, `G` and `N` to the variant
    /// of the same name.
    ///
    /// # Errors
    ///
    /// Any other character (lowercase letters included) is returned inside
    /// an `InvalidSymbol`.
    fn try_from(c: char) -> Result<Self, Self::Error> {
        let symbol = match c {
            'A' => DnaSymbol::A,
            'C' => DnaSymbol::C,
            'T' => DnaSymbol::T,
            'G' => DnaSymbol::G,
            'N' => DnaSymbol::N,
            rejected => return Err(InvalidSymbol(rejected)),
        };
        Ok(symbol)
    }
}
impl Default for DnaSymbol {
fn default() -> DnaSymbol {
DnaSymbol::N
}
}
/// Marker type for the DNA alphabet.
///
/// It is a unit struct: it carries no data and only selects the symbol type
/// through its `Alphabet` implementation.
#[derive(Clone, Copy, Debug, Default)]
pub struct DnaAlphabet;
/// Declares `DnaAlphabet` as an `Alphabet` whose symbols are `DnaSymbol`s.
impl Alphabet for DnaAlphabet {
// Sequences over this alphabet are stored as `DnaSymbol` values.
type Symbol = DnaSymbol;
}
\ No newline at end of file
//! Fast position-weight matrices using sequence striping and SIMD.
mod abc;
mod matrix;
mod seq;
use super::abc::Alphabet;
use super::abc::InvalidSymbol;
use super::matrix::DenseMatrix;
/// A sequence stored as a flat vector of alphabet symbols.
#[derive(Debug, Clone)]
pub struct EncodedSequence<A: Alphabet> {
    /// The alphabet value this sequence was encoded with.
    pub alphabet: A,
    /// The symbols of the sequence, in order.
    pub data: Vec<A::Symbol>,
}
impl<A: Alphabet> EncodedSequence<A> {
/// Create a new encoded sequence from a textual representation.
pub fn from_text(sequence: &str) -> Result<Self, InvalidSymbol>
where InvalidSymbol: From<<A::Symbol as TryFrom<char>>::Error>
{
let data = sequence.chars()
.map(|c| A::Symbol::try_from(c))
.collect::<Result<_, _>>()?;
Ok(Self {
data,
alphabet: Default::default(),
})
}
/// Convert the encoded sequence to a striped matrix.
pub fn to_striped<const C: usize>(&self) -> StripedSequence<A, C> {
let length = self.data.len();
let n = (length + C) / C;
let mut data = DenseMatrix::new(n);
for (i, &x) in self.data.iter().enumerate() {
data[i%n][i/n] = x;
}
StripedSequence {
alphabet: self.alphabet,
data,
length,
}
}
}
/// A sequence stored in a dense matrix with `C` columns (32 by default).
#[derive(Debug, Clone)]
pub struct StripedSequence<A: Alphabet, const C: usize = 32> {
    /// The alphabet value of the sequence.
    pub alphabet: A,
    /// Number of symbols in the sequence the matrix was built from.
    pub length: usize,
    /// The symbols, arranged in a `DenseMatrix` with `C` columns.
    pub data: DenseMatrix<A::Symbol, C>,
}
/// Converts an encoded sequence into its striped form.
impl<A: Alphabet, const C: usize> From<EncodedSequence<A>> for StripedSequence<A, C> {
    /// Delegates to `EncodedSequence::to_striped` with the target column
    /// count `C`; the encoded sequence is consumed.
    fn from(encoded: EncodedSequence<A>) -> Self {
        encoded.to_striped::<C>()
    }
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment