Skip to content
Snippets Groups Projects
Commit c7f4996e authored by Martin Larralde's avatar Martin Larralde
Browse files

Add tests comparing to scores obtained with Biopython

parent a8a18027
No related branches found
No related tags found
No related merge requests found
extern crate fastpwm;
#[cfg(target_feature = "avx2")]
use std::arch::x86_64::__m256;
use fastpwm::Alphabet;
use fastpwm::CountMatrix;
use fastpwm::DnaAlphabet;
use fastpwm::EncodedSequence;
use fastpwm::Pipeline;
const SEQUENCE: &'static str = "ATGTCCCAACAACGATACCCCGAGCCCATCGCCGTCATCGGCTCGGCATGCAGATTCCCAGGCG";
const PATTERNS: &[&'static str] = &["GTTGACCTTATCAAC", "GTTGATCCAGTCAAC"];
// scores computed with Bio.motifs
#[rustfmt::skip]
const EXPECTED: &[f32] = &[
-23.07094 , -18.678621 , -15.219191 , -17.745737 , -18.678621 ,
-23.07094 , -17.745737 , -19.611507 , -27.463257 , -29.989803 ,
-14.286304 , -26.53037 , -15.219191 , -10.826873 , -10.826873 ,
-22.138054 , -38.774437 , -30.922688 , -5.50167 , -24.003826 ,
-18.678621 , -15.219191 , -35.315006 , -17.745737 , -10.826873 ,
-30.922688 , -23.07094 , -6.4345555, -31.855574 , -23.07094 ,
-15.219191 , -31.855574 , -8.961102 , -26.53037 , -27.463257 ,
-14.286304 , -15.219191 , -26.53037 , -23.07094 , -18.678621 ,
-14.286304 , -18.678621 , -26.53037 , -16.152077 , -17.745737 ,
-18.678621 , -17.745737 , -14.286304 , -30.922688 , -18.678621
];
#[test]
fn test_score_generic() {
let encoded = EncodedSequence::<DnaAlphabet>::from_text(SEQUENCE).unwrap();
let striped = encoded.to_striped::<2>();
let cm = CountMatrix::<DnaAlphabet, { DnaAlphabet::K }>::from_sequences(
"MX000001",
PATTERNS.iter().map(|x| EncodedSequence::from_text(x).unwrap()),
)
.unwrap();
let pbm = cm.to_probability([0.1, 0.1, 0.1, 0.1, 0.0]);
let pwm = pbm.to_weight([0.25, 0.25, 0.25, 0.25, 0.0]);
let pli = Pipeline::<_, f32>::new();
let result = pli.score(&striped, &pwm);
let scores = result.to_vec();
assert_eq!(scores.len(), EXPECTED.len());
for i in 0..scores.len() {
assert!(
(scores[i] - EXPECTED[i]).abs() < 1e-5,
"{} != {} at position {}",
scores[i],
EXPECTED[i],
i
);
}
// assert_eq!(result.data[0][0], -23.07094); // -23.07094
}
#[cfg(target_feature = "avx2")]
#[test]
fn test_score_avx2() {
let seq = "ATGTCCCAACAACGATACCCCGAGCCCATCGCCGTCATCGGCTCGGCATGCAGATTCCCAGGCGCATCCAGCTCGCCCTCCAAGCTGTGGAGTCTGCTCCAGGAACCTCGCGACGTCCTCAAGAAGTTCGACCCAGACCGCCTCAACCTGAAACGATTCCATCATACCAACGGTGACACTCACGGTGCGACCGACGTCAACAACAAATCATATCTCCTCGAAGAAAACACCCGACTCTTCGATGCCTCGTTCTTCGGAATCAGCCCCCTGGAGGCGGCCGGTATGGACCCCCAGCAGCGTCTGTTGCTGGAAACCGTCTACGAGTCGTTTGAGGCGGCTGGCGTGACCCTCGATCAGCTCAAGGGTTCTTTGACCTCGGTTCATGTTGGCGTCATGACCAACGACTACTCCTTTATCCAGCTCCGTGACCCAGAAACGCTGTCGAAGTACAACGCGACTGGCACGGCCAACAGCATCATGTCGAACCGTATTTCATATGTCTTTGACTTGAAAGGTCCATCAGAGACCATCGACACGGCGTGCTCCAGCTCGCTGGTCGCCCTGCACCACGCTGCTCAGGGCCTGCTCAGCGGCGACTGCGAGACTGCCGTCGTCGCCGGCGTCAACCTCATCTTCGACCCCTCTCCATACATCACAGAGTCCAAGCTACACATGCTGTCACCCGACTCCCAGTCTCGCATGTGGGACAAGTCTGCAAATGGCTACGCCCGCGGCGAGGGCGCTGCCGCGCTGCTCCTGAAGCCCCTCAGCCGCGCCCTGAGGGACGGCGATCACATCGAGGGCATTGTCCGAGGCACAGGAGTCAACTCGGACGGCCAGAGCTCCGGCATCACCATGCCTTTTGCCCCTGCGCAGTCGGCGCTCATTCGCCAAACTTATCTCCGTGCTGGCCTCGACCCGATCAAGGACCGGCCTCAGTACTTCGAGTGCCACGGCACCGGAACTCCAGCTGGTGACCCCGTGGAAGCGCGAGCCATCAGCGAGTCGTTGTTGGACGGTGAAAATGTCCCAACAACGATACCCCGAGCCCATCGCCGTCATCGGCTCGGCATGCAGATTCCCAGGCGCATCCAGCTCGCCCTCCAAGCTGTGGAGTCTGCTCCAGGAACCTCGCGACGTCCTCAAGAAGTTCGACCCAGACCGCCTCAACCTGAAACGATTCCATCATACCAACGGTGACACTCACGGTGCGACCGACGTCAACAACAAATCATATCTCCTCGAAGAAAACACCCGACTCTTCGATGCCTCGTTCTTCGGAATCAGCCCCCTGGAGGCGGCCGGTATGGACCCCCAGCAGCGTCTGTTGCTGGAAACCGTCTACGAGTCGTTTGAGGCGGCTGGCGTGACCCTCGATCAGCTCAAGGGTTCTTTGACCTCGGTTCATGTTGGCGTCATGACCAACGACTACTCCTTTATCCAGCTCCGTGACCCAGAAACGCTGTCGAAGTACAACGCGACTGGCACGGCCAACAGCATCATGTCGAACCGTATTTCATATGTCTTTGACTTGAAAGGTCCATCAGAGACCATCGACACGGCGTGCTCCAGCTCGCTGGTCGCCCTGCACCACGCTGCTCAGGGCCTGCTCAGCGGCGACTGCGAGACTGCCGTCGTCGCCGGCGTCAACCTCATCTTCGACCCCTCTCCATACATCACAGAGTCCAAGCTACACATGCTGTCACCCGACTCCCAGTCTCGCATGTGGGACAAGTCTGCAAATGGCTACGCCCGCGGCGAGGGCGCTGCCGCGCTGCTCCTGAAGCCCCTCAGCCGCGCCCTGAGGGACGGCGATCACATCGAGGGCATTGTCCGAGGCACAGGAGTCAACTCGGACGGCCAGAGCTCCGGCATCACCATGCCTTTTGCCCCTGCGCAGTCGGCGCTCATTCGCCAAACTTATCTCCGTGCTGGCCTCGACCCGATCAAGGACCGGCCTCAGTACTTCGAGTGCCACGGCACCGGAACTCCAGCTGGTGACCCCGTGGAAGCGCGAGCCATCAGCGAGTCGTTGTTGGACGGTGAAA";
let encoded = EncodedSequence::<DnaAlphabet>::from_text(&seq[..]).unwrap();
let striped = encoded.to_striped::<{ std::mem::size_of::<__m256>() }>();
let cm = CountMatrix::<DnaAlphabet, { DnaAlphabet::K }>::from_sequences(
"MX000001",
PATTERNS.iter().map(|x| EncodedSequence::from_text(x).unwrap()),
)
.unwrap();
let pbm = cm.to_probability([0.1, 0.1, 0.1, 0.1, 0.0]);
let pwm = pbm.to_weight([0.25, 0.25, 0.25, 0.25, 0.0]);
let pli = Pipeline::<_, __m256>::new();
let result = pli.score(&striped, &pwm);
let scores = result.to_vec();
// assert_eq!(scores.len(), seq.len() - cm.len() + 1);
// assert_eq!(scores[0], -23.07094); // -23.07094
for i in 0..EXPECTED.len() {
assert!(
(scores[i] - EXPECTED[i]).abs() < 1e-5,
"{} != {} at position {}",
scores[i],
EXPECTED[i],
i
);
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment