From 4183147db3fd62bf4273ded6ac6f70e2ab358c41 Mon Sep 17 00:00:00 2001 From: Martin Larralde <martin.larralde@embl.de> Date: Thu, 20 Jun 2024 23:01:16 +0200 Subject: [PATCH] Reorganise code for benchmarks --- lightmotif-bench/dna.rs | 80 ++++++++++++- lightmotif/benches/score.rs | 184 ++++++++++++++++++++--------- lightmotif/benches/threshold.rs | 198 +++++++++++++++++++++++--------- 3 files changed, 344 insertions(+), 118 deletions(-) diff --git a/lightmotif-bench/dna.rs b/lightmotif-bench/dna.rs index 7a1612d..e2c96ae 100644 --- a/lightmotif-bench/dna.rs +++ b/lightmotif-bench/dna.rs @@ -38,12 +38,16 @@ fn bench_scanner_max_by(bencher: &mut test::Bencher) { let pssm = pbm.to_scoring(bg); striped.configure(&pssm); + let mut best = 0; bencher.iter(|| { - Scanner::new(&pssm, &striped) + best = Scanner::new(&pssm, &striped) .max_by(|x, y| x.score.partial_cmp(&y.score).unwrap()) - .unwrap(); + .unwrap() + .position; }); bencher.bytes = seq.len() as u64; + + println!("best: {:?}", best); } #[bench] @@ -62,8 +66,11 @@ fn bench_scanner_best(bencher: &mut test::Bencher) { let pssm = pbm.to_scoring(bg); striped.configure(&pssm); - bencher.iter(|| Scanner::new(&pssm, &striped).best().unwrap()); + let mut best = 0; + bencher.iter(|| best = Scanner::new(&pssm, &striped).best().unwrap().position); bencher.bytes = seq.len() as u64; + + println!("best: {:?}", best); } fn bench_lightmotif<C: StrictlyPositive, P: Score<f32, Dna, C> + Maximum<f32, C>>( @@ -87,10 +94,44 @@ fn bench_lightmotif<C: StrictlyPositive, P: Score<f32, Dna, C> + Maximum<f32, C> let mut scores = StripedScores::empty(); scores.resize(striped.matrix().rows(), striped.len()); bencher.bytes = seq.len() as u64; + let mut best = 0; bencher.iter(|| { test::black_box(pli.score_into(&pssm, &striped, &mut scores)); - test::black_box(pli.argmax(&scores)); + best = scores.offset(test::black_box(pli.argmax(&scores).unwrap())); }); + + println!("best: {:?}", best); +} + +fn bench_lightmotif_discrete<C: StrictlyPositive, P: Score<u8, Dna, C> + Maximum<u8, C>>( + bencher: &mut test::Bencher, + pli: &P, +) { + let seq = &SEQUENCE[..N]; + let encoded = EncodedSequence::<Dna>::encode(seq).unwrap(); + let mut striped = Pipeline::generic().stripe(encoded); + + let bg = Background::<Dna>::uniform(); + let cm = CountMatrix::<Dna>::from_sequences([ + EncodedSequence::encode("GTTGACCTTATCAAC").unwrap(), + EncodedSequence::encode("GTTGATCCAGTCAAC").unwrap(), + ]) + .unwrap(); + let pbm = cm.to_freq(0.1); + let pssm = pbm.to_scoring(bg); + let dm = pssm.to_discrete(); + + striped.configure(&pssm); + let mut scores = StripedScores::empty(); + let mut best = 0; + scores.resize(striped.matrix().rows(), striped.len()); + bencher.bytes = seq.len() as u64; + bencher.iter(|| { + test::black_box(pli.score_into(&dm, &striped, &mut scores)); + best = scores.offset(test::black_box(pli.argmax(&scores).unwrap())); + }); + + println!("best: {:?}", best); } #[bench] @@ -119,6 +160,32 @@ fn bench_avx2(bencher: &mut test::Bencher) { bench_lightmotif(bencher, &pli); } +#[bench] +fn bench_discrete_generic(bencher: &mut test::Bencher) { + let pli = Pipeline::generic(); + bench_lightmotif_discrete::<U1, _>(bencher, &pli); +} + +#[cfg(target_feature = "sse2")] +#[bench] +fn bench_discrete_sse2(bencher: &mut test::Bencher) { + let pli = Pipeline::sse2().unwrap(); + bench_lightmotif_discrete::<U16, _>(bencher, &pli); +} + +#[bench] +fn bench_discrete_dispatch(bencher: &mut test::Bencher) { + let pli = Pipeline::dispatch(); + bench_lightmotif_discrete(bencher, &pli); +} + +#[cfg(target_feature = "avx2")] +#[bench] +fn bench_discrete_avx2(bencher: &mut test::Bencher) { + let pli = Pipeline::avx2().unwrap(); + bench_lightmotif_discrete(bencher, &pli); +} + #[bench] fn bench_bio(bencher: &mut test::Bencher) { use bio::pattern_matching::pssm::DNAMotif; @@ -133,5 +200,8 @@ fn bench_bio(bencher: &mut test::Bencher) { .unwrap(); bencher.bytes = seq.len() as u64; - bencher.iter(|| test::black_box(pssm.score(seq.as_bytes()).unwrap())); + let mut best = 0; + bencher.iter(|| best = test::black_box(pssm.score(seq.as_bytes()).unwrap()).loc); + + println!("best: {:?}", best); } diff --git a/lightmotif/benches/score.rs b/lightmotif/benches/score.rs index 54c16ea..44ee5df 100644 --- a/lightmotif/benches/score.rs +++ b/lightmotif/benches/score.rs @@ -20,65 +20,135 @@ mod dna { const SEQUENCE: &str = include_str!("ecoli.txt"); - fn bench<C: StrictlyPositive, P: Score<f32, Dna, C>>(bencher: &mut test::Bencher, pli: &P) { - let encoded = EncodedSequence::<Dna>::encode(SEQUENCE).unwrap(); - let mut striped = Pipeline::generic().stripe(encoded); - - let cm = CountMatrix::<Dna>::from_sequences([ - EncodedSequence::encode("GTTGACCTTATCAAC").unwrap(), - EncodedSequence::encode("GTTGATCCAGTCAAC").unwrap(), - ]) - .unwrap(); - let pbm = cm.to_freq(0.1); - let pssm = pbm.to_scoring(None); - - striped.configure(&pssm); - let mut scores = StripedScores::empty(); - scores.resize(striped.matrix().rows(), striped.len()); - bencher.bytes = SEQUENCE.len() as u64; - bencher.iter(|| { - test::black_box(pli.score_into(&pssm, &striped, &mut scores)); - }); - } - - #[bench] - fn bench_generic(bencher: &mut test::Bencher) { - let pli = Pipeline::generic(); - bench::<U32, _>(bencher, &pli); - } - - #[bench] - fn bench_dispatch(bencher: &mut test::Bencher) { - let pli = Pipeline::dispatch(); - bench(bencher, &pli); + mod f32 { + use super::*; + + fn bench<C: StrictlyPositive, P: Score<f32, Dna, C>>(bencher: &mut test::Bencher, pli: &P) { + let encoded = EncodedSequence::<Dna>::encode(SEQUENCE).unwrap(); + let mut striped = Pipeline::generic().stripe(encoded); + + let cm = CountMatrix::<Dna>::from_sequences([ + EncodedSequence::encode("GTTGACCTTATCAAC").unwrap(), + EncodedSequence::encode("GTTGATCCAGTCAAC").unwrap(), + ]) + .unwrap(); + let pbm = cm.to_freq(0.1); + let pssm = pbm.to_scoring(None); + + striped.configure(&pssm); + let mut scores = StripedScores::empty(); + scores.resize(striped.matrix().rows(), striped.len()); + bencher.bytes = SEQUENCE.len() as u64; + bencher.iter(|| { + test::black_box(pli.score_into(&pssm, &striped, &mut scores)); + }); + } + + #[bench] + fn bench_generic(bencher: &mut test::Bencher) { + let pli = Pipeline::generic(); + bench::<U32, _>(bencher, &pli); + } + + #[bench] + fn bench_dispatch(bencher: &mut test::Bencher) { + let pli = Pipeline::dispatch(); + bench(bencher, &pli); + } + + #[cfg(target_feature = "sse2")] + #[bench] + fn bench_sse2(bencher: &mut test::Bencher) { + let pli = Pipeline::sse2().unwrap(); + bench::<U16, _>(bencher, &pli); + } + + #[cfg(target_feature = "sse2")] + #[bench] + fn bench_sse2_32(bencher: &mut test::Bencher) { + let pli = Pipeline::sse2().unwrap(); + bench::<U32, _>(bencher, &pli); + } + + #[cfg(target_feature = "avx2")] + #[bench] + fn bench_avx2(bencher: &mut test::Bencher) { + let pli = Pipeline::avx2().unwrap(); + bench(bencher, &pli); + } + + #[cfg(target_feature = "neon")] + #[bench] + fn bench_neon(bencher: &mut test::Bencher) { + let pli = Pipeline::neon().unwrap(); + bench::<U16, _>(bencher, &pli); + } } - #[cfg(target_feature = "sse2")] - #[bench] - fn bench_sse2(bencher: &mut test::Bencher) { - let pli = Pipeline::sse2().unwrap(); - bench::<U16, _>(bencher, &pli); - } - - #[cfg(target_feature = "sse2")] - #[bench] - fn bench_sse2_32(bencher: &mut test::Bencher) { - let pli = Pipeline::sse2().unwrap(); - bench::<U32, _>(bencher, &pli); - } - - #[cfg(target_feature = "avx2")] - #[bench] - fn bench_avx2(bencher: &mut test::Bencher) { - let pli = Pipeline::avx2().unwrap(); - bench(bencher, &pli); - } - - #[cfg(target_feature = "neon")] - #[bench] - fn bench_neon(bencher: &mut test::Bencher) { - let pli = Pipeline::neon().unwrap(); - bench::<U16, _>(bencher, &pli); + mod u8 { + use super::*; + + fn bench<C: StrictlyPositive, P: Score<u8, Dna, C>>(bencher: &mut test::Bencher, pli: &P) { + let encoded = EncodedSequence::<Dna>::encode(SEQUENCE).unwrap(); + let mut striped = Pipeline::generic().stripe(encoded); + + let cm = CountMatrix::<Dna>::from_sequences([ + EncodedSequence::encode("GTTGACCTTATCAAC").unwrap(), + EncodedSequence::encode("GTTGATCCAGTCAAC").unwrap(), + ]) + .unwrap(); + let pbm = cm.to_freq(0.1); + let pssm = pbm.to_scoring(None); + let dm = pssm.to_discrete(); + + striped.configure(&pssm); + let mut scores = StripedScores::empty(); + scores.resize(striped.matrix().rows(), striped.len()); + bencher.bytes = SEQUENCE.len() as u64; + bencher.iter(|| { + test::black_box(pli.score_into(&dm, &striped, &mut scores)); + }); + } + + #[bench] + fn bench_generic(bencher: &mut test::Bencher) { + let pli = Pipeline::generic(); + bench::<U32, _>(bencher, &pli); + } + + #[bench] + fn bench_dispatch(bencher: &mut test::Bencher) { + let pli = Pipeline::dispatch(); + bench(bencher, &pli); + } + + #[cfg(target_feature = "sse2")] + #[bench] + fn bench_sse2(bencher: &mut test::Bencher) { + let pli = Pipeline::sse2().unwrap(); + bench::<U16, _>(bencher, &pli); + } + + #[cfg(target_feature = "sse2")] + #[bench] + fn bench_sse2_32(bencher: &mut test::Bencher) { + let pli = Pipeline::sse2().unwrap(); + bench::<U32, _>(bencher, &pli); + } + + #[cfg(target_feature = "avx2")] + #[bench] + fn bench_avx2(bencher: &mut test::Bencher) { + let pli = Pipeline::avx2().unwrap(); + bench(bencher, &pli); + } + + #[cfg(target_feature = "neon")] + #[bench] + fn bench_neon(bencher: &mut test::Bencher) { + let pli = Pipeline::neon().unwrap(); + bench::<U16, _>(bencher, &pli); + } } } diff --git a/lightmotif/benches/threshold.rs b/lightmotif/benches/threshold.rs index f377422..1e530e2 100644 --- a/lightmotif/benches/threshold.rs +++ b/lightmotif/benches/threshold.rs @@ -17,67 +17,153 @@ use lightmotif::seq::EncodedSequence; const SEQUENCE: &str = include_str!("ecoli.txt"); -fn bench<C: StrictlyPositive, P: Score<f32, Dna, C> + Threshold<f32, C>>( - bencher: &mut test::Bencher, - pli: &P, -) { - let encoded = EncodedSequence::<Dna>::encode(SEQUENCE).unwrap(); - let mut striped = Pipeline::generic().stripe(encoded); - - let cm = CountMatrix::<Dna>::from_sequences([ - EncodedSequence::encode("GTTGACCTTATCAAC").unwrap(), - EncodedSequence::encode("GTTGATCCAGTCAAC").unwrap(), - ]) - .unwrap(); - let pbm = cm.to_freq(0.1); - let pssm = pbm.to_scoring(None); - - striped.configure(&pssm); - let mut scores = StripedScores::empty(); - pli.score_into(&pssm, &striped, &mut scores); - - bencher.bytes = (std::mem::size_of::<f32>() * scores.matrix().rows() * C::USIZE) as u64; - bencher.iter(|| { - test::black_box(pli.threshold(&scores, 10.0)); - }); -} +mod f32 { -#[bench] -fn bench_generic(bencher: &mut test::Bencher) { - let pli = Pipeline::generic(); - bench::<U32, _>(bencher, &pli); -} + use super::*; -#[bench] -fn bench_dispatch(bencher: &mut test::Bencher) { - let pli = Pipeline::dispatch(); - bench(bencher, &pli); -} + fn bench<C: StrictlyPositive, P: Score<f32, Dna, C> + Threshold<f32, C>>( + bencher: &mut test::Bencher, + pli: &P, + ) { + let encoded = EncodedSequence::<Dna>::encode(SEQUENCE).unwrap(); + let mut striped = Pipeline::generic().stripe(encoded); -#[cfg(target_feature = "sse2")] -#[bench] -fn bench_sse2(bencher: &mut test::Bencher) { - let pli = Pipeline::sse2().unwrap(); - bench::<U16, _>(bencher, &pli); -} + let cm = CountMatrix::<Dna>::from_sequences([ + EncodedSequence::encode("GTTGACCTTATCAAC").unwrap(), + EncodedSequence::encode("GTTGATCCAGTCAAC").unwrap(), + ]) + .unwrap(); + let pbm = cm.to_freq(0.1); + let pssm = pbm.to_scoring(None); -#[cfg(target_feature = "sse2")] -#[bench] -fn bench_sse2_32(bencher: &mut test::Bencher) { - let pli = Pipeline::sse2().unwrap(); - bench::<U32, _>(bencher, &pli); -} + striped.configure(&pssm); + let mut scores = StripedScores::empty(); + pli.score_into(&pssm, &striped, &mut scores); + + bencher.bytes = (std::mem::size_of::<f32>() * scores.matrix().rows() * C::USIZE) as u64; + bencher.iter(|| { + test::black_box(pli.threshold(&scores, 10.0)); + }); + } -#[cfg(target_feature = "avx2")] -#[bench] -fn bench_avx2(bencher: &mut test::Bencher) { - let pli = Pipeline::avx2().unwrap(); - bench(bencher, &pli); + #[bench] + fn bench_generic(bencher: &mut test::Bencher) { + let pli = Pipeline::generic(); + bench::<U32, _>(bencher, &pli); + } + + #[bench] + fn bench_dispatch(bencher: &mut test::Bencher) { + let pli = Pipeline::dispatch(); + bench(bencher, &pli); + } + + #[cfg(target_feature = "sse2")] + #[bench] + fn bench_sse2(bencher: &mut test::Bencher) { + let pli = Pipeline::sse2().unwrap(); + bench::<U16, _>(bencher, &pli); + } + + #[cfg(target_feature = "sse2")] + #[bench] + fn bench_sse2_32(bencher: &mut test::Bencher) { + let pli = Pipeline::sse2().unwrap(); + bench::<U32, _>(bencher, &pli); + } + + #[cfg(target_feature = "avx2")] + #[bench] + fn bench_avx2(bencher: &mut test::Bencher) { + let pli = Pipeline::avx2().unwrap(); + bench(bencher, &pli); + } + + #[cfg(target_feature = "neon")] + #[bench] + fn bench_neon(bencher: &mut test::Bencher) { + let pli = Pipeline::neon().unwrap(); + bench::<U16, _>(bencher, &pli); + } } -#[cfg(target_feature = "neon")] -#[bench] -fn bench_neon(bencher: &mut test::Bencher) { - let pli = Pipeline::neon().unwrap(); - bench::<U16, _>(bencher, &pli); +mod u8 { + + use super::*; + + fn bench<C: StrictlyPositive, P: Score<u8, Dna, C> + Threshold<u8, C>>( + bencher: &mut test::Bencher, + pli: &P, + ) { + let encoded = EncodedSequence::<Dna>::encode(SEQUENCE).unwrap(); + let mut striped = Pipeline::generic().stripe(encoded); + + let cm = CountMatrix::<Dna>::from_sequences([ + EncodedSequence::encode("GTTGACCTTATCAAC").unwrap(), + EncodedSequence::encode("GTTGATCCAGTCAAC").unwrap(), + ]) + .unwrap(); + let pbm = cm.to_freq(0.1); + let pssm = pbm.to_scoring(None); + let dm = pssm.to_discrete(); + let threshold = dm.scale(10.0); + + striped.configure(&pssm); + let mut scores = StripedScores::empty(); + pli.score_into(&dm, &striped, &mut scores); + + let mut hits = Vec::new(); + bencher.bytes = (std::mem::size_of::<f32>() * scores.matrix().rows() * C::USIZE) as u64; + bencher.iter(|| { + hits.clear(); + for mc in pli.threshold(&scores, threshold) { + let i = scores.offset(mc); + if pssm.score_position(&striped, i) >= 10.0 { + hits.push(i); + } + } + }); + + println!("{:?}", hits.len()); + } + + #[bench] + fn bench_generic(bencher: &mut test::Bencher) { + let pli = Pipeline::generic(); + bench::<U32, _>(bencher, &pli); + } + + #[bench] + fn bench_dispatch(bencher: &mut test::Bencher) { + let pli = Pipeline::dispatch(); + bench(bencher, &pli); + } + + #[cfg(target_feature = "sse2")] + #[bench] + fn bench_sse2(bencher: &mut test::Bencher) { + let pli = Pipeline::sse2().unwrap(); + bench::<U16, _>(bencher, &pli); + } + + #[cfg(target_feature = "sse2")] + #[bench] + fn bench_sse2_32(bencher: &mut test::Bencher) { + let pli = Pipeline::sse2().unwrap(); + bench::<U32, _>(bencher, &pli); + } + + #[cfg(target_feature = "avx2")] + #[bench] + fn bench_avx2(bencher: &mut test::Bencher) { + let pli = Pipeline::avx2().unwrap(); + bench(bencher, &pli); + } + + #[cfg(target_feature = "neon")] + #[bench] + fn bench_neon(bencher: &mut test::Bencher) { + let pli = Pipeline::neon().unwrap(); + bench::<U16, _>(bencher, &pli); + } } -- GitLab