Skip to content
Snippets Groups Projects
Commit 97bac0d7 authored by Martin Larralde
Browse files

Use streaming intrinsics for storing data in AVX2 and SSE2 extensions

parent 2670dee0
No related branches found
No related tags found
No related merge requests found
......@@ -32,6 +32,7 @@ unsafe fn score_avx2(
scores: &mut StripedScores<<Avx2 as Backend>::LANES>,
) {
let data = scores.matrix_mut();
+let mut rowptr = data[0].as_mut_ptr();
// constant vector for comparing unknown bases
let n = _mm256_set1_epi8(Nucleotide::N as i8);
// mask vectors for broadcasting uint8x32_t to uint32x8_t to floatx8_t
......@@ -107,11 +108,11 @@ unsafe fn score_avx2(
let r3 = _mm256_permute2f128_ps(s1, s2, 0x31);
let r4 = _mm256_permute2f128_ps(s3, s4, 0x31);
// record the score for the current position
-let row = &mut data[i];
-_mm256_store_ps(row[0x00..].as_mut_ptr(), r1);
-_mm256_store_ps(row[0x08..].as_mut_ptr(), r2);
-_mm256_store_ps(row[0x10..].as_mut_ptr(), r3);
-_mm256_store_ps(row[0x18..].as_mut_ptr(), r4);
+_mm256_stream_ps(rowptr.add(0x00), r1);
+_mm256_stream_ps(rowptr.add(0x08), r2);
+_mm256_stream_ps(rowptr.add(0x10), r3);
+_mm256_stream_ps(rowptr.add(0x18), r4);
+rowptr = rowptr.add(data.stride());
}
}
......
......@@ -82,10 +82,10 @@ unsafe fn score_sse2<A, C>(
}
// record the score for the current position
let row = &mut data[i];
-_mm_storeu_ps(row[offset..].as_mut_ptr(), s1);
-_mm_storeu_ps(row[offset + 4..].as_mut_ptr(), s2);
-_mm_storeu_ps(row[offset + 8..].as_mut_ptr(), s3);
-_mm_storeu_ps(row[offset + 12..].as_mut_ptr(), s4);
+_mm_stream_ps(row[offset..].as_mut_ptr(), s1);
+_mm_stream_ps(row[offset + 4..].as_mut_ptr(), s2);
+_mm_stream_ps(row[offset + 8..].as_mut_ptr(), s3);
+_mm_stream_ps(row[offset + 12..].as_mut_ptr(), s4);
}
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment