From 740829b3a50ccd6ddc7145bd1af97e1c643e1a4d Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Wed, 10 Jul 2024 20:50:58 -0500 Subject: [PATCH] Fix OOB write in AVX-512 codepath If the block size is not divisible by 64, we could overrun the buffer. --- deps/obl/oblas_lite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/obl/oblas_lite.c b/deps/obl/oblas_lite.c index 7c3a943..9c76792 100644 --- a/deps/obl/oblas_lite.c +++ b/deps/obl/oblas_lite.c @@ -73,7 +73,7 @@ void obl_axpyb32_ref(u8 *a, u32 *b, u8 u, unsigned k) const __m128i uhi_128 = _mm_loadu_si128((__m128i *)u_hi); \ const __m512i urow_lo = _mm512_broadcast_i32x4(ulo_128); \ const __m512i urow_hi = _mm512_broadcast_i32x4(uhi_128); \ - __m512i *ap = (__m512i *)a, *ae = (__m512i *)(a + k - (k % sizeof(__m256i))), *bp = (__m512i *)b; \ + __m512i *ap = (__m512i *)a, *ae = (__m512i *)(a + k - (k % sizeof(__m512i))), *bp = (__m512i *)b; \ for (; ap < ae; ap++, bp++) { \ __m512i bx = _mm512_loadu_si512(bp); \ __m512i lo = _mm512_and_si512(bx, mask); \