From 230396bc5bb53c058b3b6d86af2bb401a3837c84 Mon Sep 17 00:00:00 2001 From: Eddie-Wang1120 Date: Thu, 20 Jun 2024 00:12:58 +0800 Subject: [PATCH] update avx2 --- ggml-quants.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index a3633fc53afee..f45ece1f25836 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -3806,18 +3806,27 @@ void ggml_vec_dot_q2_2_q8_0(int n, float * restrict s, size_t bs, const void * r const __m256 d = _mm256_set1_ps( GGML_FP16_TO_FP32(y[i].d) ); - __m256i xq8 = _mm256_set_epi32( - (int)q22_grid[x[i].qs[7]], - (int)q22_grid[x[i].qs[6]], - (int)q22_grid[x[i].qs[5]], - (int)q22_grid[x[i].qs[4]], - (int)q22_grid[x[i].qs[3]], - (int)q22_grid[x[i].qs[2]], - (int)q22_grid[x[i].qs[1]], - (int)q22_grid[x[i].qs[0]] - ); - - __m256i yq8 = _mm256_loadu_si256((const __m256i*)(y[i].qs)); + __m128i xq8b = _mm_loadu_si64(x[i].qs); + __m256i xq8 = MM256_SET_M128I(xq8b, xq8b); + __m256i xq8l = _mm256_shuffle_epi8(xq8, _mm256_set_epi8(5, -1, 5, -1, 5, -1, 5, -1, + 4, -1, 4, -1, 4, -1, 4, -1, + 1, -1, 1, -1, 1, -1, 1, -1, + 0, -1, 0, -1, 0, -1, 0, -1)); + __m256i xq8h = _mm256_shuffle_epi8(xq8, _mm256_set_epi8(7, -1, 7, -1, 7, -1, 7, -1, + 6, -1, 6, -1, 6, -1, 6, -1, + 3, -1, 3, -1, 3, -1, 3, -1, + 2, -1, 2, -1, 2, -1, 2, -1)); + __m256i shift = _mm256_set_epi16(64, 16, 4, 1, + 64, 16, 4, 1, + 64, 16, 4, 1, + 64, 16, 4, 1); + xq8l = _mm256_mullo_epi16(xq8l, shift); + xq8h = _mm256_mullo_epi16(xq8h, shift); + xq8l = _mm256_srai_epi16(xq8l, 14); + xq8h = _mm256_srai_epi16(xq8h, 14); + xq8 = _mm256_packs_epi16(xq8l, xq8h); + + __m256i yq8 = _mm256_lddqu_si256((const __m256i*)(y[i].qs)); const __m256 q = mul_sum_i8_pairs_float(xq8, yq8); acc = _mm256_fmadd_ps( d, q, acc );