From 6b0b37b33579b3393cdd196fd700fe1bc8a7143b Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Mon, 26 Feb 2024 14:46:27 -0800 Subject: [PATCH 1/7] Use smart pivot in key-value --- src/avx512-64bit-common.h | 12 ++++++++++++ src/xss-common-keyvaluesort.hpp | 5 ++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/avx512-64bit-common.h b/src/avx512-64bit-common.h index 689c317..fcbb8d1 100644 --- a/src/avx512-64bit-common.h +++ b/src/avx512-64bit-common.h @@ -34,6 +34,7 @@ struct ymm_vector { using opmask_t = __mmask8; static const uint8_t numlanes = 8; static constexpr simd_type vec_type = simd_type::AVX512; + using swizzle_ops = avx512_ymm_64bit_swizzle_ops; using swizzle_ops = avx512_ymm_64bit_swizzle_ops; @@ -210,6 +211,9 @@ struct ymm_vector { { return _mm256_castps_si256(v); } + static bool all_false(opmask_t k){ + return k == 0; + } static reg_t reverse(reg_t ymm) { const __m256i rev_index = _mm256_set_epi32(NETWORK_32BIT_AVX2_2); @@ -232,6 +236,7 @@ struct ymm_vector { using opmask_t = __mmask8; static const uint8_t numlanes = 8; static constexpr simd_type vec_type = simd_type::AVX512; + using swizzle_ops = avx512_ymm_64bit_swizzle_ops; using swizzle_ops = avx512_ymm_64bit_swizzle_ops; @@ -394,6 +399,9 @@ struct ymm_vector { { return v; } + static bool all_false(opmask_t k){ + return k == 0; + } static reg_t reverse(reg_t ymm) { const __m256i rev_index = _mm256_set_epi32(NETWORK_32BIT_AVX2_2); @@ -416,6 +424,7 @@ struct ymm_vector { using opmask_t = __mmask8; static const uint8_t numlanes = 8; static constexpr simd_type vec_type = simd_type::AVX512; + using swizzle_ops = avx512_ymm_64bit_swizzle_ops; using swizzle_ops = avx512_ymm_64bit_swizzle_ops; @@ -578,6 +587,9 @@ struct ymm_vector { { return v; } + static bool all_false(opmask_t k){ + return k == 0; + } static reg_t reverse(reg_t ymm) { const __m256i rev_index = _mm256_set_epi32(NETWORK_32BIT_AVX2_2); diff --git a/src/xss-common-keyvaluesort.hpp b/src/xss-common-keyvaluesort.hpp index 88552ce..b61f5b1 100644 --- a/src/xss-common-keyvaluesort.hpp +++ b/src/xss-common-keyvaluesort.hpp @@ -392,7 +392,10 @@ X86_SIMD_SORT_INLINE void kvsort_(type1_t *keys, return; } - type1_t pivot = get_pivot_blocks(keys, left, right); + type1_t pivot; + auto pivot_result = get_pivot_smart(keys, left, right); + pivot = pivot_result.pivot; + type1_t smallest = vtype1::type_max(); type1_t biggest = vtype1::type_min(); arrsize_t pivot_index = kvpartition_unrolled( From 84e16665479c4d520758b54544d446f0bb6ce264 Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 27 Feb 2024 09:20:48 -0800 Subject: [PATCH 2/7] format fixes --- src/avx512-64bit-common.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/avx512-64bit-common.h b/src/avx512-64bit-common.h index fcbb8d1..f650c24 100644 --- a/src/avx512-64bit-common.h +++ b/src/avx512-64bit-common.h @@ -211,7 +211,8 @@ struct ymm_vector { { return _mm256_castps_si256(v); } - static bool all_false(opmask_t k){ + static bool all_false(opmask_t k) + { return k == 0; } static reg_t reverse(reg_t ymm) @@ -399,7 +400,8 @@ struct ymm_vector { { return v; } - static bool all_false(opmask_t k){ + static bool all_false(opmask_t k) + { return k == 0; } static reg_t reverse(reg_t ymm) @@ -587,7 +589,8 @@ struct ymm_vector { { return v; } - static bool all_false(opmask_t k){ + static bool all_false(opmask_t k) + { return k == 0; } static reg_t reverse(reg_t ymm) From e2ff7056f0e170fab3df98cce4d6852da6ec874e Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 27 Feb 2024 14:57:14 -0800 Subject: [PATCH 3/7] Bug in rand_array.h --- utils/rand_array.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/rand_array.h b/utils/rand_array.h index cb99da2..74de254 100644 --- a/utils/rand_array.h +++ b/utils/rand_array.h @@ -137,7 +137,7 @@ static std::vector get_array(std::string arrtype, val = std::numeric_limits::max(); } for (size_t ii = 1; ii <= arrsize; ++ii) { - if (rand() % 0x1) { arr[ii] = val; } + if (rand() & 0x1) { arr[ii] = val; } } } else { From 1b0a37477a8b5be5e02f98f224a65047eee8151b Mon Sep 17 00:00:00 2001 From: Raghuveer Devulapalli Date: Tue, 27 Feb 2024 15:04:07 -0800 Subject: [PATCH 4/7] Remove dead code --- src/xss-pivot-selection.hpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/xss-pivot-selection.hpp b/src/xss-pivot-selection.hpp index 6ce0b88..c09dfc6 100644 --- a/src/xss-pivot-selection.hpp +++ b/src/xss-pivot-selection.hpp @@ -148,12 +148,7 @@ get_pivot_smart(type_t *arr, const arrsize_t left, const arrsize_t right) return pivot_results( comparator::choosePivotMedianIsLargest(median)); } - else { - // Should be unreachable - return pivot_results(median); - } - // Should be unreachable return pivot_results(median); } From f49088f92c39c81d0e3b9d0909d94ca57d954090 Mon Sep 17 00:00:00 2001 From: Matthew Sterrett Date: Wed, 2 Oct 2024 10:40:16 -0700 Subject: [PATCH 5/7] Adds missing functions to AVX2 half vectors --- src/avx2-32bit-half.hpp | 28 +++++++++++++++++++++++++++- src/avx512-64bit-common.h | 3 --- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/avx2-32bit-half.hpp b/src/avx2-32bit-half.hpp index 9100cbb..9e782bb 100644 --- a/src/avx2-32bit-half.hpp +++ b/src/avx2-32bit-half.hpp @@ -64,6 +64,11 @@ struct avx2_half_vector { { return _mm_set1_epi32(type_max()); } // TODO: this should broadcast bits as is? + static opmask_t knot_opmask(opmask_t x) + { + auto allOnes = seti(-1, -1, -1, -1); + return _mm_xor_si128(x, allOnes); + } static opmask_t get_partial_loadmask(uint64_t num_to_read) { auto mask = ((0x1ull << num_to_read) - 0x1ull); @@ -186,6 +191,10 @@ struct avx2_half_vector { { return v; } + static bool all_false(opmask_t k) + { + return _mm_movemask_ps(_mm_castsi128_ps(k)) == 0; + } static int double_compressstore(type_t *left_addr, type_t *right_addr, opmask_t k, @@ -218,6 +227,11 @@ struct avx2_half_vector { { return _mm_set1_epi32(type_max()); } + static opmask_t knot_opmask(opmask_t x) + { + auto allOnes = seti(-1, -1, -1, -1); + return _mm_xor_si128(x, allOnes); + } static opmask_t get_partial_loadmask(uint64_t num_to_read) { auto mask = ((0x1ull << num_to_read) - 0x1ull); @@ -331,6 +345,10 @@ struct avx2_half_vector { { return v; } + static bool all_false(opmask_t k) + { + return _mm_movemask_ps(_mm_castsi128_ps(k)) == 0; + } static int double_compressstore(type_t *left_addr, type_t *right_addr, opmask_t k, @@ -363,7 +381,11 @@ struct avx2_half_vector { { return _mm_set1_ps(type_max()); } - + static opmask_t knot_opmask(opmask_t x) + { + auto allOnes = seti(-1, -1, -1, -1); + return _mm_xor_si128(x, allOnes); + } static regi_t seti(int v1, int v2, int v3, int v4) { return _mm_set_epi32(v1, v2, v3, v4); @@ -492,6 +514,10 @@ struct avx2_half_vector { { return _mm_castps_si128(v); } + static bool all_false(opmask_t k) + { + return _mm_movemask_ps(_mm_castsi128_ps(k)) == 0; + } static int double_compressstore(type_t *left_addr, type_t *right_addr, opmask_t k, diff --git a/src/avx512-64bit-common.h b/src/avx512-64bit-common.h index f650c24..14201d1 100644 --- a/src/avx512-64bit-common.h +++ b/src/avx512-64bit-common.h @@ -34,7 +34,6 @@ struct ymm_vector { using opmask_t = __mmask8; static const uint8_t numlanes = 8; static constexpr simd_type vec_type = simd_type::AVX512; - using swizzle_ops = avx512_ymm_64bit_swizzle_ops; using swizzle_ops = avx512_ymm_64bit_swizzle_ops; @@ -237,7 +236,6 @@ struct ymm_vector { using opmask_t = __mmask8; static const uint8_t numlanes = 8; static constexpr simd_type vec_type = simd_type::AVX512; - using swizzle_ops = avx512_ymm_64bit_swizzle_ops; using swizzle_ops = avx512_ymm_64bit_swizzle_ops; @@ -426,7 +424,6 @@ struct ymm_vector { using opmask_t = __mmask8; static const uint8_t numlanes = 8; static constexpr simd_type vec_type = simd_type::AVX512; - using swizzle_ops = avx512_ymm_64bit_swizzle_ops; using swizzle_ops = avx512_ymm_64bit_swizzle_ops; From acae7cfc54ef4a0b02c7911a2e2870d3315c6f72 Mon Sep 17 00:00:00 2001 From: Matthew Sterrett Date: Thu, 3 Oct 2024 11:11:14 -0700 Subject: [PATCH 6/7] Added early exit conditions --- src/xss-common-keyvaluesort.hpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/xss-common-keyvaluesort.hpp b/src/xss-common-keyvaluesort.hpp index b61f5b1..79b2af7 100644 --- a/src/xss-common-keyvaluesort.hpp +++ b/src/xss-common-keyvaluesort.hpp @@ -72,7 +72,7 @@ X86_SIMD_SORT_INLINE arrsize_t kvpartition(type_t1 *keys, for (int32_t i = (right - left) % vtype1::numlanes; i > 0; --i) { *smallest = std::min(*smallest, keys[left]); *biggest = std::max(*biggest, keys[left]); - if (keys[left] > pivot) { + if (keys[left] >= pivot) { right--; std::swap(keys[left], keys[right]); std::swap(indexes[left], indexes[right]); @@ -204,12 +204,13 @@ X86_SIMD_SORT_INLINE arrsize_t kvpartition_unrolled(type_t1 *keys, return kvpartition( keys, indexes, left, right, pivot, smallest, biggest); } + /* make array length divisible by vtype1::numlanes , shortening the array */ for (int32_t i = ((right - left) % (num_unroll * vtype1::numlanes)); i > 0; --i) { *smallest = std::min(*smallest, keys[left]); *biggest = std::max(*biggest, keys[left]); - if (keys[left] > pivot) { + if (keys[left] >= pivot) { right--; std::swap(keys[left], keys[right]); std::swap(indexes[left], indexes[right]); @@ -386,21 +387,27 @@ X86_SIMD_SORT_INLINE void kvsort_(type1_t *keys, * Base case: use bitonic networks to sort arrays <= 128 */ if (right + 1 - left <= 128) { - kvsort_n( keys + left, indexes + left, (int32_t)(right + 1 - left)); return; } + // Ascending comparator for this vtype + using comparator = Comparator; type1_t pivot; - auto pivot_result = get_pivot_smart(keys, left, right); + auto pivot_result + = get_pivot_smart(keys, left, right); pivot = pivot_result.pivot; + if (pivot_result.result == pivot_result_t::Sorted) { return; } + type1_t smallest = vtype1::type_max(); type1_t biggest = vtype1::type_min(); arrsize_t pivot_index = kvpartition_unrolled( keys, indexes, left, right + 1, pivot, &smallest, &biggest); + if (pivot_result.result == pivot_result_t::Only2Values) { return; } + #ifdef XSS_COMPILE_OPENMP if (pivot != smallest) { bool parallel_left = (pivot_index - left) > task_threshold; From 8d378c9c9d43eac5bf840b26e86db8387beb27bf Mon Sep 17 00:00:00 2001 From: Matthew Sterrett Date: Fri, 4 Oct 2024 14:56:57 -0700 Subject: [PATCH 7/7] Fix indexing problem --- utils/rand_array.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/rand_array.h b/utils/rand_array.h index 74de254..dccbacd 100644 --- a/utils/rand_array.h +++ b/utils/rand_array.h @@ -136,7 +136,7 @@ static std::vector get_array(std::string arrtype, else { val = std::numeric_limits::max(); } - for (size_t ii = 1; ii <= arrsize; ++ii) { + for (size_t ii = 0; ii < arrsize; ++ii) { if (rand() & 0x1) { arr[ii] = val; } } }