Skip to content

Commit 6cf6829

Browse files
committed
Doc
1 parent e792eaf commit 6cf6829

File tree

1 file changed

+11
-8
lines changed

1 file changed

+11
-8
lines changed

cpp/src/arrow/util/bpacking_simd_impl_internal.h

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ namespace arrow::internal {
4040
// - Shifts per swizzle can be improved when self.packed_max_byte_spread == 1 and the
4141
// byte can be reused (when val_bit_width divides packed_max_byte_spread).
4242
// - Try for uint16_t and uint8_t and bool (currently copy)
43+
// - Add unpack_exact to benchmarks
44+
// - Reduce input size on small bit width using a broadcast.
4345
// - For Avx2:
4446
// - Inspect how swizzles across lanes are handled: is _mm256_shuffle_epi8 not used?
4547
// - Investigate AVX2 with 128 bit register
@@ -287,13 +289,14 @@ constexpr auto make_batch_constant() {
287289
// Intel x86-64 does not have variable left shifts before AVX2.
288290
//
289291
// We replace the variable left shift by a variable multiply with a power of two.
292+
// The behaviour is the same as long as there is no overflow.
290293
//
291294
// This trick is borrowed from Daniel Lemire and Leonid Boytsov, Decoding billions of
292295
// integers per second through vectorization, Software Practice & Experience 45 (1), 2015.
293296
// http://arxiv.org/abs/1209.2137
294297
template <typename Arch, typename Int, Int... kShifts>
295-
auto left_shift(const xsimd::batch<Int, Arch>& batch,
296-
xsimd::batch_constant<Int, Arch, kShifts...> shifts) {
298+
auto left_shift_no_overflow(const xsimd::batch<Int, Arch>& batch,
299+
xsimd::batch_constant<Int, Arch, kShifts...> shifts) {
297300
constexpr bool kHasSse2 = xsimd::supported_architectures::contains<xsimd::sse2>();
298301
constexpr bool kHasAvx2 = xsimd::supported_architectures::contains<xsimd::avx2>();
299302

@@ -324,8 +327,8 @@ auto left_shift(const xsimd::batch<Int, Arch>& batch,
324327
// integers per second through vectorization, Software Practice & Experience 45 (1), 2015.
325328
// http://arxiv.org/abs/1209.2137
326329
template <typename Arch, typename Int, Int... kShifts>
327-
auto overflow_right_shift(const xsimd::batch<Int, Arch>& batch,
328-
xsimd::batch_constant<Int, Arch, kShifts...> shifts) {
330+
auto right_shift_by_excess(const xsimd::batch<Int, Arch>& batch,
331+
xsimd::batch_constant<Int, Arch, kShifts...> shifts) {
329332
constexpr bool kHasSse2 = xsimd::supported_architectures::contains<xsimd::sse2>();
330333
constexpr bool kHasAvx2 = xsimd::supported_architectures::contains<xsimd::avx2>();
331334

@@ -375,7 +378,7 @@ struct MediumKernel {
375378
// Intel x86-64 does not have variable right shifts before AVX2.
376379
// We know the packed value can safely be left shifted up to the largest offset so we
377380
// can use the fallback on these platforms.
378-
const auto shifted = overflow_right_shift(words, kRightShifts);
381+
const auto shifted = right_shift_by_excess(words, kRightShifts);
379382
const auto vals = shifted & kMask;
380383
xsimd::store_unaligned(out + kOutOffset, vals);
381384
}
@@ -515,7 +518,7 @@ struct LargeKernel {
515518
// Intel x86-64 does not have variable right shifts before AVX2.
516519
// We know the packed value can safely be left shifted up to the largest offset so we
517520
// can use the fallback on these platforms.
518-
const auto shifted = overflow_right_shift(words, kRightShifts);
521+
const auto shifted = right_shift_by_excess(words, kRightShifts);
519522
const auto vals = shifted & kMask;
520523
xsimd::store_unaligned(out + kOutOffset, vals);
521524
}
@@ -547,12 +550,12 @@ struct LargeKernel {
547550

548551
const auto low_swizzled = xsimd::swizzle(bytes, kLowSwizzles);
549552
const auto low_words = xsimd::bitwise_cast<unpacked_type>(low_swizzled);
550-
const auto low_shifted = overflow_right_shift(low_words, kLowRShifts);
553+
const auto low_shifted = right_shift_by_excess(low_words, kLowRShifts);
551554
const auto low_half_vals = low_shifted & kPlan.low_mask;
552555

553556
const auto high_swizzled = xsimd::swizzle(bytes, kHighSwizzles);
554557
const auto high_words = xsimd::bitwise_cast<unpacked_type>(high_swizzled);
555-
const auto high_shifted = left_shift(high_words, kHighLShifts);
558+
const auto high_shifted = left_shift_no_overflow(high_words, kHighLShifts);
556559
const auto high_half_vals = high_shifted & kPlan.high_mask;
557560

558561
const auto vals = low_half_vals | high_half_vals;

0 commit comments

Comments
 (0)