From 3379917930cfc603b1ede76e3bfeec8023ac10a6 Mon Sep 17 00:00:00 2001 From: Jimmy Lu Date: Wed, 17 Jan 2024 08:43:05 -0800 Subject: [PATCH] Extract storeBitsToByte to be reused (#8416) Summary: We will need this to fix `simd::gatherBits` for non-AVX cases. See https://github.com/facebookincubator/velox/pull/8415 Differential Revision: D52837098 --- velox/common/base/BitUtil.h | 22 ++++++++++++++++++++++ velox/common/base/tests/BitUtilTest.cpp | 17 +++++++++++++++++ velox/dwio/common/DecoderUtil.cpp | 22 ++-------------------- 3 files changed, 41 insertions(+), 20 deletions(-) diff --git a/velox/common/base/BitUtil.h b/velox/common/base/BitUtil.h index 350b05fa48db..746d5dfaeb32 100644 --- a/velox/common/base/BitUtil.h +++ b/velox/common/base/BitUtil.h @@ -16,6 +16,8 @@ #pragma once +#include "velox/common/base/Exceptions.h" + #include #include #include @@ -977,6 +979,26 @@ inline __int128_t builtin_bswap128(__int128_t value) { #endif } +/// Store `bits' into the memory region pointed by `byte', at `index' (bit +/// index). If `kSize' is 8, we store the whole byte directly; otherwise it +/// must be 4 and we store either the whole byte or the upper 4 bits only, +/// depending on the `index'. +template +void storeBitsToByte(uint8_t bits, uint8_t* bytes, unsigned index) { + VELOX_DCHECK_EQ(index % kSize, 0); + VELOX_DCHECK_EQ(bits >> kSize, 0); + if constexpr (kSize == 8) { + bytes[index / 8] = bits; + } else { + VELOX_DCHECK_EQ(kSize, 4); + if (index % 8 == 0) { + bytes[index / 8] = bits; + } else { + bytes[index / 8] |= bits << 4; + } + } +} + } // namespace bits } // namespace velox } // namespace facebook diff --git a/velox/common/base/tests/BitUtilTest.cpp b/velox/common/base/tests/BitUtilTest.cpp index 754e001ef5e9..f181e412d900 100644 --- a/velox/common/base/tests/BitUtilTest.cpp +++ b/velox/common/base/tests/BitUtilTest.cpp @@ -853,6 +853,23 @@ TEST_F(BitUtilTest, countLeadingZeros) { EXPECT_EQ( countLeadingZeros<__uint128_t>(HugeInt::build(0x08FFFFFFFFFFFFFF, 0)), 4); } + +TEST_F(BitUtilTest, storeBitsToByte) { + uint8_t bytes[3]{}; + storeBitsToByte<8>(0xAA, bytes, 0); + ASSERT_EQ(bytes[0], 0xAA); + ASSERT_EQ(bytes[1], 0); + ASSERT_EQ(bytes[2], 0); + storeBitsToByte<4>(0x5, bytes, 8); + ASSERT_EQ(bytes[0], 0xAA); + ASSERT_EQ(bytes[1], 0x5); + ASSERT_EQ(bytes[2], 0); + storeBitsToByte<4>(0xA, bytes, 12); + ASSERT_EQ(bytes[0], 0xAA); + ASSERT_EQ(bytes[1], 0xA5); + ASSERT_EQ(bytes[2], 0); +} + } // namespace bits } // namespace velox } // namespace facebook diff --git a/velox/dwio/common/DecoderUtil.cpp b/velox/dwio/common/DecoderUtil.cpp index 7cdf943db390..460b2abcdf5d 100644 --- a/velox/dwio/common/DecoderUtil.cpp +++ b/velox/dwio/common/DecoderUtil.cpp @@ -92,16 +92,7 @@ bool nonNullRowsFromSparse( if (isDense(rows.data() + i, width)) { uint16_t flags = load8Bits(nulls, rows[i]) & widthMask; if (outputNulls) { - if constexpr (kStep == 8) { - resultNullBytes[i / 8] = flags; - } else { - VELOX_DCHECK_EQ(kStep, 4); - if (i % 8 == 0) { - resultNullBytes[i / 8] = flags; - } else { - resultNullBytes[i / 8] |= flags << 4; - } - } + bits::storeBitsToByte(flags, resultNullBytes, i); anyNull |= flags != widthMask; } if (!flags) { @@ -131,16 +122,7 @@ bool nonNullRowsFromSparse( auto next8Rows = xsimd::load_unaligned(rows.data() + i); uint16_t flags = simd::gather8Bits(nulls, next8Rows, width); if (outputNulls) { - if constexpr (kStep == 8) { - resultNullBytes[i / 8] = flags; - } else { - VELOX_DCHECK_EQ(kStep, 4); - if (i % 8 == 0) { - resultNullBytes[i / 8] = flags; - } else { - resultNullBytes[i / 8] |= flags << 4; - } - } + bits::storeBitsToByte(flags, resultNullBytes, i); anyNull |= flags != widthMask; } if (!flags) {