From 87ce0eee5c18d58827d0717e74180e95942865c4 Mon Sep 17 00:00:00 2001 From: zuyu Date: Sun, 8 Dec 2024 16:18:58 -0800 Subject: [PATCH] refactor: IntDecoder --- velox/dwio/common/IntCodecCommon.h | 1 + velox/dwio/common/IntDecoder.h | 85 ++++++++++++++---------------- 2 files changed, 41 insertions(+), 45 deletions(-) diff --git a/velox/dwio/common/IntCodecCommon.h b/velox/dwio/common/IntCodecCommon.h index a83f663689fb8..f17cc12287b76 100644 --- a/velox/dwio/common/IntCodecCommon.h +++ b/velox/dwio/common/IntCodecCommon.h @@ -26,6 +26,7 @@ constexpr uint32_t LONG_BYTE_SIZE = 8; constexpr uint64_t BASE_128_MASK = 0x7f; constexpr uint64_t BASE_256_MASK = 0xff; +constexpr __int128_t INT128_BASE_128_MASK = 0x7f; constexpr __int128_t INT128_BASE_256_MASK = 0xff; // Timezone constants diff --git a/velox/dwio/common/IntDecoder.h b/velox/dwio/common/IntDecoder.h index aeba2e2d01d86..29f1cf8709b1e 100644 --- a/velox/dwio/common/IntDecoder.h +++ b/velox/dwio/common/IntDecoder.h @@ -293,7 +293,7 @@ FOLLY_ALWAYS_INLINE uint64_t IntDecoder::readVuLong() { } int64_t result = 0; - int64_t offset = 0; + uint32_t offset = 0; signed char ch; do { ch = readByte(); @@ -311,42 +311,44 @@ FOLLY_ALWAYS_INLINE int64_t IntDecoder::readVsLong() { template inline int64_t IntDecoder::readLongLE() { VELOX_DCHECK_EQ(pendingSkip_, 0); - int64_t result = 0; - if (bufferStart_ && bufferStart_ + sizeof(int64_t) <= bufferEnd_) { + if (bufferStart_ && bufferStart_ + numBytes_ <= bufferEnd_) { bufferStart_ += numBytes_; if (numBytes_ == 8) { return *reinterpret_cast(bufferStart_ - 8); } - if (numBytes_ == 4) { - if (isSigned) { + + if constexpr (isSigned) { + if (numBytes_ == 4) { return *reinterpret_cast(bufferStart_ - 4); } - return *reinterpret_cast(bufferStart_ - 4); - } - if (isSigned) { return *reinterpret_cast(bufferStart_ - 2); + } else { + if (numBytes_ == 4) { + return *reinterpret_cast(bufferStart_ - 4); + } + return *reinterpret_cast(bufferStart_ - 2); } - return *reinterpret_cast(bufferStart_ - 2); } - char b; - int64_t offset = 0; - for (uint32_t i = 0; i < numBytes_; ++i) { - b = readByte(); + int64_t result = 0; + for (uint32_t offset = 0; offset < numBytes_ * 8; offset += 8) { + const char b = readByte(); result |= (b & BASE_256_MASK) << offset; - offset += 8; } - if (isSigned && numBytes_ < 8) { - if (numBytes_ == 2) { - return static_cast(result); + if constexpr (isSigned) { + if (numBytes_ >= 8) { + return result; } if (numBytes_ == 4) { return static_cast(result); } - VELOX_DCHECK(false, "Bad width for signed fixed width: {}", numBytes_); + VELOX_DCHECK( + numBytes_ == 2, "Bad width for signed fixed width: {}", numBytes_); + return static_cast(result); + } else { + return result; } - return result; } template @@ -356,7 +358,7 @@ inline cppType IntDecoder::readLittleEndianFromBigEndian() { cppType bigEndianValue = 0; // Input is in Big Endian layout of size numBytes. - if (bufferStart_ && (bufferStart_ + sizeof(int64_t) <= bufferEnd_)) { + if (bufferStart_ && (bufferStart_ + numBytes_ <= bufferEnd_)) { bufferStart_ += numBytes_; const auto valueOffset = bufferStart_ - numBytes_; // Use first byte to initialize bigEndianValue. @@ -376,18 +378,15 @@ inline cppType IntDecoder::readLittleEndianFromBigEndian() { } } - char b; - cppType offset = 0; - cppType numBytesBigEndian = 0; // Read numBytes input into numBytesBigEndian. - for (uint32_t i = 0; i < numBytes_; ++i) { - b = readByte(); + cppType numBytesBigEndian = 0; + for (uint32_t offset = 0; offset < numBytes_ * 8; offset += 8) { + const char b = readByte(); if constexpr (sizeof(cppType) == 16) { numBytesBigEndian |= (b & INT128_BASE_256_MASK) << offset; } else { numBytesBigEndian |= (b & BASE_256_MASK) << offset; } - offset += 8; } // Use first byte to initialize bigEndianValue. bigEndianValue = @@ -415,19 +414,13 @@ inline uint128_t IntDecoder::readVuHugeInt() { VELOX_DCHECK_EQ(pendingSkip_, 0); uint128_t value = 0; - uint128_t work; uint32_t offset = 0; signed char ch; - while (true) { + do { ch = readByte(); - work = ch & 0x7f; - work <<= offset; - value |= work; + value |= (ch & INT128_BASE_128_MASK) << offset; offset += 7; - if (!(ch & 0x80)) { - break; - } - } + } while (ch < 0); return value; } @@ -439,24 +432,26 @@ inline T IntDecoder::readInt() { } if (bigEndian_) { return readLittleEndianFromBigEndian(); - } else { - if constexpr (std::is_same_v) { - if (numBytes_ == 12) { - VELOX_DCHECK(!useVInts_, "Int96 should not be VInt encoded."); - return readInt96(); - } - VELOX_NYI(); + } + + if constexpr (std::is_same_v) { + if (numBytes_ == 12) { + return readInt96(); } + VELOX_NYI(); + } else { return readLongLE(); } } template inline int128_t IntDecoder::readInt96() { + VELOX_DCHECK(!useVInts_, "Int96 should not be VInt encoded."); + int128_t result = 0; - for (int i = 0; i < 12; ++i) { - auto ch = readByte(); - result |= static_cast(ch & BASE_256_MASK) << (i * 8); + for (uint32_t offset = 0; offset < 96; offset += 8) { + const auto ch = readByte(); + result |= (ch & INT128_BASE_256_MASK) << offset; } return result; }