diff --git a/velox/dwio/common/IntDecoder.h b/velox/dwio/common/IntDecoder.h index d88c01f0aaa10..913692b03eb90 100644 --- a/velox/dwio/common/IntDecoder.h +++ b/velox/dwio/common/IntDecoder.h @@ -453,27 +453,12 @@ inline T IntDecoder::readInt() { template inline int128_t IntDecoder::readInt96() { - int64_t offset = 0; - unsigned char ch; - - // Read 8 unsigned bytes. - uint64_t nanos = 0; - for (uint32_t i = 0; i < 8; ++i) { - ch = readByte(); - nanos |= (ch & BASE_256_MASK) << offset; - offset += 8; + int128_t result = 0; + for (int i = 0; i < 12; ++i) { + auto ch = readByte(); + result |= static_cast(ch & BASE_256_MASK) << (i * 8); } - - // Read 4 signed bytes. - int32_t days = 0; - offset = 0; - for (uint32_t i = 0; i < 4; ++i) { - ch = readByte(); - days |= (ch & BASE_256_MASK) << offset; - offset += 8; - } - auto ts = Timestamp::fromDaysAndNanos(days, nanos); - return reinterpret_cast(ts); + return result; } template diff --git a/velox/dwio/parquet/reader/PageReader.cpp b/velox/dwio/parquet/reader/PageReader.cpp index c185358470e68..3e9f4e0595140 100644 --- a/velox/dwio/parquet/reader/PageReader.cpp +++ b/velox/dwio/parquet/reader/PageReader.cpp @@ -377,7 +377,7 @@ void PageReader::prepareDictionary(const PageHeader& pageHeader) { break; } case thrift::Type::INT96: { - auto numVeloxBytes = dictionary_.numValues * sizeof(Timestamp); + auto numVeloxBytes = dictionary_.numValues * sizeof(int128_t); dictionary_.values = AlignedBuffer::allocate(numVeloxBytes, &pool_); auto numBytes = dictionary_.numValues * sizeof(Int96Timestamp); if (pageData_) { @@ -392,23 +392,16 @@ void PageReader::prepareDictionary(const PageHeader& pageHeader) { } // Expand the Parquet type length values to Velox type length. // We start from the end to allow in-place expansion. - auto values = dictionary_.values->asMutable(); + auto values = dictionary_.values->asMutable(); auto parquetValues = dictionary_.values->asMutable(); for (auto i = dictionary_.numValues - 1; i >= 0; --i) { - // Convert the timestamp into seconds and nanos since the Unix epoch, - // 00:00:00.000000 on 1 January 1970. - int64_t nanos; + int128_t result = 0; memcpy( - &nanos, + &result, parquetValues + i * sizeof(Int96Timestamp), - sizeof(int64_t)); - int32_t days; - memcpy( - &days, - parquetValues + i * sizeof(Int96Timestamp) + sizeof(int64_t), - sizeof(int32_t)); - values[i] = Timestamp::fromDaysAndNanos(days, nanos); + sizeof(Int96Timestamp)); + values[i] = result; } break; } diff --git a/velox/dwio/parquet/reader/TimestampColumnReader.h b/velox/dwio/parquet/reader/TimestampColumnReader.h index 03bc2dd6f5035..48b80a7a80277 100644 --- a/velox/dwio/parquet/reader/TimestampColumnReader.h +++ b/velox/dwio/parquet/reader/TimestampColumnReader.h @@ -49,8 +49,12 @@ class TimestampColumnReader : public IntegerColumnReader { if (resultVector->isNullAt(i)) { continue; } - const auto timestamp = rawValues[i]; - uint64_t nanos = timestamp.getNanos(); + const int128_t encoded = reinterpret_cast(rawValues[i]); + const int32_t days = static_cast(encoded >> 64); + uint64_t nanos = encoded & ((((1ULL << 63) - 1ULL) << 1) + 1); + const auto timestamp = Timestamp::fromDaysAndNanos(days, nanos); + + nanos = timestamp.getNanos(); switch (timestampPrecision_) { case TimestampPrecision::kMilliseconds: nanos = nanos / 1'000'000 * 1'000'000; diff --git a/velox/type/Filter.h b/velox/type/Filter.h index 4a84992f71d04..90d029065efc4 100644 --- a/velox/type/Filter.h +++ b/velox/type/Filter.h @@ -1825,7 +1825,9 @@ class TimestampRange final : public Filter { } bool testInt128(int128_t value) const final { - const auto& ts = reinterpret_cast(value); + const int32_t days = static_cast(value >> 64); + const uint64_t nanos = value & ((((1ULL << 63) - 1ULL) << 1) + 1); + const auto ts = Timestamp::fromDaysAndNanos(days, nanos); return ts >= lower_ && ts <= upper_; }