From 7e324fc1bfa227ee6bfbc930eb851af2d30b270c Mon Sep 17 00:00:00 2001 From: zuyu Date: Mon, 2 Dec 2024 13:21:28 -0800 Subject: [PATCH] refactor(parquet): Int96 timestamp column reader Co-authored-by: Kapil Singh --- .../parquet/reader/TimestampColumnReader.h | 32 ++++++------------- velox/type/Timestamp.h | 15 +++++++++ 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/velox/dwio/parquet/reader/TimestampColumnReader.h b/velox/dwio/parquet/reader/TimestampColumnReader.h index 308743ecea4fc..4ae2d772c2c07 100644 --- a/velox/dwio/parquet/reader/TimestampColumnReader.h +++ b/velox/dwio/parquet/reader/TimestampColumnReader.h @@ -22,8 +22,14 @@ namespace facebook::velox::parquet { namespace { +Timestamp toInt96Timestamp(const int128_t& value) { + const int32_t days = static_cast<int32_t>(value >> 64); + const uint64_t nanos = value & ((((1ULL << 63) - 1ULL) << 1) + 1); + return Timestamp::fromDaysAndNanos(days, nanos); +} + // Range filter for Parquet Int96 Timestamp. -class ParquetInt96TimestampRange : public common::TimestampRange { +class ParquetInt96TimestampRange final : public common::TimestampRange { public: // @param lower Lower end of the range, inclusive. // @param upper Upper end of the range, inclusive. @@ -37,9 +43,7 @@ class ParquetInt96TimestampRange : public common::TimestampRange { // Int96 is read as int128_t value and converted to Timestamp by extracting // days and nanos. bool testInt128(int128_t value) const final override { - const int32_t days = static_cast<int32_t>(value >> 64); - const uint64_t nanos = value & ((((1ULL << 63) - 1ULL) << 1) + 1); - const auto ts = Timestamp::fromDaysAndNanos(days, nanos); + const auto ts = toInt96Timestamp(value); return ts >= this->lower() && ts <= this->upper(); } }; @@ -77,22 +81,7 @@ class TimestampColumnReader : public IntegerColumnReader { // Convert int128_t to Timestamp by extracting days and nanos. 
const int128_t encoded = reinterpret_cast<int128_t&>(rawValues[i]); - const int32_t days = static_cast<int32_t>(encoded >> 64); - uint64_t nanos = encoded & ((((1ULL << 63) - 1ULL) << 1) + 1); - const auto timestamp = Timestamp::fromDaysAndNanos(days, nanos); - - nanos = timestamp.getNanos(); - switch (timestampPrecision_) { - case TimestampPrecision::kMilliseconds: - nanos = nanos / 1'000'000 * 1'000'000; - break; - case TimestampPrecision::kMicroseconds: - nanos = nanos / 1'000 * 1'000; - break; - case TimestampPrecision::kNanoseconds: - break; - } - rawValues[i] = Timestamp(timestamp.getSeconds(), nanos); + rawValues[i] = toInt96Timestamp(encoded).toPrecision(timestampPrecision_); } } @@ -126,7 +115,6 @@ class TimestampColumnReader : public IntegerColumnReader { rows, extractValues)); } - return; } void read( @@ -143,7 +131,7 @@ class TimestampColumnReader : public IntegerColumnReader { private: // The requested precision can be specified from HiveConfig to read timestamp // from Parquet. - TimestampPrecision timestampPrecision_; + const TimestampPrecision timestampPrecision_; }; } // namespace facebook::velox::parquet diff --git a/velox/type/Timestamp.h b/velox/type/Timestamp.h index 88a31e72f62e7..a342867e0d54d 100644 --- a/velox/type/Timestamp.h +++ b/velox/type/Timestamp.h @@ -198,6 +198,21 @@ struct Timestamp { } } + Timestamp toPrecision(const TimestampPrecision& precision) const { + uint64_t nanos = nanos_; + switch (precision) { + case TimestampPrecision::kMilliseconds: + nanos = nanos / 1'000'000 * 1'000'000; + break; + case TimestampPrecision::kMicroseconds: + nanos = nanos / 1'000 * 1'000; + break; + case TimestampPrecision::kNanoseconds: + break; + } + return Timestamp(seconds_, nanos); + } + /// Exports the current timestamp as a std::chrono::time_point of millisecond /// precision. Note that the conversion may overflow since the internal /// `seconds_` value will need to be multiplied by 1000.