From 630f96c0068e870042a757dfed7c168a43a6d7a5 Mon Sep 17 00:00:00 2001 From: Kapil Singh Date: Wed, 7 Feb 2024 11:27:48 +0530 Subject: [PATCH] PR comments --- velox/dwio/common/TimestampDecoder.h | 8 +-- velox/dwio/parquet/reader/PageReader.cpp | 58 +++++++++------------ velox/dwio/parquet/reader/ParquetReader.cpp | 2 +- 3 files changed, 29 insertions(+), 39 deletions(-) diff --git a/velox/dwio/common/TimestampDecoder.h b/velox/dwio/common/TimestampDecoder.h index af8ab334868e1..ecc66c7e079db 100644 --- a/velox/dwio/common/TimestampDecoder.h +++ b/velox/dwio/common/TimestampDecoder.h @@ -20,12 +20,12 @@ namespace facebook::velox::dwio::common { -enum TIMESTAMP_PRECISION { MILLIS, MICROS }; +enum TimestampPrecision { kMillis, kMicros }; class TimestampDecoder : public DirectDecoder { public: TimestampDecoder( - TIMESTAMP_PRECISION precision, + TimestampPrecision precision, std::unique_ptr input, bool useVInts, uint32_t numBytes, @@ -62,7 +62,7 @@ class TimestampDecoder : public DirectDecoder { } if constexpr (std::is_same_v) { auto units = IntDecoder::template readInt(); - Timestamp timestamp = precision_ == TIMESTAMP_PRECISION::MILLIS + Timestamp timestamp = precision_ == TimestampPrecision::kMillis ? util::fromUTCMillis(units) : util::fromUTCMicros(units); @@ -85,6 +85,6 @@ class TimestampDecoder : public DirectDecoder { } private: - TIMESTAMP_PRECISION precision_; + TimestampPrecision precision_; }; } // namespace facebook::velox::dwio::common diff --git a/velox/dwio/parquet/reader/PageReader.cpp b/velox/dwio/parquet/reader/PageReader.cpp index e47c4affa7104..236f7b7fbaf32 100644 --- a/velox/dwio/parquet/reader/PageReader.cpp +++ b/velox/dwio/parquet/reader/PageReader.cpp @@ -336,8 +336,10 @@ void PageReader::prepareDictionary(const PageHeader& pageHeader) { ? sizeof(float) : sizeof(double); auto numBytes = dictionary_.numValues * typeSize; - if (type_->type()->isShortDecimal() && - parquetType == thrift::Type::INT32) { + if ((type_->type()->isShortDecimal() && + parquetType == thrift::Type::INT32) || + (type_->type()->isTimestamp() && + parquetType == thrift::Type::INT64)) { auto veloxTypeLength = type_->type()->cppSizeInBytes(); auto numVeloxBytes = dictionary_.numValues * veloxTypeLength; dictionary_.values = @@ -365,13 +367,13 @@ void PageReader::prepareDictionary(const PageHeader& pageHeader) { values[i] = parquetValues[i]; } } else if ( - parquetType == thrift::Type::INT64 && - type_->logicalType_.has_value()) { + type_->type()->isTimestamp() && parquetType == thrift::Type::INT64) { + VELOX_CHECK(type_->logicalType_.has_value()); auto logicalType = type_->logicalType_.value(); if (logicalType.__isset.TIMESTAMP) { VELOX_CHECK( logicalType.TIMESTAMP.isAdjustedToUTC, - "Only UTC adjusted Timestamp is supported"); + "Only UTC adjusted Timestamp is supported."); auto values = dictionary_.values->asMutable(); auto parquetValues = dictionary_.values->asMutable(); int64_t units; @@ -387,7 +389,7 @@ void PageReader::prepareDictionary(const PageHeader& pageHeader) { values[i] = util::fromUTCMillis(units); } } else { - VELOX_NYI("Unsupported timestamp unit"); + VELOX_NYI("Nano Timestamp unit is not supported."); } } } @@ -654,39 +656,27 @@ void PageReader::makeDecoder() { true); break; case thrift::Type::INT64: - if (type_->logicalType_.has_value()) { + if (type_->logicalType_.has_value() && + type_->logicalType_.value().__isset.TIMESTAMP) { auto logicalType = type_->logicalType_.value(); - if (logicalType.__isset.TIMESTAMP) { - VELOX_CHECK( - logicalType.TIMESTAMP.isAdjustedToUTC, - "Only UTC adjusted Timestamp is supported"); - if (logicalType.TIMESTAMP.unit.__isset.MICROS) { - timestampDecoder_ = std::make_unique< - dwio::common::TimestampDecoder>( - dwio::common::TIMESTAMP_PRECISION::MICROS, - std::make_unique( - pageData_, encodedDataSize_), - false, - parquetTypeBytes(type_->parquetType_.value())); - } else if (logicalType.TIMESTAMP.unit.__isset.MILLIS) { - timestampDecoder_ = std::make_unique< - dwio::common::TimestampDecoder>( - dwio::common::TIMESTAMP_PRECISION::MILLIS, + + VELOX_CHECK( + logicalType.TIMESTAMP.isAdjustedToUTC, + "Only UTC adjusted Timestamp is supported."); + VELOX_CHECK( + !logicalType.TIMESTAMP.unit.__isset.NANOS, + "Nano Timestamp unit not supported."); + + auto precisionUnit = logicalType.TIMESTAMP.unit.__isset.MICROS + ? dwio::common::TimestampPrecision::kMicros + : dwio::common::TimestampPrecision::kMillis; + timestampDecoder_ = + std::make_unique( + precisionUnit, std::make_unique( pageData_, encodedDataSize_), false, parquetTypeBytes(type_->parquetType_.value())); - } else { - VELOX_NYI("Timestamp unit not supported"); - } - } else { - directDecoder_ = - std::make_unique>( - std::make_unique( - pageData_, encodedDataSize_), - false, - parquetTypeBytes(type_->parquetType_.value())); - } } else { directDecoder_ = std::make_unique>( diff --git a/velox/dwio/parquet/reader/ParquetReader.cpp b/velox/dwio/parquet/reader/ParquetReader.cpp index 01399fe211320..3a20ab779ae81 100644 --- a/velox/dwio/parquet/reader/ParquetReader.cpp +++ b/velox/dwio/parquet/reader/ParquetReader.cpp @@ -411,7 +411,7 @@ std::shared_ptr ReaderBase::getParquetColumnInfo( unit.__set_MILLIS(millis); } else { VELOX_NYI( - "{} Timestamp unit not supported", schemaElement.converted_type); + "{} Timestamp unit not supported.", schemaElement.converted_type); } timestamp.__set_unit(unit);