From 4dd0e60259575fa740b62a19aa6231928c31aa6c Mon Sep 17 00:00:00 2001
From: wypb
Date: Thu, 5 Dec 2024 11:12:25 +0800
Subject: [PATCH] fix: Support timestamp type partition filter

---
 velox/connectors/hive/HiveConnectorUtil.cpp | 16 +++++
 velox/exec/tests/TableScanTest.cpp          | 69 ++++++++++++++++++++-
 2 files changed, 83 insertions(+), 2 deletions(-)

diff --git a/velox/connectors/hive/HiveConnectorUtil.cpp b/velox/connectors/hive/HiveConnectorUtil.cpp
index fb64001db0d0d..e1694486b9de0 100644
--- a/velox/connectors/hive/HiveConnectorUtil.cpp
+++ b/velox/connectors/hive/HiveConnectorUtil.cpp
@@ -665,6 +665,22 @@ bool applyPartitionFilter(
     case TypeKind::BOOLEAN: {
       return applyFilter(*filter, folly::to<bool>(partitionValue));
     }
+    case TypeKind::TIMESTAMP: {
+      // Parse the partition value with kPrestoCast rules. On a malformed
+      // value, fail with the value and the parser's message instead of a bare
+      // check so the offending partition can be identified.
+      auto result = util::fromTimestampString(
+          StringView(partitionValue), util::TimestampParseMode::kPrestoCast);
+      VELOX_CHECK(
+          !result.hasError(),
+          "Invalid timestamp partition value '{}': {}",
+          partitionValue,
+          result.error().message());
+      // NOTE(review): toGMT() with the default timezone presumably treats the
+      // partition string as local time in that zone - confirm.
+      result.value().toGMT(Timestamp::defaultTimezone());
+      return applyFilter(*filter, result.value());
+    }
     case TypeKind::VARCHAR: {
       return applyFilter(*filter, partitionValue);
     }
diff --git a/velox/exec/tests/TableScanTest.cpp b/velox/exec/tests/TableScanTest.cpp
index 0ebd1222c9886..bdb4f3c266c48 100644
--- a/velox/exec/tests/TableScanTest.cpp
+++ b/velox/exec/tests/TableScanTest.cpp
@@ -199,8 +199,23 @@ class TableScanTest : public virtual HiveConnectorTestBase {
                   .endTableScan()
                   .planNode();
 
-    std::string partitionValueStr =
-        partitionValue.has_value() ? "'" + *partitionValue + "'" : "null";
+    // The reference query is given the expected partition column value as a
+    // SQL literal. For timestamp keys the literal is the parsed value after
+    // toGMT(), in Timestamp::toString() form, not the raw partition string.
+    std::string partitionValueStr;
+    if (partitionType->isTimestamp() && partitionValue.has_value()) {
+      auto t = util::fromTimestampString(
+                   StringView(*partitionValue),
+                   util::TimestampParseMode::kPrestoCast)
+                   .thenOrThrow(folly::identity, [&](const Status& status) {
+                     VELOX_USER_FAIL("{}", status.message());
+                   });
+      t.toGMT(Timestamp::defaultTimezone());
+      partitionValueStr = "'" + t.toString() + "'";
+    } else {
+      partitionValueStr =
+          partitionValue.has_value() ? "'" + *partitionValue + "'" : "null";
+    }
     assertQuery(
         op, split, fmt::format("SELECT {}, * FROM tmp", partitionValueStr));
 
@@ -1980,6 +1995,56 @@ TEST_F(TableScanTest, partitionedTableDateKey) {
   }
 }
 
+TEST_F(TableScanTest, partitionedTableTimestampKey) {
+  auto rowType = ROW({"c0", "c1"}, {BIGINT(), DOUBLE()});
+  auto vectors = makeVectors(10, 1'000, rowType);
+  auto filePath = TempFilePath::create();
+  writeToFile(filePath->getPath(), vectors);
+  createDuckDbTable(vectors);
+  const std::string partitionValue = "2023-10-27 00:12:35";
+  testPartitionedTable(filePath->getPath(), TIMESTAMP(), partitionValue);
+
+  // Test partition filter on TIMESTAMP column.
+  {
+    auto split = exec::test::HiveConnectorSplitBuilder(filePath->getPath())
+                     .partitionKey("pkey", partitionValue)
+                     .build();
+    auto outputType =
+        ROW({"pkey", "c0", "c1"}, {TIMESTAMP(), BIGINT(), DOUBLE()});
+    ColumnHandleMap assignments = {
+        {"pkey", partitionKey("pkey", TIMESTAMP())},
+        {"c0", regularColumn("c0", BIGINT())},
+        {"c1", regularColumn("c1", DOUBLE())}};
+
+    // Parse the partition value once and shift it with toGMT(); the same
+    // value serves as the filter bound and as the expected query output.
+    auto expected =
+        util::fromTimestampString(
+            StringView(partitionValue), util::TimestampParseMode::kPrestoCast)
+            .thenOrThrow(folly::identity, [&](const Status& status) {
+              VELOX_USER_FAIL("{}", status.message());
+            });
+    expected.toGMT(Timestamp::defaultTimezone());
+
+    // pkey = 2023-10-27 00:12:35.
+    SubfieldFilters filters;
+    filters[common::Subfield("pkey")] =
+        std::make_unique<common::TimestampRange>(expected, expected, false);
+
+    auto tableHandle = std::make_shared<HiveTableHandle>(
+        "test-hive", "hive_table", true, std::move(filters), nullptr, nullptr);
+    auto op = std::make_shared<TableScanNode>(
+        "0",
+        std::move(outputType),
+        std::move(tableHandle),
+        std::move(assignments));
+
+    const std::string partitionValueStr = "'" + expected.toString() + "'";
+    assertQuery(
+        op, split, fmt::format("SELECT {}, * FROM tmp", partitionValueStr));
+  }
+}
+
 std::vector<StringView> toStringViews(const std::vector<std::string>& values) {
   std::vector<StringView> views;
   views.reserve(values.size());