Skip to content

Commit

Permalink
fix: Support timestamp type partition filter
Browse files Browse the repository at this point in the history
  • Loading branch information
wypb committed Dec 5, 2024
1 parent 03f0894 commit 4dd0e60
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 2 deletions.
7 changes: 7 additions & 0 deletions velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,13 @@ bool applyPartitionFilter(
case TypeKind::BOOLEAN: {
return applyFilter(*filter, folly::to<bool>(partitionValue));
}
case TypeKind::TIMESTAMP: {
auto result = util::fromTimestampString(
StringView(partitionValue), util::TimestampParseMode::kPrestoCast);
VELOX_CHECK(!result.hasError());
result.value().toGMT(Timestamp::defaultTimezone());
return applyFilter(*filter, result.value());
}
case TypeKind::VARCHAR: {
return applyFilter(*filter, partitionValue);
}
Expand Down
68 changes: 66 additions & 2 deletions velox/exec/tests/TableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,20 @@ class TableScanTest : public virtual HiveConnectorTestBase {
.endTableScan()
.planNode();

std::string partitionValueStr =
partitionValue.has_value() ? "'" + *partitionValue + "'" : "null";
std::string partitionValueStr;
if (partitionType->isTimestamp() && partitionValue.has_value()) {
auto t = util::fromTimestampString(
StringView(*partitionValue),
util::TimestampParseMode::kPrestoCast)
.thenOrThrow(folly::identity, [&](const Status& status) {
VELOX_USER_FAIL("{}", status.message());
});
t.toGMT(Timestamp::defaultTimezone());
partitionValueStr = "'" + t.toString() + "'";
} else {
partitionValueStr =
partitionValue.has_value() ? "'" + *partitionValue + "'" : "null";
}
assertQuery(
op, split, fmt::format("SELECT {}, * FROM tmp", partitionValueStr));

Expand Down Expand Up @@ -1980,6 +1992,58 @@ TEST_F(TableScanTest, partitionedTableDateKey) {
}
}

TEST_F(TableScanTest, partitionedTableTimestampKey) {
  // Write a (BIGINT, DOUBLE) data file and load the same rows into DuckDB so
  // scan results can be compared against a reference query over "tmp".
  auto dataType = ROW({"c0", "c1"}, {BIGINT(), DOUBLE()});
  auto batches = makeVectors(10, 1'000, dataType);
  auto dataFile = TempFilePath::create();
  writeToFile(dataFile->getPath(), batches);
  createDuckDbTable(batches);

  // The partition key value as it is attached to the split (a string).
  const std::string partitionValue = "2023-10-27 00:12:35";
  testPartitionedTable(dataFile->getPath(), TIMESTAMP(), partitionValue);

  // Test partition filter on TIMESTAMP column.
  {
    // Filter: pkey = 2023-10-27 00:12:35, expressed as a TimestampRange whose
    // lower and upper bounds are the same timestamp. The literal is parsed
    // and then passed through toGMT() with the default timezone.
    auto bound = util::fromTimestampString(
                     StringView("2023-10-27 00:12:35"),
                     util::TimestampParseMode::kPrestoCast)
                     .value();
    bound.toGMT(Timestamp::defaultTimezone());

    SubfieldFilters pkeyFilters;
    pkeyFilters.emplace(
        common::Subfield("pkey"),
        std::make_unique<common::TimestampRange>(bound, bound, false));

    auto hiveTable = std::make_shared<HiveTableHandle>(
        "test-hive",
        "hive_table",
        true,
        std::move(pkeyFilters),
        nullptr,
        nullptr);

    // Scan output is the partition column followed by the two file columns.
    auto scanType =
        ROW({"pkey", "c0", "c1"}, {TIMESTAMP(), BIGINT(), DOUBLE()});
    ColumnHandleMap columnHandles{
        {"pkey", partitionKey("pkey", TIMESTAMP())},
        {"c0", regularColumn("c0", BIGINT())},
        {"c1", regularColumn("c1", DOUBLE())}};

    auto scanNode = std::make_shared<TableScanNode>(
        "0",
        std::move(scanType),
        std::move(hiveTable),
        std::move(columnHandles));

    auto partitionedSplit =
        exec::test::HiveConnectorSplitBuilder(dataFile->getPath())
            .partitionKey("pkey", partitionValue)
            .build();

    // Build the expected pkey literal for the reference query by applying the
    // same parse + toGMT steps to the partition value string; a parse failure
    // is reported as a user error carrying the status message.
    auto expectedTs =
        util::fromTimestampString(
            StringView(partitionValue), util::TimestampParseMode::kPrestoCast)
            .thenOrThrow(folly::identity, [&](const Status& status) {
              VELOX_USER_FAIL("{}", status.message());
            });
    expectedTs.toGMT(Timestamp::defaultTimezone());
    const std::string quotedExpected = "'" + expectedTs.toString() + "'";

    assertQuery(
        scanNode,
        partitionedSplit,
        fmt::format("SELECT {}, * FROM tmp", quotedExpected));
  }
}

std::vector<StringView> toStringViews(const std::vector<std::string>& values) {
std::vector<StringView> views;
views.reserve(values.size());
Expand Down

0 comments on commit 4dd0e60

Please sign in to comment.