Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support date-type partition filter #9937

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Fix date filter
  • Loading branch information
rui-mo committed May 29, 2024
commit 99709158d4b895db66de02e35b06e85934eaf6d5
17 changes: 13 additions & 4 deletions velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "velox/dwio/common/Reader.h"
#include "velox/expression/Expr.h"
#include "velox/expression/ExprToSubfieldFilter.h"
#include "velox/type/TimestampConversion.h"

namespace facebook::velox::connector::hive {

Expand Down Expand Up @@ -577,10 +578,17 @@ void configureRowReaderOptions(
namespace {

bool applyPartitionFilter(
TypeKind kind,
const TypePtr& type,
const std::string& partitionValue,
common::Filter* filter) {
switch (kind) {
if (type->isDate()) {
const auto result = util::castFromDateString(
StringView(partitionValue), util::ParseMode::kStandardCast);
VELOX_CHECK(!result.hasError());
return applyFilter(*filter, result.value());
}

switch (type->kind()) {
case TypeKind::BIGINT:
case TypeKind::INTEGER:
case TypeKind::SMALLINT:
Expand All @@ -598,7 +606,8 @@ bool applyPartitionFilter(
return applyFilter(*filter, partitionValue);
}
default:
VELOX_FAIL("Bad type {} for partition value: {}", kind, partitionValue);
VELOX_FAIL(
"Bad type {} for partition value: {}", type->kind(), partitionValue);
}
}

Expand Down Expand Up @@ -629,7 +638,7 @@ bool testFilters(

// This is a non-null partition key
return applyPartitionFilter(
handlesIter->second->dataType()->kind(),
handlesIter->second->dataType(),
iter->second.value(),
child->filter());
}
Expand Down
32 changes: 31 additions & 1 deletion velox/exec/tests/TableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1753,7 +1753,37 @@ TEST_F(TableScanTest, partitionedTableDateKey) {
auto filePath = TempFilePath::create();
writeToFile(filePath->getPath(), vectors);
createDuckDbTable(vectors);
testPartitionedTable(filePath->getPath(), DATE(), "2023-10-27");
const std::string partitionValue = "2023-10-27";
testPartitionedTable(filePath->getPath(), DATE(), partitionValue);

// Test partition filter on date column.
{
auto split = HiveConnectorSplitBuilder(filePath->getPath())
.partitionKey("pkey", partitionValue)
.build();
auto outputType = ROW({"pkey", "c0", "c1"}, {DATE(), BIGINT(), DOUBLE()});
ColumnHandleMap assignments = {
{"pkey", partitionKey("pkey", DATE())},
{"c0", regularColumn("c0", BIGINT())},
{"c1", regularColumn("c1", DOUBLE())}};

SubfieldFilters filters;
// pkey >= 2020-09-01 (epoch day 18506); BigintRange bounds are inclusive.
filters[common::Subfield("pkey")] = std::make_unique<common::BigintRange>(
18506, std::numeric_limits<int64_t>::max(), false);

auto tableHandle = std::make_shared<HiveTableHandle>(
"test-hive", "hive_table", true, std::move(filters), nullptr, nullptr);
auto op = std::make_shared<TableScanNode>(
"0",
std::move(outputType),
std::move(tableHandle),
std::move(assignments));

std::string partitionValueStr = "'" + partitionValue + "'";
assertQuery(
op, split, fmt::format("SELECT {}, * FROM tmp", partitionValueStr));
}
}

std::vector<StringView> toStringViews(const std::vector<std::string>& values) {
Expand Down
Loading