Skip to content

Commit

Permalink
Remove hard-coded special treatment for $path and $bucket
Browse files Browse the repository at this point in the history
Summary: These special columns are engine-specific and should be handled during engine split generation by setting the corresponding values in split `infoColumns`.  For example in Prestissimo this is done in https://github.com/prestodb/presto/blob/48f0a0c1d380b1155dfd7c99b134a350627c7260/presto-native-execution/presto_cpp/main/types/PrestoToVeloxConnector.cpp#L1112-L1120.

Differential Revision: D64180231
  • Loading branch information
Yuhta authored and facebook-github-bot committed Oct 10, 2024
1 parent 9cf4ee0 commit ba9d540
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 25 deletions.
2 changes: 1 addition & 1 deletion velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ inline bool isSynthesizedColumn(
const std::string& name,
const std::unordered_map<std::string, std::shared_ptr<HiveColumnHandle>>&
infoColumns) {
return name == kPath || name == kBucket || infoColumns.count(name) != 0;
return infoColumns.count(name) != 0;
}

inline bool isRowIndexColumn(
Expand Down
3 changes: 0 additions & 3 deletions velox/connectors/hive/HiveConnectorUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ struct HiveConnectorSplit;
using SubfieldFilters =
std::unordered_map<common::Subfield, std::unique_ptr<common::Filter>>;

constexpr const char* kPath = "$path";
constexpr const char* kBucket = "$bucket";

const std::string& getColumnName(const common::Subfield& subfield);

void checkColumnNameLowerCase(const std::shared_ptr<const Type>& type);
Expand Down
19 changes: 0 additions & 19 deletions velox/connectors/hive/SplitReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,25 +336,6 @@ std::vector<TypePtr> SplitReader::adaptColumns(
if (auto it = hiveSplit_->partitionKeys.find(fieldName);
it != hiveSplit_->partitionKeys.end()) {
setPartitionValue(childSpec, fieldName, it->second);
} else if (fieldName == kPath) {
auto constantVec = std::make_shared<ConstantVector<StringView>>(
connectorQueryCtx_->memoryPool(),
1,
false,
VARCHAR(),
StringView(hiveSplit_->filePath));
childSpec->setConstantValue(constantVec);
} else if (fieldName == kBucket) {
if (hiveSplit_->tableBucketNumber.has_value()) {
int32_t bucket = hiveSplit_->tableBucketNumber.value();
auto constantVec = std::make_shared<ConstantVector<int32_t>>(
connectorQueryCtx_->memoryPool(),
1,
false,
INTEGER(),
std::move(bucket));
childSpec->setConstantValue(constantVec);
}
} else if (auto iter = hiveSplit_->infoColumns.find(fieldName);
iter != hiveSplit_->infoColumns.end()) {
auto infoColumnType =
Expand Down
7 changes: 5 additions & 2 deletions velox/exec/tests/utils/HiveConnectorTestBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,9 @@ class HiveConnectorTestBase : public OperatorTestBase {
class HiveConnectorSplitBuilder {
public:
HiveConnectorSplitBuilder(std::string filePath)
: filePath_{std::move(filePath)} {}
: filePath_{filePath.find("/") == 0 ? "file:" + filePath : filePath} {
infoColumns_["$path"] = filePath_;
}

HiveConnectorSplitBuilder& start(uint64_t start) {
start_ = start;
Expand Down Expand Up @@ -264,6 +266,7 @@ class HiveConnectorSplitBuilder {

HiveConnectorSplitBuilder& tableBucketNumber(int32_t bucket) {
tableBucketNumber_ = bucket;
infoColumns_["$bucket"] = std::to_string(bucket);
return *this;
}

Expand Down Expand Up @@ -296,7 +299,7 @@ class HiveConnectorSplitBuilder {
static const std::unordered_map<std::string, std::string> serdeParameters;
return std::make_shared<connector::hive::HiveConnectorSplit>(
connectorId_,
filePath_.find("/") == 0 ? "file:" + filePath_ : filePath_,
filePath_,
fileFormat_,
start_,
length_,
Expand Down

0 comments on commit ba9d540

Please sign in to comment.