Skip to content

Commit

Permalink
Rename tableStorageFormat in HiveInsertTableHandle (#11458)
Browse files Browse the repository at this point in the history
Summary:
Each partition can have its own storage format, so the format info in
HiveInsertTableHandle is not necessarily the table format.

Pull Request resolved: #11458

Reviewed By: Yuhta

Differential Revision: D65550562

Pulled By: kewang1024

fbshipit-source-id: f67b35862238c3d6c10d66cef197ac1ce1722b95
  • Loading branch information
kewang1024 authored and facebook-github-bot committed Nov 6, 2024
1 parent aba7ba8 commit 789ce65
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 16 deletions.
16 changes: 7 additions & 9 deletions velox/connectors/hive/HiveDataSink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,8 @@ HiveDataSink::HiveDataSink(
*insertTableHandle_->bucketProperty(),
inputType_)
: nullptr),
writerFactory_(dwio::common::getWriterFactory(
insertTableHandle_->tableStorageFormat())),
writerFactory_(
dwio::common::getWriterFactory(insertTableHandle_->storageFormat())),
spillConfig_(connectorQueryCtx->spillConfig()),
sortWriterFinishTimeSliceLimitMs_(getFinishTimeSliceLimitMsFromHiveConfig(
hiveConfig_,
Expand Down Expand Up @@ -438,8 +438,7 @@ HiveDataSink::HiveDataSink(
bool HiveDataSink::canReclaim() const {
// Currently, we only support memory reclaim on dwrf file writer.
return (spillConfig_ != nullptr) &&
(insertTableHandle_->tableStorageFormat() ==
dwio::common::FileFormat::DWRF);
(insertTableHandle_->storageFormat() == dwio::common::FileFormat::DWRF);
}

void HiveDataSink::appendData(RowVectorPtr input) {
Expand Down Expand Up @@ -782,7 +781,7 @@ uint32_t HiveDataSink::appendWriter(const HiveWriterId& id) {
}

updateWriterOptionsFromHiveConfig(
insertTableHandle_->tableStorageFormat(),
insertTableHandle_->storageFormat(),
hiveConfig_,
connectorSessionProperties,
options);
Expand Down Expand Up @@ -937,7 +936,7 @@ std::pair<std::string, std::string> HiveDataSink::getWriterFileNames(
? fmt::format(".tmp.velox.{}_{}", targetFileName, makeUuid())
: targetFileName;
if (generateFileName &&
insertTableHandle_->tableStorageFormat() ==
insertTableHandle_->storageFormat() ==
dwio::common::FileFormat::PARQUET) {
return {
fmt::format("{}{}", targetFileName, ".parquet"),
Expand Down Expand Up @@ -1005,7 +1004,7 @@ folly::dynamic HiveInsertTableHandle::serialize() const {

obj["inputColumns"] = arr;
obj["locationHandle"] = locationHandle_->serialize();
obj["tableStorageFormat"] = dwio::common::toString(tableStorageFormat_);
obj["tableStorageFormat"] = dwio::common::toString(storageFormat_);

if (bucketProperty_) {
obj["bucketProperty"] = bucketProperty_->serialize();
Expand Down Expand Up @@ -1065,8 +1064,7 @@ void HiveInsertTableHandle::registerSerDe() {

std::string HiveInsertTableHandle::toString() const {
std::ostringstream out;
out << "HiveInsertTableHandle ["
<< dwio::common::toString(tableStorageFormat_);
out << "HiveInsertTableHandle [" << dwio::common::toString(storageFormat_);
if (compressionKind_.has_value()) {
out << " " << common::compressionKindToString(compressionKind_.value());
} else {
Expand Down
11 changes: 5 additions & 6 deletions velox/connectors/hive/HiveDataSink.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,16 +201,15 @@ class HiveInsertTableHandle : public ConnectorInsertTableHandle {
HiveInsertTableHandle(
std::vector<std::shared_ptr<const HiveColumnHandle>> inputColumns,
std::shared_ptr<const LocationHandle> locationHandle,
dwio::common::FileFormat tableStorageFormat =
dwio::common::FileFormat::DWRF,
dwio::common::FileFormat storageFormat = dwio::common::FileFormat::DWRF,
std::shared_ptr<const HiveBucketProperty> bucketProperty = nullptr,
std::optional<common::CompressionKind> compressionKind = {},
const std::unordered_map<std::string, std::string>& serdeParameters = {},
const std::shared_ptr<dwio::common::WriterOptions>& writerOptions =
nullptr)
: inputColumns_(std::move(inputColumns)),
locationHandle_(std::move(locationHandle)),
tableStorageFormat_(tableStorageFormat),
storageFormat_(storageFormat),
bucketProperty_(std::move(bucketProperty)),
compressionKind_(compressionKind),
serdeParameters_(serdeParameters),
Expand All @@ -237,8 +236,8 @@ class HiveInsertTableHandle : public ConnectorInsertTableHandle {
return compressionKind_;
}

dwio::common::FileFormat tableStorageFormat() const {
return tableStorageFormat_;
dwio::common::FileFormat storageFormat() const {
return storageFormat_;
}

const std::unordered_map<std::string, std::string>& serdeParameters() const {
Expand Down Expand Up @@ -272,7 +271,7 @@ class HiveInsertTableHandle : public ConnectorInsertTableHandle {
private:
const std::vector<std::shared_ptr<const HiveColumnHandle>> inputColumns_;
const std::shared_ptr<const LocationHandle> locationHandle_;
const dwio::common::FileFormat tableStorageFormat_;
const dwio::common::FileFormat storageFormat_;
const std::shared_ptr<const HiveBucketProperty> bucketProperty_;
const std::optional<common::CompressionKind> compressionKind_;
const std::unordered_map<std::string, std::string> serdeParameters_;
Expand Down
2 changes: 1 addition & 1 deletion velox/tool/trace/TableWriterReplayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ makeHiveInsertTableHandle(
const auto inputColumns = tracedHandle->inputColumns();
const auto compressionKind =
tracedHandle->compressionKind().value_or(common::CompressionKind_NONE);
const auto storageFormat = tracedHandle->tableStorageFormat();
const auto storageFormat = tracedHandle->storageFormat();
const auto serdeParameters = tracedHandle->serdeParameters();
const auto writerOptions = tracedHandle->writerOptions();
return std::make_shared<connector::hive::HiveInsertTableHandle>(
Expand Down

0 comments on commit 789ce65

Please sign in to comment.