Skip to content

Commit

Permalink
Rename "max-coalesced-distance-bytes" and add session property. (#11671)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #11671

1. Ensure connector session property "orc_max_merge_distance" is used
to control the max coalesce distance in ReaderOptions.
2. Rename config property "max-coalesced-distance-bytes" to
"max-coalesced-distance" to reflect the units change.

Reviewed By: Yuhta

Differential Revision: D66528422

fbshipit-source-id: a522e09528aefeac21e7eeb7a47d9b4fc6663b03
  • Loading branch information
Sergey Pershin authored and facebook-github-bot committed Nov 28, 2024
1 parent 0d572d2 commit 4a4b4a0
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 18 deletions.
2 changes: 1 addition & 1 deletion velox/benchmarks/QueryBenchmarkBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ void QueryBenchmarkBase::initialize() {
auto configurationValues = std::unordered_map<std::string, std::string>();
configurationValues[connector::hive::HiveConfig::kMaxCoalescedBytes] =
std::to_string(FLAGS_max_coalesced_bytes);
configurationValues[connector::hive::HiveConfig::kMaxCoalescedDistanceBytes] =
configurationValues[connector::hive::HiveConfig::kMaxCoalescedDistance] =
std::to_string(FLAGS_max_coalesced_distance_bytes);
configurationValues[connector::hive::HiveConfig::kPrefetchRowGroups] =
std::to_string(FLAGS_parquet_prefetch_rowgroups);
Expand Down
18 changes: 15 additions & 3 deletions velox/connectors/hive/HiveConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,23 @@ bool HiveConfig::ignoreMissingFiles(const config::ConfigBase* session) const {
}

int64_t HiveConfig::maxCoalescedBytes() const {
return config_->get<int64_t>(kMaxCoalescedBytes, 128 << 20);
return config_->get<int64_t>(kMaxCoalescedBytes, 128 << 20); // 128MB
}

int32_t HiveConfig::maxCoalescedDistanceBytes() const {
return config_->get<int32_t>(kMaxCoalescedDistanceBytes, 512 << 10);
/// Returns the max merge distance, in bytes, used to combine read requests.
/// The value is looked up in 'session' under kMaxCoalescedDistanceSession,
/// with the connector config entry kMaxCoalescedDistance (itself defaulting
/// to "512kB") supplied as the fallback. The capacity string is converted to
/// bytes and checked against the int32_t range before being narrowed.
int32_t HiveConfig::maxCoalescedDistanceBytes(
    const config::ConfigBase* session) const {
  // Connector-level setting; passed as the default for the session lookup.
  const auto connectorValue =
      config_->get<std::string>(kMaxCoalescedDistance, "512kB");
  const auto requestedValue =
      session->get<std::string>(kMaxCoalescedDistanceSession, connectorValue);
  const auto distanceBytes =
      config::toCapacity(requestedValue, config::CapacityUnit::BYTE);
  // The return type is int32_t, so anything above its max must be rejected
  // before the cast below.
  VELOX_USER_CHECK_LE(
      distanceBytes,
      std::numeric_limits<int32_t>::max(),
      "The max merge distance to combine read requests must be less than 2GB."
      " Got {} bytes.",
      distanceBytes);
  return static_cast<int32_t>(distanceBytes);
}

int32_t HiveConfig::prefetchRowGroups() const {
Expand Down
11 changes: 7 additions & 4 deletions velox/connectors/hive/HiveConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,12 @@ class HiveConfig {
/// The max coalesce bytes for a request.
static constexpr const char* kMaxCoalescedBytes = "max-coalesced-bytes";

/// The max coalesce distance bytes for combining requests.
static constexpr const char* kMaxCoalescedDistanceBytes =
"max-coalesced-distance-bytes";
/// The max merge distance to combine read requests.
/// Note: The session property name differs from the constant name for
/// backward compatibility with Presto.
static constexpr const char* kMaxCoalescedDistance = "max-coalesced-distance";
static constexpr const char* kMaxCoalescedDistanceSession =
"orc_max_merge_distance";

/// The number of prefetch rowgroups
static constexpr const char* kPrefetchRowGroups = "prefetch-rowgroups";
Expand Down Expand Up @@ -236,7 +239,7 @@ class HiveConfig {

int64_t maxCoalescedBytes() const;

int32_t maxCoalescedDistanceBytes() const;
int32_t maxCoalescedDistanceBytes(const config::ConfigBase* session) const;

int32_t prefetchRowGroups() const;

Expand Down
3 changes: 2 additions & 1 deletion velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,8 @@ void configureReaderOptions(
auto sessionProperties = connectorQueryCtx->sessionProperties();
readerOptions.setLoadQuantum(hiveConfig->loadQuantum());
readerOptions.setMaxCoalesceBytes(hiveConfig->maxCoalescedBytes());
readerOptions.setMaxCoalesceDistance(hiveConfig->maxCoalescedDistanceBytes());
readerOptions.setMaxCoalesceDistance(
hiveConfig->maxCoalescedDistanceBytes(sessionProperties));
readerOptions.setFileColumnNamesReadAsLowerCase(
hiveConfig->isFileColumnNamesReadAsLowerCase(sessionProperties));
bool useColumnNamesForColumnMapping = false;
Expand Down
11 changes: 7 additions & 4 deletions velox/connectors/hive/tests/HiveConfigTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ TEST(HiveConfigTest, defaultConfig) {
hiveConfig.isFileColumnNamesReadAsLowerCase(emptySession.get()), false);

ASSERT_EQ(hiveConfig.maxCoalescedBytes(), 128 << 20);
ASSERT_EQ(hiveConfig.maxCoalescedDistanceBytes(), 512 << 10);
ASSERT_EQ(
hiveConfig.maxCoalescedDistanceBytes(emptySession.get()), 512 << 10);
ASSERT_EQ(hiveConfig.numCacheFileHandles(), 20'000);
ASSERT_EQ(hiveConfig.isFileHandleCacheEnabled(), true);
ASSERT_EQ(
Expand Down Expand Up @@ -82,7 +83,7 @@ TEST(HiveConfigTest, overrideConfig) {
{HiveConfig::kFileColumnNamesReadAsLowerCase, "true"},
{HiveConfig::kAllowNullPartitionKeys, "false"},
{HiveConfig::kMaxCoalescedBytes, "100"},
{HiveConfig::kMaxCoalescedDistanceBytes, "100"},
{HiveConfig::kMaxCoalescedDistance, "100kB"},
{HiveConfig::kNumCacheFileHandles, "100"},
{HiveConfig::kEnableFileHandleCache, "false"},
{HiveConfig::kOrcWriterMaxStripeSize, "100MB"},
Expand Down Expand Up @@ -113,7 +114,8 @@ TEST(HiveConfigTest, overrideConfig) {
hiveConfig.isFileColumnNamesReadAsLowerCase(emptySession.get()), true);
ASSERT_EQ(hiveConfig.allowNullPartitionKeys(emptySession.get()), false);
ASSERT_EQ(hiveConfig.maxCoalescedBytes(), 100);
ASSERT_EQ(hiveConfig.maxCoalescedDistanceBytes(), 100);
ASSERT_EQ(
hiveConfig.maxCoalescedDistanceBytes(emptySession.get()), 100 << 10);
ASSERT_EQ(hiveConfig.numCacheFileHandles(), 100);
ASSERT_EQ(hiveConfig.isFileHandleCacheEnabled(), false);
ASSERT_EQ(
Expand Down Expand Up @@ -155,6 +157,7 @@ TEST(HiveConfigTest, overrideSession) {
{HiveConfig::kOrcWriterStringDictionaryEncodingEnabledSession, "false"},
{HiveConfig::kSortWriterMaxOutputRowsSession, "20"},
{HiveConfig::kSortWriterMaxOutputBytesSession, "20MB"},
{HiveConfig::kMaxCoalescedDistanceSession, "3MB"},
{HiveConfig::kSortWriterFinishTimeSliceLimitMsSession, "300"},
{HiveConfig::kPartitionPathAsLowerCaseSession, "false"},
{HiveConfig::kAllowNullPartitionKeysSession, "false"},
Expand All @@ -177,7 +180,7 @@ TEST(HiveConfigTest, overrideSession) {
ASSERT_EQ(hiveConfig.isFileColumnNamesReadAsLowerCase(session.get()), true);

ASSERT_EQ(hiveConfig.maxCoalescedBytes(), 128 << 20);
ASSERT_EQ(hiveConfig.maxCoalescedDistanceBytes(), 512 << 10);
ASSERT_EQ(hiveConfig.maxCoalescedDistanceBytes(session.get()), 3 << 20);
ASSERT_EQ(hiveConfig.numCacheFileHandles(), 20'000);
ASSERT_EQ(hiveConfig.isFileHandleCacheEnabled(), true);
ASSERT_EQ(
Expand Down
6 changes: 3 additions & 3 deletions velox/connectors/hive/tests/HiveConnectorUtilTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ TEST_F(HiveConnectorUtilTest, configureReaderOptions) {
EXPECT_EQ(readerOptions.maxCoalesceBytes(), hiveConfig->maxCoalescedBytes());
EXPECT_EQ(
readerOptions.maxCoalesceDistance(),
hiveConfig->maxCoalescedDistanceBytes());
hiveConfig->maxCoalescedDistanceBytes(&sessionProperties));
EXPECT_EQ(
readerOptions.fileColumnNamesReadAsLowerCase(),
hiveConfig->isFileColumnNamesReadAsLowerCase(&sessionProperties));
Expand Down Expand Up @@ -227,7 +227,7 @@ TEST_F(HiveConnectorUtilTest, configureReaderOptions) {
std::unordered_map<std::string, std::string> customHiveConfigProps;
customHiveConfigProps[hive::HiveConfig::kLoadQuantum] = "321";
customHiveConfigProps[hive::HiveConfig::kMaxCoalescedBytes] = "129";
customHiveConfigProps[hive::HiveConfig::kMaxCoalescedDistanceBytes] = "513";
customHiveConfigProps[hive::HiveConfig::kMaxCoalescedDistance] = "513KB";
customHiveConfigProps[hive::HiveConfig::kFileColumnNamesReadAsLowerCase] =
"true";
customHiveConfigProps[hive::HiveConfig::kOrcUseColumnNames] = "true";
Expand All @@ -241,7 +241,7 @@ TEST_F(HiveConnectorUtilTest, configureReaderOptions) {
EXPECT_EQ(readerOptions.maxCoalesceBytes(), hiveConfig->maxCoalescedBytes());
EXPECT_EQ(
readerOptions.maxCoalesceDistance(),
hiveConfig->maxCoalescedDistanceBytes());
hiveConfig->maxCoalescedDistanceBytes(&sessionProperties));
EXPECT_EQ(
readerOptions.fileColumnNamesReadAsLowerCase(),
hiveConfig->isFileColumnNamesReadAsLowerCase(&sessionProperties));
Expand Down
4 changes: 2 additions & 2 deletions velox/docs/configs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -464,11 +464,11 @@ Each query can override the config by setting corresponding query session proper
- integer
- 128MB
- Maximum size in bytes to coalesce requests to be fetched in a single request.
* - max-coalesced-distance-bytes
* - max-coalesced-distance
- orc_max_merge_distance
- string
- 512KB
- Maximum distance in bytes between chunks to be fetched that may be coalesced into a single request.
- Maximum distance between chunks to be fetched that may be coalesced into a single request, specified as a capacity string with a unit (e.g. 512kB). Must be less than 2GB.
* - load-quantum
-
- integer
Expand Down

0 comments on commit 4a4b4a0

Please sign in to comment.