Skip to content

Commit

Permalink
expose more s3 connection configs
Browse files Browse the repository at this point in the history
  • Loading branch information
wills-feng committed Apr 13, 2024
1 parent f51e34d commit c6f4f9b
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 0 deletions.
18 changes: 18 additions & 0 deletions velox/connectors/hive/HiveConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,24 @@ std::string HiveConfig::s3IAMRoleSessionName() const {
return config_->get(kS3IamRoleSessionName, std::string("velox-session"));
}

uint64_t HiveConfig::s3ConnectTimeout() const {
auto duration = facebook::velox::core::toDuration(
config_->get<std::string>(kS3ConnectTimeout, "5s"));
return std::chrono::duration_cast<std::chrono::milliseconds>(duration)
.count();
}

uint64_t HiveConfig::s3SocketTimeout() const {
auto duration = facebook::velox::core::toDuration(
config_->get<std::string>(kS3SocketTimeout, "5s"));
return std::chrono::duration_cast<std::chrono::milliseconds>(duration)
.count();
}

uint32_t HiveConfig::s3MaxConnections() const {
return config_->get(kS3MaxConnections, 500);
}

/// Returns the value configured for kGCSEndpoint, or an empty string when the
/// key is not set.
std::string HiveConfig::gcsEndpoint() const {
  const std::string noEndpoint;
  return config_->get<std::string>(kGCSEndpoint, noEndpoint);
}
Expand Down
15 changes: 15 additions & 0 deletions velox/connectors/hive/HiveConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,15 @@ class HiveConfig {
static constexpr const char* kS3IamRoleSessionName =
"hive.s3.iam-role-session-name";

/// Config key for the S3 TCP connect timeout. The value is a duration string
/// with a unit suffix (e.g. "10s", "500ms"); s3ConnectTimeout() returns it in
/// milliseconds.
static constexpr const char* kS3ConnectTimeout = "hive.s3.connect-timeout";

/// Config key for the S3 TCP socket read timeout. The value is a duration
/// string with a unit suffix (e.g. "10s", "500ms"); s3SocketTimeout() returns
/// it in milliseconds.
static constexpr const char* kS3SocketTimeout = "hive.s3.socket-timeout";

/// Config key for the maximum number of concurrent TCP connections for a
/// single http client. The value is an integer; see s3MaxConnections().
static constexpr const char* kS3MaxConnections = "hive.s3.max-connections";

/// The GCS storage endpoint server.
static constexpr const char* kGCSEndpoint = "hive.gcs.endpoint";

Expand Down Expand Up @@ -209,6 +218,12 @@ class HiveConfig {

std::string s3IAMRoleSessionName() const;

/// Returns the value of kS3ConnectTimeout converted to milliseconds.
uint64_t s3ConnectTimeout() const;

/// Returns the value of kS3SocketTimeout converted to milliseconds.
uint64_t s3SocketTimeout() const;

/// Returns the value of kS3MaxConnections (maximum concurrent TCP connections
/// for a single http client).
uint32_t s3MaxConnections() const;

std::string gcsEndpoint() const;

std::string gcsScheme() const;
Expand Down
4 changes: 4 additions & 0 deletions velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,10 @@ class S3FileSystem::Impl {
clientConfig.scheme = Aws::Http::Scheme::HTTP;
}

clientConfig.connectTimeoutMs = hiveConfig_->s3ConnectTimeout();
clientConfig.requestTimeoutMs = hiveConfig_->s3SocketTimeout();
clientConfig.maxConnections = hiveConfig_->s3MaxConnections();

auto credentialsProvider = getCredentialsProvider();

client_ = std::make_shared<Aws::S3::S3Client>(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,4 +253,21 @@ TEST_F(S3FileSystemTest, writeFileAndRead) {
// Verify the last chunk.
ASSERT_EQ(readFile->pread(contentSize * 250'000, contentSize), dataContent);
}

// Checks that an S3FileSystem can be constructed when all three connection
// settings are supplied with well-formed values.
TEST_F(S3FileSystemTest, connectionSettings) {
  auto configWithConnectionSettings = minioServer_->hiveConfig(
      {{"hive.s3.connect-timeout", "10s"},
       {"hive.s3.socket-timeout", "500ms"},
       {"hive.s3.max-connections", "400"}});

  // Construction alone is the assertion: it must not throw.
  filesystems::S3FileSystem s3fs(configWithConnectionSettings);
}

// Checks that constructing an S3FileSystem fails with "Invalid duration" when
// either timeout setting is not a valid duration string.
TEST_F(S3FileSystemTest, invalidConnectionSettings) {
  // A bare number without a unit suffix is rejected.
  auto badConnectTimeout =
      minioServer_->hiveConfig({{"hive.s3.connect-timeout", "400"}});
  VELOX_ASSERT_THROW(
      filesystems::S3FileSystem(badConnectTimeout), "Invalid duration");

  // A non-numeric value is rejected.
  auto badSocketTimeout =
      minioServer_->hiveConfig({{"hive.s3.socket-timeout", "abc"}});
  VELOX_ASSERT_THROW(
      filesystems::S3FileSystem(badSocketTimeout), "Invalid duration");
}
} // namespace facebook::velox
13 changes: 13 additions & 0 deletions velox/docs/configs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,19 @@ Each query can override the config by setting corresponding query session proper
- bool
- false
- Utilize the configuration of the environment variables http_proxy, https_proxy, and no_proxy for use with the S3 API.
* - hive.s3.connect-timeout
- string
- 5s
- TCP connect timeout, given as a duration string with a unit suffix, e.g. ``10s`` or ``500ms``.
* - hive.s3.socket-timeout
- string
- 5s
- TCP socket read timeout, given as a duration string with a unit suffix, e.g. ``10s`` or ``500ms``.
* - hive.s3.max-connections
- integer
- 500
- Maximum concurrent TCP connections for a single http client.


``Google Cloud Storage Configuration``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down

0 comments on commit c6f4f9b

Please sign in to comment.