Skip to content

Commit

Permalink
Add s3 connection settings
Browse files Browse the repository at this point in the history
  • Loading branch information
wills-feng committed Apr 17, 2024
1 parent f51e34d commit 7a196db
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 0 deletions.
29 changes: 29 additions & 0 deletions velox/connectors/hive/HiveConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,35 @@ std::string HiveConfig::s3IAMRoleSessionName() const {
return config_->get(kS3IamRoleSessionName, std::string("velox-session"));
}

/// Reads kS3ConnectTimeout as a duration string and reports it in
/// milliseconds. Yields std::nullopt when the key is not configured.
std::optional<int64_t> HiveConfig::s3ConnectTimeout() const {
  auto rawTimeout = static_cast<std::optional<std::string>>(
      config_->get<std::string>(kS3ConnectTimeout));
  // Nothing configured: leave the decision to the caller.
  if (!rawTimeout.has_value()) {
    return std::nullopt;
  }
  // toDuration parses the textual duration; only the millisecond tick count
  // is handed back.
  const auto parsed = facebook::velox::core::toDuration(rawTimeout.value());
  return std::chrono::duration_cast<std::chrono::milliseconds>(parsed).count();
}

/// Reads kS3SocketTimeout as a duration string and reports it in
/// milliseconds. Yields std::nullopt when the key is not configured.
std::optional<int64_t> HiveConfig::s3SocketTimeout() const {
  auto rawTimeout = static_cast<std::optional<std::string>>(
      config_->get<std::string>(kS3SocketTimeout));
  // Nothing configured: leave the decision to the caller.
  if (!rawTimeout.has_value()) {
    return std::nullopt;
  }
  // toDuration parses the textual duration; only the millisecond tick count
  // is handed back.
  const auto parsed = facebook::velox::core::toDuration(rawTimeout.value());
  return std::chrono::duration_cast<std::chrono::milliseconds>(parsed).count();
}

/// Reads kS3MaxConnections as an unsigned 32-bit integer. The result is empty
/// when the lookup yields no value.
std::optional<uint32_t> HiveConfig::s3MaxConnections() const {
  using MaxConnections = std::optional<std::uint32_t>;
  // The lookup result is converted explicitly to std::optional.
  return static_cast<MaxConnections>(
      config_->get<uint32_t>(kS3MaxConnections));
}

/// Reads kGCSEndpoint, falling back to the empty string as the default.
std::string HiveConfig::gcsEndpoint() const {
  const std::string emptyEndpoint;
  return config_->get<std::string>(kGCSEndpoint, emptyEndpoint);
}
Expand Down
15 changes: 15 additions & 0 deletions velox/connectors/hive/HiveConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,15 @@ class HiveConfig {
static constexpr const char* kS3IamRoleSessionName =
"hive.s3.iam-role-session-name";

/// Socket connect timeout. The value is a duration string that
/// HiveConfig::s3ConnectTimeout() parses with toDuration and reports in
/// milliseconds; when the key is unset that accessor returns std::nullopt.
static constexpr const char* kS3ConnectTimeout = "hive.s3.connect-timeout";

/// Socket read timeout. The value is a duration string that
/// HiveConfig::s3SocketTimeout() parses with toDuration and reports in
/// milliseconds; S3FileSystem assigns it to the client configuration's
/// requestTimeoutMs when it is set.
static constexpr const char* kS3SocketTimeout = "hive.s3.socket-timeout";

/// Maximum concurrent TCP connections for a single http client. Read as an
/// unsigned 32-bit integer by HiveConfig::s3MaxConnections().
static constexpr const char* kS3MaxConnections = "hive.s3.max-connections";

/// The GCS storage endpoint server.
static constexpr const char* kGCSEndpoint = "hive.gcs.endpoint";

Expand Down Expand Up @@ -209,6 +218,12 @@ class HiveConfig {

/// Returns the value of kS3IamRoleSessionName, defaulting to "velox-session".
std::string s3IAMRoleSessionName() const;

/// Returns the kS3ConnectTimeout duration converted to milliseconds, or
/// std::nullopt when the key is not set.
std::optional<int64_t> s3ConnectTimeout() const;

/// Returns the kS3SocketTimeout duration converted to milliseconds, or
/// std::nullopt when the key is not set.
std::optional<int64_t> s3SocketTimeout() const;

/// Returns the kS3MaxConnections value as an optional unsigned 32-bit integer.
std::optional<uint32_t> s3MaxConnections() const;

/// Returns the value of kGCSEndpoint, defaulting to the empty string.
std::string gcsEndpoint() const;

std::string gcsScheme() const;
Expand Down
12 changes: 12 additions & 0 deletions velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,18 @@ class S3FileSystem::Impl {
clientConfig.scheme = Aws::Http::Scheme::HTTP;
}

if (hiveConfig_->s3ConnectTimeout().has_value()) {
clientConfig.connectTimeoutMs = hiveConfig_->s3ConnectTimeout().value();
}

if (hiveConfig_->s3SocketTimeout().has_value()) {
clientConfig.requestTimeoutMs = hiveConfig_->s3SocketTimeout().value();
}

if (hiveConfig_->s3MaxConnections().has_value()) {
clientConfig.maxConnections = hiveConfig_->s3MaxConnections().value();
}

auto credentialsProvider = getCredentialsProvider();

client_ = std::make_shared<Aws::S3::S3Client>(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,4 +253,13 @@ TEST_F(S3FileSystemTest, writeFileAndRead) {
// Verify the last chunk.
ASSERT_EQ(readFile->pread(contentSize * 250'000, contentSize), dataContent);
}

// Constructing the file system with a malformed timeout setting must fail
// with an "Invalid duration" error.
TEST_F(S3FileSystemTest, invalidConnectionSettings) {
  // Connect timeout given as the bare number "400".
  auto connectTimeoutConfig =
      minioServer_->hiveConfig({{"hive.s3.connect-timeout", "400"}});
  VELOX_ASSERT_THROW(
      filesystems::S3FileSystem(connectTimeoutConfig), "Invalid duration");

  // Socket timeout given as non-numeric text.
  auto socketTimeoutConfig =
      minioServer_->hiveConfig({{"hive.s3.socket-timeout", "abc"}});
  VELOX_ASSERT_THROW(
      filesystems::S3FileSystem(socketTimeoutConfig), "Invalid duration");
}
} // namespace facebook::velox
13 changes: 13 additions & 0 deletions velox/docs/configs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,19 @@ Each query can override the config by setting corresponding query session proper
- bool
- false
- Utilize the configuration of the environment variables http_proxy, https_proxy, and no_proxy for use with the S3 API.
* - hive.s3.connect-timeout
- string
-
- Socket connect timeout, given as a duration string. A bare number such as ``400`` is rejected as an invalid duration.
* - hive.s3.socket-timeout
- string
-
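- Socket read timeout, given as a duration string. A non-duration value such as ``abc`` is rejected as an invalid duration.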
* - hive.s3.max-connections
- integer
-
- Maximum concurrent TCP connections for a single http client.


``Google Cloud Storage Configuration``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down

0 comments on commit 7a196db

Please sign in to comment.