Add optional S3 connection settings to the Hive connector.

Summary of the change carried by this patch:
  * HiveConfig: new keys hive.s3.connect-timeout, hive.s3.socket-timeout and
    hive.s3.max-connections, each with an accessor returning an optional. The
    timeout accessors return std::nullopt when the key is not set.
  * S3FileSystem: copies each configured value onto clientConfig
    (connectTimeoutMs, requestTimeoutMs, maxConnections); unset keys leave
    clientConfig untouched.
  * S3FileSystemTest: malformed timeouts ("400", "abc") must fail with
    "Invalid duration".
  * configs.rst: documents the three settings.

NOTE(review): several C++ template argument lists appear to be missing from
this patch text (e.g. after std::optional, static_cast, duration_cast and
make_shared). They are left exactly as received; restore them from the
original commit before applying.

diff --git a/velox/connectors/hive/HiveConfig.cpp b/velox/connectors/hive/HiveConfig.cpp
index ae88c700f8034..3cac62cb5deb4 100644
--- a/velox/connectors/hive/HiveConfig.cpp
+++ b/velox/connectors/hive/HiveConfig.cpp
@@ -105,6 +105,35 @@ std::string HiveConfig::s3IAMRoleSessionName() const {
   return config_->get(kS3IamRoleSessionName, std::string("velox-session"));
 }
 
+std::optional HiveConfig::s3ConnectTimeout() const {
+  auto duration = static_cast>(
+      config_->get(kS3ConnectTimeout));
+  if (duration.has_value()) {
+    return std::chrono::duration_cast(
+               facebook::velox::core::toDuration(duration.value()))
+        .count();
+  } else {
+    return std::nullopt;
+  }
+}
+
+std::optional HiveConfig::s3SocketTimeout() const {
+  auto duration = static_cast>(
+      config_->get(kS3SocketTimeout));
+  if (duration.has_value()) {
+    return std::chrono::duration_cast(
+               facebook::velox::core::toDuration(duration.value()))
+        .count();
+  } else {
+    return std::nullopt;
+  }
+}
+
+std::optional HiveConfig::s3MaxConnections() const {
+  return static_cast>(
+      config_->get(kS3MaxConnections));
+}
+
 std::string HiveConfig::gcsEndpoint() const {
   return config_->get(kGCSEndpoint, std::string(""));
 }
diff --git a/velox/connectors/hive/HiveConfig.h b/velox/connectors/hive/HiveConfig.h
index e1b0a3d2a19c0..bb4b0f1394c14 100644
--- a/velox/connectors/hive/HiveConfig.h
+++ b/velox/connectors/hive/HiveConfig.h
@@ -88,6 +88,15 @@ class HiveConfig {
   static constexpr const char* kS3IamRoleSessionName =
       "hive.s3.iam-role-session-name";
 
+  /// Socket connect timeout as a duration string with a unit, e.g. "1s".
+  static constexpr const char* kS3ConnectTimeout = "hive.s3.connect-timeout";
+
+  /// Socket read timeout as a duration string with a unit, e.g. "3s".
+  static constexpr const char* kS3SocketTimeout = "hive.s3.socket-timeout";
+
+  /// Maximum concurrent TCP connections for a single HTTP client.
+  static constexpr const char* kS3MaxConnections = "hive.s3.max-connections";
+
   /// The GCS storage endpoint server.
   static constexpr const char* kGCSEndpoint = "hive.gcs.endpoint";
 
@@ -209,6 +218,12 @@ class HiveConfig {
 
   std::string s3IAMRoleSessionName() const;
 
+  std::optional s3ConnectTimeout() const;
+
+  std::optional s3SocketTimeout() const;
+
+  std::optional s3MaxConnections() const;
+
   std::string gcsEndpoint() const;
 
   std::string gcsScheme() const;
diff --git a/velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp b/velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp
index ff838a03a64d2..987683c344593 100644
--- a/velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp
+++ b/velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp
@@ -541,6 +541,22 @@ class S3FileSystem::Impl {
       clientConfig.scheme = Aws::Http::Scheme::HTTP;
     }
 
+    // Apply the optional S3 connection settings. Each accessor is called once
+    // and its result reused, because every call re-reads the config (and, for
+    // the timeouts, re-parses the duration string). Unset settings leave
+    // clientConfig untouched.
+    if (const auto connectTimeout = hiveConfig_->s3ConnectTimeout()) {
+      clientConfig.connectTimeoutMs = connectTimeout.value();
+    }
+
+    if (const auto socketTimeout = hiveConfig_->s3SocketTimeout()) {
+      clientConfig.requestTimeoutMs = socketTimeout.value();
+    }
+
+    if (const auto maxConnections = hiveConfig_->s3MaxConnections()) {
+      clientConfig.maxConnections = maxConnections.value();
+    }
+
     auto credentialsProvider = getCredentialsProvider();
 
     client_ = std::make_shared(
diff --git a/velox/connectors/hive/storage_adapters/s3fs/tests/S3FileSystemTest.cpp b/velox/connectors/hive/storage_adapters/s3fs/tests/S3FileSystemTest.cpp
index 2a04297970797..93de7e79e5153 100644
--- a/velox/connectors/hive/storage_adapters/s3fs/tests/S3FileSystemTest.cpp
+++ b/velox/connectors/hive/storage_adapters/s3fs/tests/S3FileSystemTest.cpp
@@ -253,4 +253,15 @@ TEST_F(S3FileSystemTest, writeFileAndRead) {
   // Verify the last chunk.
   ASSERT_EQ(readFile->pread(contentSize * 250'000, contentSize), dataContent);
 }
+
+// Both a bare number ("400") and non-numeric text ("abc") are expected to be
+// rejected with "Invalid duration" when the filesystem is constructed.
+TEST_F(S3FileSystemTest, invalidConnectionSettings) {
+  auto hiveConfig =
+      minioServer_->hiveConfig({{"hive.s3.connect-timeout", "400"}});
+  VELOX_ASSERT_THROW(filesystems::S3FileSystem(hiveConfig), "Invalid duration");
+
+  hiveConfig = minioServer_->hiveConfig({{"hive.s3.socket-timeout", "abc"}});
+  VELOX_ASSERT_THROW(filesystems::S3FileSystem(hiveConfig), "Invalid duration");
+}
 } // namespace facebook::velox
diff --git a/velox/docs/configs.rst b/velox/docs/configs.rst
index ec84cf808da1b..7991ddacc560c 100644
--- a/velox/docs/configs.rst
+++ b/velox/docs/configs.rst
@@ -524,6 +524,19 @@ Each query can override the config by setting corresponding query session proper
      - bool
      - false
      - Utilize the configuration of the environment variables http_proxy, https_proxy, and no_proxy for use with the S3 API.
+   * - hive.s3.connect-timeout
+     - string
+     - 1s
+     - Socket connect timeout, given as a duration string with a unit such as ``1s``. A value without a unit (for example ``400``) is rejected as an invalid duration.
+   * - hive.s3.socket-timeout
+     - string
+     - 3s
+     - Socket read timeout, given as a duration string with a unit such as ``3s``. A value without a unit is rejected as an invalid duration.
+   * - hive.s3.max-connections
+     - integer
+     - 25
+     - Maximum concurrent TCP connections for a single HTTP client.
+
 
 ``Google Cloud Storage Configuration``
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^