diff --git a/velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp b/velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp index f3e5676004dc8..5f8ff8bb4aa22 100644 --- a/velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp +++ b/velox/connectors/hive/storage_adapters/s3fs/S3FileSystem.cpp @@ -564,7 +564,9 @@ class S3FileSystem::Impl { } auto retryStrategy = getRetryStrategy(); - clientConfig.retryStrategy = retryStrategy; + if (retryStrategy.has_value()) { + clientConfig.retryStrategy = retryStrategy.value(); + } auto credentialsProvider = getCredentialsProvider(); @@ -645,7 +647,8 @@ class S3FileSystem::Impl { } // Return a client RetryStrategy based on the config. - std::shared_ptr getRetryStrategy() const { + std::optional> getRetryStrategy() + const { auto retryMode = hiveConfig_->s3RetryMode(); auto maxAttempts = hiveConfig_->s3MaxAttempts(); if (retryMode.has_value()) { @@ -671,10 +674,22 @@ class S3FileSystem::Impl { // Otherwise, use default value 3. return std::make_shared(); } + } else if (retryMode.value() == "legacy") { + if (maxAttempts.has_value()) { + VELOX_USER_CHECK( + (maxAttempts.value() > 0), + "Invalid configuration: specify 'max-attempts' > 0."); + return std::make_shared( + maxAttempts.value()); + } else { + // Otherwise, use default value maxRetries = 10, scaleFactor = 25 + return std::make_shared(); + } + } else { + VELOX_USER_FAIL("Invalid retry mode for S3: {}", retryMode.value()); } } - // By default use DefaultRetryStrategy. - return std::make_shared(); + return std::nullopt; } // Make it clear that the S3FileSystem instance owns the S3Client. diff --git a/velox/docs/configs.rst b/velox/docs/configs.rst index afd9695d10c70..0e8ba5a1ee6c2 100644 --- a/velox/docs/configs.rst +++ b/velox/docs/configs.rst @@ -539,11 +539,14 @@ Each query can override the config by setting corresponding query session proper * - hive.s3.max-attempts - integer - - - Maximum attempts for connections to a single http client. + - Maximum attempts for connections to a single http client, work together with retry-mode. By default, it's 3 for standard/adaptive mode + and 10 for legacy mode. * - hive.s3.retry-mode - string - - - 'standard' or 'adaptive', use DefaultRetryStrategy if it's empty. + - **Allowed values:** "standard", "adaptive", "legacy". By default use legacy mode, which only enables throttled retry for transient errors. + Standard mode is built on top of legacy mode and has throttled retry enabled for throttling errors apart from transient errors. + Adaptive retry mode dynamically limits the rate of AWS requests to maximize success rate. ``Google Cloud Storage Configuration`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. list-table::