From 726dc1a11283be6e811ab8f851ea32886f3e7e53 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski Date: Thu, 19 Dec 2024 14:19:13 +0100 Subject: [PATCH 01/15] AdaptiveRetryConfig --- .../ox/resilience/AdaptiveRetryConfig.scala | 80 +++++++++++ .../scala/ox/resilience/ResultPolicy.scala | 2 +- .../scala/ox/resilience/RetryConfig.scala | 127 +---------------- .../ox/resilience/StandardRetryConfig.scala | 128 ++++++++++++++++++ .../scala/ox/resilience/TokenBucket.scala | 16 +++ core/src/main/scala/ox/resilience/retry.scala | 15 +- .../ox/resilience/BackoffRetryTest.scala | 11 +- .../ox/resilience/DelayedRetryTest.scala | 6 +- .../ox/resilience/ImmediateRetryTest.scala | 55 ++++++-- .../scala/ox/resilience/OnRetryTest.scala | 4 +- .../ScheduleFallingBackRetryTest.scala | 4 +- generated-doc/out/utils/repeat.md | 4 +- generated-doc/out/utils/retries.md | 24 ++-- 13 files changed, 309 insertions(+), 167 deletions(-) create mode 100644 core/src/main/scala/ox/resilience/AdaptiveRetryConfig.scala create mode 100644 core/src/main/scala/ox/resilience/StandardRetryConfig.scala create mode 100644 core/src/main/scala/ox/resilience/TokenBucket.scala diff --git a/core/src/main/scala/ox/resilience/AdaptiveRetryConfig.scala b/core/src/main/scala/ox/resilience/AdaptiveRetryConfig.scala new file mode 100644 index 00000000..6ef9682b --- /dev/null +++ b/core/src/main/scala/ox/resilience/AdaptiveRetryConfig.scala @@ -0,0 +1,80 @@ +package ox.resilience + +import ox.scheduling.{Schedule, ScheduledConfig, SleepMode} + +/** A config that defines how to retry a failed operation. + * + * It is a special case of [[ScheduledConfig]] with [[ScheduledConfig.sleepMode]] always set to [[SleepMode.Delay]]. It uses token bucket + * to determine if operation should be retried. Tokens are taken for every failure and returned on every successful operation, so in case + * of system failure client does not flood service with retry request. 
+ * + * @param schedule + * The retry schedule which determines the maximum number of retries and the delay between subsequent attempts to execute the operation. + * See [[Schedule]] for more details. + * @param resultPolicy + * A policy that allows to customize when a non-erroneous result is considered successful and when an error is worth retrying (which + * allows for failing fast on certain errors). See [[ResultPolicy]] for more details. + * @param onRetry + * A function that is invoked after each retry attempt. The callback receives the number of the current retry attempt (starting from 1) + * and the result of the operation that was attempted. The result is either a successful value or an error. The callback can be used to + * log information about the retry attempts, or to perform other side effects. By default, the callback does nothing. + * @param tokenBucket + * Token bucket which backs up adaptive circuit breaker. If bucket is empty, there will be no more retries. Bucket can be provided by + * user and shared with different [[AdaptiveRetryConfig]] + * @param bucketSize + * Size of [[TokenBucket]]. Will be ignored if [[tokenBucket]] is provided. + * @param onFailureCost + * Cost of tokens for every failure. It is also number of token added to the bucket for successful operation. + * @tparam E + * The error type of the operation. For operations returning a `T` or a `Try[T]`, this is fixed to `Throwable`. For operations returning + * an `Either[E, T]`, this can be any `E`. + * @tparam T + * The successful result type for the operation. 
+  */
+case class AdaptiveRetryConfig[E, T](
+    schedule: Schedule,
+    resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T],
+    onRetry: (Int, Either[E, T]) => Unit = (_: Int, _: Either[E, T]) => (),
+    tokenBucket: Option[TokenBucket] = None,
+    bucketSize: Int = 100,
+    onFailureCost: Int = 1
+) extends RetryConfig[E, T]:
+  private val bucket = tokenBucket.getOrElse(TokenBucket(bucketSize)) // created once per config, so all its retry runs share one budget
+  def toScheduledConfig: ScheduledConfig[E, T] =
+    def shouldContinueOnError(e: E): Boolean =
+      // only errors worth retrying cost tokens; if the token cannot be acquired we short circuit and stop retrying
+      resultPolicy.isWorthRetrying(e) && bucket.tryAcquire(onFailureCost)
+
+    def shouldContinueOnResult(result: T): Boolean =
+      // if we consider this result a success, the tokens are given back to the bucket
+      if resultPolicy.isSuccess(result) then
+        bucket.release(onFailureCost)
+        false
+      else true
+
+    ScheduledConfig(
+      schedule,
+      onRetry,
+      shouldContinueOnError = shouldContinueOnError,
+      shouldContinueOnResult = shouldContinueOnResult,
+      sleepMode = SleepMode.Delay
+    )
+  end toScheduledConfig
+end AdaptiveRetryConfig
+
+object AdaptiveRetryConfig:
+
+  /** Creates a config that retries up to a given number of times if there are enough tokens in the bucket, with no delay between subsequent
+    * attempts, using a default [[ResultPolicy]].
+    *
+    * This is a shorthand for {{{AdaptiveRetryConfig(Schedule.Immediate(maxRetries), bucketSize = bucketSize)}}}
+    *
+    * @param maxRetries
+    *   The maximum number of retries.
+ */ + def immediate[E, T](maxRetries: Int, bucketSize: Int = 100): RetryConfig[E, T] = + AdaptiveRetryConfig( + Schedule.Immediate(maxRetries), + bucketSize = bucketSize + ) +end AdaptiveRetryConfig diff --git a/core/src/main/scala/ox/resilience/ResultPolicy.scala b/core/src/main/scala/ox/resilience/ResultPolicy.scala index 6e44f11c..d230b735 100644 --- a/core/src/main/scala/ox/resilience/ResultPolicy.scala +++ b/core/src/main/scala/ox/resilience/ResultPolicy.scala @@ -29,7 +29,7 @@ object ResultPolicy: /** A policy that customizes which errors are retried, and considers every non-erroneous result successful * @param isWorthRetrying - * A predicate that indicates whether an erroneous result should be retried.. + * A predicate that indicates whether an erroneous result should be retried. */ def retryWhen[E, T](isWorthRetrying: E => Boolean): ResultPolicy[E, T] = ResultPolicy(isWorthRetrying = isWorthRetrying) diff --git a/core/src/main/scala/ox/resilience/RetryConfig.scala b/core/src/main/scala/ox/resilience/RetryConfig.scala index e07cd1b7..c8b11330 100644 --- a/core/src/main/scala/ox/resilience/RetryConfig.scala +++ b/core/src/main/scala/ox/resilience/RetryConfig.scala @@ -1,127 +1,6 @@ package ox.resilience -import ox.scheduling.{SleepMode, Jitter, Schedule, ScheduledConfig} +import ox.scheduling.ScheduledConfig -import scala.concurrent.duration.* - -/** A config that defines how to retry a failed operation. - * - * It is a special case of [[ScheduledConfig]] with [[ScheduledConfig.sleepMode]] always set to [[SleepMode.Delay]] - * - * @param schedule - * The retry schedule which determines the maximum number of retries and the delay between subsequent attempts to execute the operation. - * See [[Schedule]] for more details. - * @param resultPolicy - * A policy that allows to customize when a non-erroneous result is considered successful and when an error is worth retrying (which - * allows for failing fast on certain errors). 
See [[ResultPolicy]] for more details. - * @param onRetry - * A function that is invoked after each retry attempt. The callback receives the number of the current retry attempt (starting from 1) - * and the result of the operation that was attempted. The result is either a successful value or an error. The callback can be used to - * log information about the retry attempts, or to perform other side effects. By default, the callback does nothing. - * @tparam E - * The error type of the operation. For operations returning a `T` or a `Try[T]`, this is fixed to `Throwable`. For operations returning - * an `Either[E, T]`, this can be any `E`. - * @tparam T - * The successful result type for the operation. - */ -case class RetryConfig[E, T]( - schedule: Schedule, - resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T], - onRetry: (Int, Either[E, T]) => Unit = (_: Int, _: Either[E, T]) => () -): - def toScheduledConfig: ScheduledConfig[E, T] = ScheduledConfig( - schedule, - onRetry, - shouldContinueOnError = resultPolicy.isWorthRetrying, - shouldContinueOnResult = t => !resultPolicy.isSuccess(t), - sleepMode = SleepMode.Delay - ) -end RetryConfig - -object RetryConfig: - /** Creates a config that retries up to a given number of times, with no delay between subsequent attempts, using a default - * [[ResultPolicy]]. - * - * This is a shorthand for {{{RetryConfig(Schedule.Immediate(maxRetries))}}} - * - * @param maxRetries - * The maximum number of retries. - */ - def immediate[E, T](maxRetries: Int): RetryConfig[E, T] = RetryConfig(Schedule.Immediate(maxRetries)) - - /** Creates a config that retries indefinitely, with no delay between subsequent attempts, using a default [[ResultPolicy]]. 
- * - * This is a shorthand for {{{RetryConfig(Schedule.Immediate.forever)}}} - */ - def immediateForever[E, T]: RetryConfig[E, T] = RetryConfig(Schedule.Immediate.forever) - - /** Creates a config that retries up to a given number of times, with a fixed delay between subsequent attempts, using a default - * [[ResultPolicy]]. - * - * This is a shorthand for {{{RetryConfig(Schedule.Delay(maxRetries, delay))}}} - * - * @param maxRetries - * The maximum number of retries. - * @param delay - * The delay between subsequent attempts. - */ - def delay[E, T](maxRetries: Int, delay: FiniteDuration): RetryConfig[E, T] = RetryConfig(Schedule.Fixed(maxRetries, delay)) - - /** Creates a config that retries indefinitely, with a fixed delay between subsequent attempts, using a default [[ResultPolicy]]. - * - * This is a shorthand for {{{RetryConfig(Schedule.Delay.forever(delay))}}} - * - * @param delay - * The delay between subsequent attempts. - */ - def delayForever[E, T](delay: FiniteDuration): RetryConfig[E, T] = RetryConfig(Schedule.Fixed.forever(delay)) - - /** Creates a config that retries up to a given number of times, with an increasing delay (backoff) between subsequent attempts, using a - * default [[ResultPolicy]]. - * - * The backoff is exponential with base 2 (i.e. the next delay is twice as long as the previous one), starting at the given initial delay - * and capped at the given maximum delay. - * - * This is a shorthand for {{{RetryConfig(Schedule.Backoff(maxRetries, initialDelay, maxDelay, jitter))}}} - * - * @param maxRetries - * The maximum number of retries. - * @param initialDelay - * The delay before the first retry. - * @param maxDelay - * The maximum delay between subsequent retries. Defaults to 1 minute. - * @param jitter - * A random factor used for calculating the delay between subsequent retries. See [[Jitter]] for more details. Defaults to no jitter, - * i.e. an exponential backoff with no adjustments. 
- */ - def backoff[E, T]( - maxRetries: Int, - initialDelay: FiniteDuration, - maxDelay: FiniteDuration = 1.minute, - jitter: Jitter = Jitter.None - ): RetryConfig[E, T] = - RetryConfig(Schedule.Backoff(maxRetries, initialDelay, maxDelay, jitter)) - - /** Creates a config that retries indefinitely, with an increasing delay (backoff) between subsequent attempts, using a default - * [[ResultPolicy]]. - * - * The backoff is exponential with base 2 (i.e. the next delay is twice as long as the previous one), starting at the given initial delay - * and capped at the given maximum delay. - * - * This is a shorthand for {{{RetryConfig(Schedule.Backoff.forever(initialDelay, maxDelay, jitter))}}} - * - * @param initialDelay - * The delay before the first retry. - * @param maxDelay - * The maximum delay between subsequent retries. Defaults to 1 minute. - * @param jitter - * A random factor used for calculating the delay between subsequent retries. See [[Jitter]] for more details. Defaults to no jitter, - * i.e. an exponential backoff with no adjustments. - */ - def backoffForever[E, T]( - initialDelay: FiniteDuration, - maxDelay: FiniteDuration = 1.minute, - jitter: Jitter = Jitter.None - ): RetryConfig[E, T] = - RetryConfig(Schedule.Backoff.forever(initialDelay, maxDelay, jitter)) -end RetryConfig +trait RetryConfig[E, T]: + def toScheduledConfig: ScheduledConfig[E, T] diff --git a/core/src/main/scala/ox/resilience/StandardRetryConfig.scala b/core/src/main/scala/ox/resilience/StandardRetryConfig.scala new file mode 100644 index 00000000..2b5322c1 --- /dev/null +++ b/core/src/main/scala/ox/resilience/StandardRetryConfig.scala @@ -0,0 +1,128 @@ +package ox.resilience + +import ox.scheduling.{Jitter, Schedule, ScheduledConfig, SleepMode} +import scala.concurrent.duration.* + +/** A config that defines how to retry a failed operation. 
+ * + * It is a special case of [[ScheduledConfig]] with [[ScheduledConfig.sleepMode]] always set to [[SleepMode.Delay]] + * + * @param schedule + * The retry schedule which determines the maximum number of retries and the delay between subsequent attempts to execute the operation. + * See [[Schedule]] for more details. + * @param resultPolicy + * A policy that allows to customize when a non-erroneous result is considered successful and when an error is worth retrying (which + * allows for failing fast on certain errors). See [[ResultPolicy]] for more details. + * @param onRetry + * A function that is invoked after each retry attempt. The callback receives the number of the current retry attempt (starting from 1) + * and the result of the operation that was attempted. The result is either a successful value or an error. The callback can be used to + * log information about the retry attempts, or to perform other side effects. By default, the callback does nothing. + * @tparam E + * The error type of the operation. For operations returning a `T` or a `Try[T]`, this is fixed to `Throwable`. For operations returning + * an `Either[E, T]`, this can be any `E`. + * @tparam T + * The successful result type for the operation. + */ +case class StandardRetryConfig[E, T]( + schedule: Schedule, + resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T], + onRetry: (Int, Either[E, T]) => Unit = (_: Int, _: Either[E, T]) => () +) extends RetryConfig[E, T]: + def toScheduledConfig: ScheduledConfig[E, T] = ScheduledConfig( + schedule, + onRetry, + shouldContinueOnError = resultPolicy.isWorthRetrying, + shouldContinueOnResult = t => !resultPolicy.isSuccess(t), + sleepMode = SleepMode.Delay + ) +end StandardRetryConfig + +object StandardRetryConfig: + /** Creates a config that retries up to a given number of times, with no delay between subsequent attempts, using a default + * [[ResultPolicy]]. 
+ * + * This is a shorthand for {{{StandardRetryConfig(Schedule.Immediate(maxRetries))}}} + * + * @param maxRetries + * The maximum number of retries. + */ + def immediate[E, T](maxRetries: Int): StandardRetryConfig[E, T] = StandardRetryConfig(Schedule.Immediate(maxRetries)) + + /** Creates a config that retries indefinitely, with no delay between subsequent attempts, using a default [[ResultPolicy]]. + * + * This is a shorthand for {{{StandardRetryConfig(Schedule.Immediate.forever)}}} + */ + def immediateForever[E, T]: StandardRetryConfig[E, T] = StandardRetryConfig(Schedule.Immediate.forever) + + /** Creates a config that retries up to a given number of times, with a fixed delay between subsequent attempts, using a default + * [[ResultPolicy]]. + * + * This is a shorthand for {{{StandardRetryConfig(Schedule.Fixed(maxRetries, delay))}}} + * + * @param maxRetries + * The maximum number of retries. + * @param delay + * The delay between subsequent attempts. + */ + def delay[E, T](maxRetries: Int, delay: FiniteDuration): StandardRetryConfig[E, T] = StandardRetryConfig( + Schedule.Fixed(maxRetries, delay) + ) + + /** Creates a config that retries indefinitely, with a fixed delay between subsequent attempts, using a default [[ResultPolicy]]. + * + * This is a shorthand for {{{StandardRetryConfig(Schedule.Fixed.forever(delay))}}} + * + * @param delay + * The delay between subsequent attempts. + */ + def delayForever[E, T](delay: FiniteDuration): StandardRetryConfig[E, T] = StandardRetryConfig(Schedule.Fixed.forever(delay)) + + /** Creates a config that retries up to a given number of times, with an increasing delay (backoff) between subsequent attempts, using a + * default [[ResultPolicy]]. + * + * The backoff is exponential with base 2 (i.e. the next delay is twice as long as the previous one), starting at the given initial delay + * and capped at the given maximum delay. 
+ * + * This is a shorthand for {{{StandardRetryConfig(Schedule.Backoff(maxRetries, initialDelay, maxDelay, jitter))}}} + * + * @param maxRetries + * The maximum number of retries. + * @param initialDelay + * The delay before the first retry. + * @param maxDelay + * The maximum delay between subsequent retries. Defaults to 1 minute. + * @param jitter + * A random factor used for calculating the delay between subsequent retries. See [[Jitter]] for more details. Defaults to no jitter, + * i.e. an exponential backoff with no adjustments. + */ + def backoff[E, T]( + maxRetries: Int, + initialDelay: FiniteDuration, + maxDelay: FiniteDuration = 1.minute, + jitter: Jitter = Jitter.None + ): StandardRetryConfig[E, T] = + StandardRetryConfig(Schedule.Backoff(maxRetries, initialDelay, maxDelay, jitter)) + + /** Creates a config that retries indefinitely, with an increasing delay (backoff) between subsequent attempts, using a default + * [[ResultPolicy]]. + * + * The backoff is exponential with base 2 (i.e. the next delay is twice as long as the previous one), starting at the given initial delay + * and capped at the given maximum delay. + * + * This is a shorthand for {{{StandardRetryConfig(Schedule.Backoff.forever(initialDelay, maxDelay, jitter))}}} + * + * @param initialDelay + * The delay before the first retry. + * @param maxDelay + * The maximum delay between subsequent retries. Defaults to 1 minute. + * @param jitter + * A random factor used for calculating the delay between subsequent retries. See [[Jitter]] for more details. Defaults to no jitter, + * i.e. an exponential backoff with no adjustments. 
+    */
+  def backoffForever[E, T](
+      initialDelay: FiniteDuration,
+      maxDelay: FiniteDuration = 1.minute,
+      jitter: Jitter = Jitter.None
+  ): StandardRetryConfig[E, T] =
+    StandardRetryConfig(Schedule.Backoff.forever(initialDelay, maxDelay, jitter))
+end StandardRetryConfig
diff --git a/core/src/main/scala/ox/resilience/TokenBucket.scala b/core/src/main/scala/ox/resilience/TokenBucket.scala
new file mode 100644
index 00000000..8d334f73
--- /dev/null
+++ b/core/src/main/scala/ox/resilience/TokenBucket.scala
@@ -0,0 +1,27 @@
+package ox.resilience
+
+import java.util.concurrent.Semaphore
+
+/** A fixed-capacity bucket of tokens backed by a `java.util.concurrent.Semaphore`; the bucket starts out full.
+  *
+  * @param bucketSize
+  *   The capacity of the bucket, and also the number of tokens initially available.
+  */
+case class TokenBucket(bucketSize: Int):
+  private val semaphore = Semaphore(bucketSize)
+
+  /** Takes `permits` tokens without blocking; returns `false` (taking nothing) if that many tokens aren't available. */
+  def tryAcquire(permits: Int): Boolean =
+    semaphore.tryAcquire(permits)
+
+  /** Gives back up to `permits` tokens, never filling the bucket above `bucketSize`. */
+  def release(permits: Int): Unit =
+    // The read of the available permits and the release must be one atomic step: two interleaved releases could both
+    // see the same count and together overfill the bucket. A concurrent `tryAcquire` only lowers the count, so it
+    // cannot cause an overflow and does not need the lock.
+    semaphore.synchronized {
+      val freeCapacity = math.max(0, bucketSize - semaphore.availablePermits())
+      semaphore.release(math.min(permits, freeCapacity))
+    }
+
+end TokenBucket
diff --git a/core/src/main/scala/ox/resilience/retry.scala b/core/src/main/scala/ox/resilience/retry.scala
index 8e35d6f7..b93a7ff3 100644
--- a/core/src/main/scala/ox/resilience/retry.scala
+++ b/core/src/main/scala/ox/resilience/retry.scala
@@ -7,10 +7,10 @@ import scala.util.Try
 
 /** Retries an operation returning a direct result until it succeeds or the config decides to stop.
   *
-  * [[retry]] is a special case of [[scheduled]] with a given set of defaults. See [[RetryConfig]] for more details.
+  * [[retry]] is a special case of [[scheduled]] with a given set of defaults. See implementations of [[RetryConfig]] for more details.
   *
   * @param config
-  *   The retry config - see [[RetryConfig]].
+  *   The retry config - see implementations of [[RetryConfig]].
   * @param operation
   *   The operation to retry.
* @return @@ -26,10 +26,11 @@ def retry[T](config: RetryConfig[Throwable, T])(operation: => T): T = /** Retries an operation returning an [[scala.util.Either]] until it succeeds or the config decides to stop. Note that any exceptions thrown * by the operation aren't caught and don't cause a retry to happen. * - * [[retryEither]] is a special case of [[scheduledEither]] with a given set of defaults. See [[RetryConfig]] for more details. + * [[retryEither]] is a special case of [[scheduledEither]] with a given set of defaults. See implementations of [[RetryConfig]] for more + * details. * * @param config - * The retry config - see [[RetryConfig]]. + * The retry config - see implementations of [[RetryConfig]]. * @param operation * The operation to retry. * @return @@ -43,13 +44,13 @@ def retryEither[E, T](config: RetryConfig[E, T])(operation: => Either[E, T]): Ei /** Retries an operation using the given error mode until it succeeds or the config decides to stop. Note that any exceptions thrown by the * operation aren't caught (unless the operation catches them as part of its implementation) and don't cause a retry to happen. * - * [[retryWithErrorMode]] is a special case of [[scheduledWithErrorMode]] with a given set of defaults. See [[RetryConfig]] for more - * details. + * [[retryWithErrorMode]] is a special case of [[scheduledWithErrorMode]] with a given set of defaults. See implementations of + * [[RetryConfig]] for more details. * * @param em * The error mode to use, which specifies when a result value is considered success, and when a failure. * @param config - * The retry config - see [[RetryConfig]]. + * The retry config - see implementations of [[RetryConfig]]. * @param operation * The operation to retry. 
* @return diff --git a/core/src/test/scala/ox/resilience/BackoffRetryTest.scala b/core/src/test/scala/ox/resilience/BackoffRetryTest.scala index 15ecf345..8310083c 100644 --- a/core/src/test/scala/ox/resilience/BackoffRetryTest.scala +++ b/core/src/test/scala/ox/resilience/BackoffRetryTest.scala @@ -23,7 +23,7 @@ class BackoffRetryTest extends AnyFlatSpec with Matchers with EitherValues with if true then throw new RuntimeException("boom") // when - val (result, elapsedTime) = measure(the[RuntimeException] thrownBy retry(RetryConfig.backoff(maxRetries, initialDelay))(f)) + val (result, elapsedTime) = measure(the[RuntimeException] thrownBy retry(StandardRetryConfig.backoff(maxRetries, initialDelay))(f)) // then result should have message "boom" @@ -43,7 +43,7 @@ class BackoffRetryTest extends AnyFlatSpec with Matchers with EitherValues with if counter <= retriesUntilSuccess then throw new RuntimeException("boom") else successfulResult // when - val result = retry(RetryConfig.backoffForever(initialDelay, maxDelay = 2.millis))(f) + val result = retry(StandardRetryConfig.backoffForever(initialDelay, maxDelay = 2.millis))(f) // then result shouldBe successfulResult @@ -61,7 +61,8 @@ class BackoffRetryTest extends AnyFlatSpec with Matchers with EitherValues with if true then throw new RuntimeException("boom") // when - val (result, elapsedTime) = measure(the[RuntimeException] thrownBy retry(RetryConfig.backoff(maxRetries, initialDelay, maxDelay))(f)) + val (result, elapsedTime) = + measure(the[RuntimeException] thrownBy retry(StandardRetryConfig.backoff(maxRetries, initialDelay, maxDelay))(f)) // then result should have message "boom" @@ -82,7 +83,7 @@ class BackoffRetryTest extends AnyFlatSpec with Matchers with EitherValues with // when val (result, elapsedTime) = - measure(the[RuntimeException] thrownBy retry(RetryConfig.backoff(maxRetries, initialDelay, maxDelay, Jitter.Equal))(f)) + measure(the[RuntimeException] thrownBy retry(StandardRetryConfig.backoff(maxRetries, 
initialDelay, maxDelay, Jitter.Equal))(f)) // then result should have message "boom" @@ -103,7 +104,7 @@ class BackoffRetryTest extends AnyFlatSpec with Matchers with EitherValues with Left(errorMessage) // when - val (result, elapsedTime) = measure(retryEither(RetryConfig.backoff(maxRetries, initialDelay))(f)) + val (result, elapsedTime) = measure(retryEither(StandardRetryConfig.backoff(maxRetries, initialDelay))(f)) // then result.left.value shouldBe errorMessage diff --git a/core/src/test/scala/ox/resilience/DelayedRetryTest.scala b/core/src/test/scala/ox/resilience/DelayedRetryTest.scala index 1b41257a..18273073 100644 --- a/core/src/test/scala/ox/resilience/DelayedRetryTest.scala +++ b/core/src/test/scala/ox/resilience/DelayedRetryTest.scala @@ -22,7 +22,7 @@ class DelayedRetryTest extends AnyFlatSpec with Matchers with EitherValues with if true then throw new RuntimeException("boom") // when - val (result, elapsedTime) = measure(the[RuntimeException] thrownBy retry(RetryConfig.delay(maxRetries, sleep))(f)) + val (result, elapsedTime) = measure(the[RuntimeException] thrownBy retry(StandardRetryConfig.delay(maxRetries, sleep))(f)) // then result should have message "boom" @@ -42,7 +42,7 @@ class DelayedRetryTest extends AnyFlatSpec with Matchers with EitherValues with if counter <= retriesUntilSuccess then throw new RuntimeException("boom") else successfulResult // when - val result = retry(RetryConfig.delayForever(sleep))(f) + val result = retry(StandardRetryConfig.delayForever(sleep))(f) // then result shouldBe successfulResult @@ -61,7 +61,7 @@ class DelayedRetryTest extends AnyFlatSpec with Matchers with EitherValues with Left(errorMessage) // when - val (result, elapsedTime) = measure(retryEither(RetryConfig.delay(maxRetries, sleep))(f)) + val (result, elapsedTime) = measure(retryEither(StandardRetryConfig.delay(maxRetries, sleep))(f)) // then result.left.value shouldBe errorMessage diff --git a/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala 
b/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala index 7e775a06..ded1b60a 100644 --- a/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala +++ b/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala @@ -20,7 +20,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi successfulResult // when - val result = retry(RetryConfig.immediate(3))(f) + val result = retry(StandardRetryConfig.immediate(3))(f) // then result shouldBe successfulResult @@ -31,7 +31,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi // given var counter = 0 val errorMessage = "boom" - val policy = RetryConfig[Throwable, Unit](Schedule.Immediate(3), ResultPolicy.retryWhen(_.getMessage != errorMessage)) + val policy = StandardRetryConfig[Throwable, Unit](Schedule.Immediate(3), ResultPolicy.retryWhen(_.getMessage != errorMessage)) def f = counter += 1 @@ -46,7 +46,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi // given var counter = 0 val unsuccessfulResult = -1 - val policy = RetryConfig[Throwable, Int](Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)) + val policy = StandardRetryConfig[Throwable, Int](Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)) def f = counter += 1 @@ -70,7 +70,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi if true then throw new RuntimeException(errorMessage) // when/then - the[RuntimeException] thrownBy retry(RetryConfig.immediate(3))(f) should have message errorMessage + the[RuntimeException] thrownBy retry(StandardRetryConfig.immediate(3))(f) should have message errorMessage counter shouldBe 4 } @@ -85,7 +85,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi if counter <= retriesUntilSuccess then throw new RuntimeException("boom") else successfulResult // when - val result = retry(RetryConfig.immediateForever)(f) + val result = 
retry(StandardRetryConfig.immediateForever)(f) // then result shouldBe successfulResult @@ -102,7 +102,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi Right(successfulResult) // when - val result = retryEither(RetryConfig.immediate(3))(f) + val result = retryEither(StandardRetryConfig.immediate(3))(f) // then result.value shouldBe successfulResult @@ -113,7 +113,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi // given var counter = 0 val errorMessage = "boom" - val policy: RetryConfig[String, Int] = RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != errorMessage)) + val policy: StandardRetryConfig[String, Int] = StandardRetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != errorMessage)) def f: Either[String, Int] = counter += 1 @@ -131,7 +131,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi // given var counter = 0 val unsuccessfulResult = -1 - val policy: RetryConfig[String, Int] = RetryConfig(Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)) + val policy: StandardRetryConfig[String, Int] = StandardRetryConfig(Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)) def f: Either[String, Int] = counter += 1 @@ -155,10 +155,47 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi Left(errorMessage) // when - val result = retryEither(RetryConfig.immediate(3))(f) + val result = retryEither(StandardRetryConfig.immediate(3))(f) // then result.left.value shouldBe errorMessage counter shouldBe 4 } + + it should "retry a failing adaptive" in { + // given + var counter = 0 + val errorMessage = "boom" + + def f = + counter += 1 + if counter <= 2 then Left(errorMessage) + else Right("Success") + + // when + val result = retryEither(AdaptiveRetryConfig.immediate(5, 3))(f) + + // then + result.value shouldBe "Success" + counter shouldBe 3 + } + + it should "retry a failing adaptive 2" in { + // given + 
var counter = 0 + val errorMessage = "boom" + + def f = + counter += 1 + Left(errorMessage) + + // when + val result = retryEither(AdaptiveRetryConfig.immediate(maxRetries = 5, bucketSize = 2))(f) + + // then + result.left.value shouldBe errorMessage + // One for the first try, two for the retries paid for by a bucket of size 2 (maxRetries = 5 is never reached) + counter shouldBe 3 + } + end ImmediateRetryTest diff --git a/core/src/test/scala/ox/resilience/OnRetryTest.scala b/core/src/test/scala/ox/resilience/OnRetryTest.scala index 900a60fe..31a2a99b 100644 --- a/core/src/test/scala/ox/resilience/OnRetryTest.scala +++ b/core/src/test/scala/ox/resilience/OnRetryTest.scala @@ -26,7 +26,7 @@ class OnRetryTest extends AnyFlatSpec with Matchers with EitherValues with TryVa returnedResult = result // when - val result = retry(RetryConfig(Schedule.Immediate(3), onRetry = onRetry))(f) + val result = retry(StandardRetryConfig(Schedule.Immediate(3), onRetry = onRetry))(f) // then result shouldBe successfulResult @@ -53,7 +53,7 @@ class OnRetryTest extends AnyFlatSpec with Matchers with EitherValues with TryVa returnedResult = result // when - val result = the[RuntimeException] thrownBy retry(RetryConfig(Schedule.Immediate(3), onRetry = onRetry))(f) + val result = the[RuntimeException] thrownBy retry(StandardRetryConfig(Schedule.Immediate(3), onRetry = onRetry))(f) // then result shouldBe failedResult diff --git a/core/src/test/scala/ox/resilience/ScheduleFallingBackRetryTest.scala b/core/src/test/scala/ox/resilience/ScheduleFallingBackRetryTest.scala index e67cc7b0..9bb1ecca 100644 --- a/core/src/test/scala/ox/resilience/ScheduleFallingBackRetryTest.scala +++ b/core/src/test/scala/ox/resilience/ScheduleFallingBackRetryTest.scala @@ -24,7 +24,7 @@ class ScheduleFallingBackRetryTest extends AnyFlatSpec with Matchers with Elapse val schedule = Schedule.Immediate(immediateRetries).andThen(Schedule.Fixed(delayedRetries, sleep)) // when - val (result, elapsedTime) = measure(the[RuntimeException] thrownBy retry(RetryConfig(schedule))(f)) + val (result, 
elapsedTime) = measure(the[RuntimeException] thrownBy retry(StandardRetryConfig(schedule))(f)) // then result should have message "boom" @@ -45,7 +45,7 @@ class ScheduleFallingBackRetryTest extends AnyFlatSpec with Matchers with Elapse val schedule = Schedule.Immediate(100).andThen(Schedule.Fixed.forever(2.millis)) // when - val result = retry(RetryConfig(schedule))(f) + val result = retry(StandardRetryConfig(schedule))(f) // then result shouldBe successfulResult diff --git a/generated-doc/out/utils/repeat.md b/generated-doc/out/utils/repeat.md index 2ed4843e..0322d7b2 100644 --- a/generated-doc/out/utils/repeat.md +++ b/generated-doc/out/utils/repeat.md @@ -51,7 +51,7 @@ See [scheduled](scheduled.md) for details on how to create custom schedules. ```scala import ox.UnionMode import ox.scheduling.{Schedule, repeat, repeatEither, repeatWithErrorMode, RepeatConfig} -import ox.resilience.{retry, RetryConfig} +import ox.resilience.{retry, StandardRetryConfig} import scala.concurrent.duration.* def directOperation: Int = ??? 
@@ -75,6 +75,6 @@ repeatWithErrorMode(UnionMode[String])(RepeatConfig.fixedRate(3, 100.millis))(un // repeat with retry inside repeat(RepeatConfig.fixedRate(3, 100.millis)) { - retry(RetryConfig.backoff(3, 100.millis))(directOperation) + retry(StandardRetryConfig.backoff(3, 100.millis))(directOperation) } ``` diff --git a/generated-doc/out/utils/retries.md b/generated-doc/out/utils/retries.md index e22dcd0c..1d07e5c9 100644 --- a/generated-doc/out/utils/retries.md +++ b/generated-doc/out/utils/retries.md @@ -92,7 +92,7 @@ If you want to customize a part of the result policy, you can use the following ```scala import ox.UnionMode -import ox.resilience.{retry, retryEither, retryWithErrorMode, ResultPolicy, RetryConfig} +import ox.resilience.{retry, retryEither, retryWithErrorMode, ResultPolicy, StandardRetryConfig} import ox.scheduling.{Jitter, Schedule} import scala.concurrent.duration.* @@ -101,27 +101,27 @@ def eitherOperation: Either[String, Int] = ??? def unionOperation: String | Int = ??? 
// various operation definitions - same syntax -retry(RetryConfig.immediate(3))(directOperation) -retryEither(RetryConfig.immediate(3))(eitherOperation) +retry(StandardRetryConfig.immediate(3))(directOperation) +retryEither(StandardRetryConfig.immediate(3))(eitherOperation) // various configs with custom schedules and default ResultPolicy -retry(RetryConfig.delay(3, 100.millis))(directOperation) -retry(RetryConfig.backoff(3, 100.millis))(directOperation) // defaults: maxDelay = 1.minute, jitter = Jitter.None -retry(RetryConfig.backoff(3, 100.millis, 5.minutes, Jitter.Equal))(directOperation) +retry(StandardRetryConfig.delay(3, 100.millis))(directOperation) +retry(StandardRetryConfig.backoff(3, 100.millis))(directOperation) // defaults: maxDelay = 1.minute, jitter = Jitter.None +retry(StandardRetryConfig.backoff(3, 100.millis, 5.minutes, Jitter.Equal))(directOperation) // infinite retries with a default ResultPolicy -retry(RetryConfig.delayForever(100.millis))(directOperation) -retry(RetryConfig.backoffForever(100.millis, 5.minutes, Jitter.Full))(directOperation) +retry(StandardRetryConfig.delayForever(100.millis))(directOperation) +retry(StandardRetryConfig.backoffForever(100.millis, 5.minutes, Jitter.Full))(directOperation) // result policies // custom success -retry[Int](RetryConfig(Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)))(directOperation) +retry[Int](StandardRetryConfig(Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)))(directOperation) // fail fast on certain errors -retry(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_.getMessage != "fatal error")))(directOperation) -retryEither(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(eitherOperation) +retry(StandardRetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_.getMessage != "fatal error")))(directOperation) +retryEither(StandardRetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(eitherOperation) // custom 
error mode -retryWithErrorMode(UnionMode[String])(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(unionOperation) +retryWithErrorMode(UnionMode[String])(StandardRetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(unionOperation) ``` See the tests in `ox.resilience.*` for more. From 62059c08bf1b1047f3a14a3c53ab498a15fe8449 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski Date: Fri, 20 Dec 2024 11:43:53 +0100 Subject: [PATCH 02/15] adaptive as extension of ResultPolicy --- .../ox/resilience/AdaptiveRetryConfig.scala | 80 ----------- .../scala/ox/resilience/ResultPolicy.scala | 26 +++- .../scala/ox/resilience/RetryConfig.scala | 133 +++++++++++++++++- .../ox/resilience/StandardRetryConfig.scala | 128 ----------------- .../StartTimeRateLimiterAlgorithm.scala | 8 +- .../scala/ox/resilience/TokenBucket.scala | 7 +- .../ox/resilience/BackoffRetryTest.scala | 10 +- .../ox/resilience/DelayedRetryTest.scala | 6 +- .../ox/resilience/ImmediateRetryTest.scala | 22 +-- .../scala/ox/resilience/OnRetryTest.scala | 4 +- .../ScheduleFallingBackRetryTest.scala | 4 +- generated-doc/out/utils/repeat.md | 4 +- generated-doc/out/utils/retries.md | 24 ++-- 13 files changed, 201 insertions(+), 255 deletions(-) delete mode 100644 core/src/main/scala/ox/resilience/AdaptiveRetryConfig.scala delete mode 100644 core/src/main/scala/ox/resilience/StandardRetryConfig.scala diff --git a/core/src/main/scala/ox/resilience/AdaptiveRetryConfig.scala b/core/src/main/scala/ox/resilience/AdaptiveRetryConfig.scala deleted file mode 100644 index 6ef9682b..00000000 --- a/core/src/main/scala/ox/resilience/AdaptiveRetryConfig.scala +++ /dev/null @@ -1,80 +0,0 @@ -package ox.resilience - -import ox.scheduling.{Schedule, ScheduledConfig, SleepMode} - -/** A config that defines how to retry a failed operation. - * - * It is a special case of [[ScheduledConfig]] with [[ScheduledConfig.sleepMode]] always set to [[SleepMode.Delay]]. 
It uses token bucket - * to determine if operation should be retried. Tokens are taken for every failure and returned on every successful operation, so in case - * of system failure client does not flood service with retry request. - * - * @param schedule - * The retry schedule which determines the maximum number of retries and the delay between subsequent attempts to execute the operation. - * See [[Schedule]] for more details. - * @param resultPolicy - * A policy that allows to customize when a non-erroneous result is considered successful and when an error is worth retrying (which - * allows for failing fast on certain errors). See [[ResultPolicy]] for more details. - * @param onRetry - * A function that is invoked after each retry attempt. The callback receives the number of the current retry attempt (starting from 1) - * and the result of the operation that was attempted. The result is either a successful value or an error. The callback can be used to - * log information about the retry attempts, or to perform other side effects. By default, the callback does nothing. - * @param tokenBucket - * Token bucket which backs up adaptive circuit breaker. If bucket is empty, there will be no more retries. Bucket can be provided by - * user and shared with different [[AdaptiveRetryConfig]] - * @param bucketSize - * Size of [[TokenBucket]]. Will be ignored if [[tokenBucket]] is provided. - * @param onFailureCost - * Cost of tokens for every failure. It is also number of token added to the bucket for successful operation. - * @tparam E - * The error type of the operation. For operations returning a `T` or a `Try[T]`, this is fixed to `Throwable`. For operations returning - * an `Either[E, T]`, this can be any `E`. - * @tparam T - * The successful result type for the operation. 
- */ -case class AdaptiveRetryConfig[E, T]( - schedule: Schedule, - resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T], - onRetry: (Int, Either[E, T]) => Unit = (_: Int, _: Either[E, T]) => (), - tokenBucket: Option[TokenBucket] = None, - bucketSize: Int = 100, - onFailureCost: Int = 1 -) extends RetryConfig[E, T]: - def toScheduledConfig: ScheduledConfig[E, T] = - val bucket = tokenBucket.getOrElse(TokenBucket(bucketSize)) - def shouldContinueOnError(e: E): Boolean = - // if we cannot acquire token we short circuit and stop retrying - bucket.tryAcquire(onFailureCost) && resultPolicy.isWorthRetrying(e) - - def shouldContinueOnResult(result: T): Boolean = - // if we consider this result as success token are given back to bucket - if resultPolicy.isSuccess(result) then - bucket.release(onFailureCost) - false - else true - - ScheduledConfig( - schedule, - onRetry, - shouldContinueOnError = shouldContinueOnError, - shouldContinueOnResult = shouldContinueOnResult, - sleepMode = SleepMode.Delay - ) - end toScheduledConfig -end AdaptiveRetryConfig - -object AdaptiveRetryConfig: - - /** Creates a config that retries up to a given number of times if there are enough token in the bucket, with no delay between subsequent - * attempts, using a default [[ResultPolicy]]. - * - * This is a shorthand for {{{AdaptiveRetryConfig(Schedule.Immediate(maxRetries))}}} - * - * @param maxRetries - * The maximum number of retries. 
- */ - def immediate[E, T](maxRetries: Int, bucketSize: Int = 100): RetryConfig[E, T] = - AdaptiveRetryConfig( - Schedule.Immediate(maxRetries), - bucketSize = bucketSize - ) -end AdaptiveRetryConfig diff --git a/core/src/main/scala/ox/resilience/ResultPolicy.scala b/core/src/main/scala/ox/resilience/ResultPolicy.scala index d230b735..a3aec0d9 100644 --- a/core/src/main/scala/ox/resilience/ResultPolicy.scala +++ b/core/src/main/scala/ox/resilience/ResultPolicy.scala @@ -14,7 +14,31 @@ package ox.resilience * @tparam T * The successful result type for the operation. */ -case class ResultPolicy[E, T](isSuccess: T => Boolean = (_: T) => true, isWorthRetrying: E => Boolean = (_: E) => true) +case class ResultPolicy[E, T](isSuccess: T => Boolean = (_: T) => true, isWorthRetrying: E => Boolean = (_: E) => true): + /** @param tokenBucket + * [[TokenBucket]] used by this policy. Can be shared by multiple policies. Default token size is 100. + * @param onFailureCost + * Cost of tokens for failure. Default is 1 + * @return + * New adaptive [[ResultPolicy]] backed by [[TokenBucket]]. For every retry we try to acquire [[onFailureCost]] tokens. If we succeed + * we continue, if not we short circuit and stop. For every successful attempt we release [[onFailureCost]] tokens. + */ + def adaptive(tokenBucket: TokenBucket = TokenBucket(100), onFailureCost: Int = 1): ResultPolicy[E, T] = + val onError: E => Boolean = (e: E) => + // if we cannot acquire token we short circuit and stop retrying + isWorthRetrying(e) && tokenBucket.tryAcquire(onFailureCost) + + val onSuccess: T => Boolean = (result: T) => + // if we consider this result as success token are given back to bucket + if isSuccess(result) then + tokenBucket.release(onFailureCost) + true + else false + end onSuccess + ResultPolicy(isSuccess = onSuccess, isWorthRetrying = onError) + end adaptive + +end ResultPolicy object ResultPolicy: /** A policy that considers every non-erroneous result successful and retries on any error. 
*/ diff --git a/core/src/main/scala/ox/resilience/RetryConfig.scala b/core/src/main/scala/ox/resilience/RetryConfig.scala index c8b11330..93ca1142 100644 --- a/core/src/main/scala/ox/resilience/RetryConfig.scala +++ b/core/src/main/scala/ox/resilience/RetryConfig.scala @@ -1,6 +1,133 @@ package ox.resilience -import ox.scheduling.ScheduledConfig +import ox.scheduling.{Jitter, Schedule, ScheduledConfig, SleepMode} +import scala.concurrent.duration.* -trait RetryConfig[E, T]: - def toScheduledConfig: ScheduledConfig[E, T] +/** A config that defines how to retry a failed operation. + * + * It is a special case of [[ScheduledConfig]] with [[ScheduledConfig.sleepMode]] always set to [[SleepMode.Delay]] + * + * @param schedule + * The retry schedule which determines the maximum number of retries and the delay between subsequent attempts to execute the operation. + * See [[Schedule]] for more details. + * @param resultPolicy + * A policy that allows to customize when a non-erroneous result is considered successful and when an error is worth retrying (which + * allows for failing fast on certain errors). See [[ResultPolicy]] for more details. + * @param onRetry + * A function that is invoked after each retry attempt. The callback receives the number of the current retry attempt (starting from 1) + * and the result of the operation that was attempted. The result is either a successful value or an error. The callback can be used to + * log information about the retry attempts, or to perform other side effects. By default, the callback does nothing. + * @tparam E + * The error type of the operation. For operations returning a `T` or a `Try[T]`, this is fixed to `Throwable`. For operations returning + * an `Either[E, T]`, this can be any `E`. + * @tparam T + * The successful result type for the operation. 
+ */ +case class RetryConfig[E, T]( + schedule: Schedule, + resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T], + onRetry: (Int, Either[E, T]) => Unit = (_: Int, _: Either[E, T]) => () +): + def toScheduledConfig: ScheduledConfig[E, T] = ScheduledConfig( + schedule, + onRetry, + shouldContinueOnError = resultPolicy.isWorthRetrying, + shouldContinueOnResult = t => !resultPolicy.isSuccess(t), + sleepMode = SleepMode.Delay + ) + + def adaptive(tokenBucket: TokenBucket, onFailureCost: Int): RetryConfig[E, T] = + copy( + resultPolicy = resultPolicy.adaptive(tokenBucket: TokenBucket, onFailureCost: Int) + ) + + end adaptive +end RetryConfig + +object RetryConfig: + /** Creates a config that retries up to a given number of times, with no delay between subsequent attempts, using a default + * [[ResultPolicy]]. + * + * This is a shorthand for {{{RetryConfig(Schedule.Immediate(maxRetries))}}} + * + * @param maxRetries + * The maximum number of retries. + */ + def immediate[E, T](maxRetries: Int): RetryConfig[E, T] = RetryConfig(Schedule.Immediate(maxRetries)) + + /** Creates a config that retries indefinitely, with no delay between subsequent attempts, using a default [[ResultPolicy]]. + * + * This is a shorthand for {{{RetryConfig(Schedule.Immediate.forever)}}} + */ + def immediateForever[E, T]: RetryConfig[E, T] = RetryConfig(Schedule.Immediate.forever) + + /** Creates a config that retries up to a given number of times, with a fixed delay between subsequent attempts, using a default + * [[ResultPolicy]]. + * + * This is a shorthand for {{{RetryConfig(Schedule.Delay(maxRetries, delay))}}} + * + * @param maxRetries + * The maximum number of retries. + * @param delay + * The delay between subsequent attempts. 
+ */ + def delay[E, T](maxRetries: Int, delay: FiniteDuration): RetryConfig[E, T] = RetryConfig(Schedule.Fixed(maxRetries, delay)) + + /** Creates a config that retries indefinitely, with a fixed delay between subsequent attempts, using a default [[ResultPolicy]]. + * + * This is a shorthand for {{{RetryConfig(Schedule.Delay.forever(delay))}}} + * + * @param delay + * The delay between subsequent attempts. + */ + def delayForever[E, T](delay: FiniteDuration): RetryConfig[E, T] = RetryConfig(Schedule.Fixed.forever(delay)) + + /** Creates a config that retries up to a given number of times, with an increasing delay (backoff) between subsequent attempts, using a + * default [[ResultPolicy]]. + * + * The backoff is exponential with base 2 (i.e. the next delay is twice as long as the previous one), starting at the given initial delay + * and capped at the given maximum delay. + * + * This is a shorthand for {{{RetryConfig(Schedule.Backoff(maxRetries, initialDelay, maxDelay, jitter))}}} + * + * @param maxRetries + * The maximum number of retries. + * @param initialDelay + * The delay before the first retry. + * @param maxDelay + * The maximum delay between subsequent retries. Defaults to 1 minute. + * @param jitter + * A random factor used for calculating the delay between subsequent retries. See [[Jitter]] for more details. Defaults to no jitter, + * i.e. an exponential backoff with no adjustments. + */ + def backoff[E, T]( + maxRetries: Int, + initialDelay: FiniteDuration, + maxDelay: FiniteDuration = 1.minute, + jitter: Jitter = Jitter.None + ): RetryConfig[E, T] = + RetryConfig(Schedule.Backoff(maxRetries, initialDelay, maxDelay, jitter)) + + /** Creates a config that retries indefinitely, with an increasing delay (backoff) between subsequent attempts, using a default + * [[ResultPolicy]]. + * + * The backoff is exponential with base 2 (i.e. 
the next delay is twice as long as the previous one), starting at the given initial delay + * and capped at the given maximum delay. + * + * This is a shorthand for {{{RetryConfig(Schedule.Backoff.forever(initialDelay, maxDelay, jitter))}}} + * + * @param initialDelay + * The delay before the first retry. + * @param maxDelay + * The maximum delay between subsequent retries. Defaults to 1 minute. + * @param jitter + * A random factor used for calculating the delay between subsequent retries. See [[Jitter]] for more details. Defaults to no jitter, + * i.e. an exponential backoff with no adjustments. + */ + def backoffForever[E, T]( + initialDelay: FiniteDuration, + maxDelay: FiniteDuration = 1.minute, + jitter: Jitter = Jitter.None + ): RetryConfig[E, T] = + RetryConfig(Schedule.Backoff.forever(initialDelay, maxDelay, jitter)) +end RetryConfig diff --git a/core/src/main/scala/ox/resilience/StandardRetryConfig.scala b/core/src/main/scala/ox/resilience/StandardRetryConfig.scala deleted file mode 100644 index 2b5322c1..00000000 --- a/core/src/main/scala/ox/resilience/StandardRetryConfig.scala +++ /dev/null @@ -1,128 +0,0 @@ -package ox.resilience - -import ox.scheduling.{Jitter, Schedule, ScheduledConfig, SleepMode} -import scala.concurrent.duration.* - -/** A config that defines how to retry a failed operation. - * - * It is a special case of [[ScheduledConfig]] with [[ScheduledConfig.sleepMode]] always set to [[SleepMode.Delay]] - * - * @param schedule - * The retry schedule which determines the maximum number of retries and the delay between subsequent attempts to execute the operation. - * See [[Schedule]] for more details. - * @param resultPolicy - * A policy that allows to customize when a non-erroneous result is considered successful and when an error is worth retrying (which - * allows for failing fast on certain errors). See [[ResultPolicy]] for more details. - * @param onRetry - * A function that is invoked after each retry attempt. 
The callback receives the number of the current retry attempt (starting from 1) - * and the result of the operation that was attempted. The result is either a successful value or an error. The callback can be used to - * log information about the retry attempts, or to perform other side effects. By default, the callback does nothing. - * @tparam E - * The error type of the operation. For operations returning a `T` or a `Try[T]`, this is fixed to `Throwable`. For operations returning - * an `Either[E, T]`, this can be any `E`. - * @tparam T - * The successful result type for the operation. - */ -case class StandardRetryConfig[E, T]( - schedule: Schedule, - resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T], - onRetry: (Int, Either[E, T]) => Unit = (_: Int, _: Either[E, T]) => () -) extends RetryConfig[E, T]: - def toScheduledConfig: ScheduledConfig[E, T] = ScheduledConfig( - schedule, - onRetry, - shouldContinueOnError = resultPolicy.isWorthRetrying, - shouldContinueOnResult = t => !resultPolicy.isSuccess(t), - sleepMode = SleepMode.Delay - ) -end StandardRetryConfig - -object StandardRetryConfig: - /** Creates a config that retries up to a given number of times, with no delay between subsequent attempts, using a default - * [[ResultPolicy]]. - * - * This is a shorthand for {{{RetryConfig(Schedule.Immediate(maxRetries))}}} - * - * @param maxRetries - * The maximum number of retries. - */ - def immediate[E, T](maxRetries: Int): StandardRetryConfig[E, T] = StandardRetryConfig(Schedule.Immediate(maxRetries)) - - /** Creates a config that retries indefinitely, with no delay between subsequent attempts, using a default [[ResultPolicy]]. 
- * - * This is a shorthand for {{{RetryConfig(Schedule.Immediate.forever)}}} - */ - def immediateForever[E, T]: StandardRetryConfig[E, T] = StandardRetryConfig(Schedule.Immediate.forever) - - /** Creates a config that retries up to a given number of times, with a fixed delay between subsequent attempts, using a default - * [[ResultPolicy]]. - * - * This is a shorthand for {{{RetryConfig(Schedule.Delay(maxRetries, delay))}}} - * - * @param maxRetries - * The maximum number of retries. - * @param delay - * The delay between subsequent attempts. - */ - def delay[E, T](maxRetries: Int, delay: FiniteDuration): StandardRetryConfig[E, T] = StandardRetryConfig( - Schedule.Fixed(maxRetries, delay) - ) - - /** Creates a config that retries indefinitely, with a fixed delay between subsequent attempts, using a default [[ResultPolicy]]. - * - * This is a shorthand for {{{RetryConfig(Schedule.Delay.forever(delay))}}} - * - * @param delay - * The delay between subsequent attempts. - */ - def delayForever[E, T](delay: FiniteDuration): StandardRetryConfig[E, T] = StandardRetryConfig(Schedule.Fixed.forever(delay)) - - /** Creates a config that retries up to a given number of times, with an increasing delay (backoff) between subsequent attempts, using a - * default [[ResultPolicy]]. - * - * The backoff is exponential with base 2 (i.e. the next delay is twice as long as the previous one), starting at the given initial delay - * and capped at the given maximum delay. - * - * This is a shorthand for {{{RetryConfig(Schedule.Backoff(maxRetries, initialDelay, maxDelay, jitter))}}} - * - * @param maxRetries - * The maximum number of retries. - * @param initialDelay - * The delay before the first retry. - * @param maxDelay - * The maximum delay between subsequent retries. Defaults to 1 minute. - * @param jitter - * A random factor used for calculating the delay between subsequent retries. See [[Jitter]] for more details. Defaults to no jitter, - * i.e. 
an exponential backoff with no adjustments. - */ - def backoff[E, T]( - maxRetries: Int, - initialDelay: FiniteDuration, - maxDelay: FiniteDuration = 1.minute, - jitter: Jitter = Jitter.None - ): StandardRetryConfig[E, T] = - StandardRetryConfig(Schedule.Backoff(maxRetries, initialDelay, maxDelay, jitter)) - - /** Creates a config that retries indefinitely, with an increasing delay (backoff) between subsequent attempts, using a default - * [[ResultPolicy]]. - * - * The backoff is exponential with base 2 (i.e. the next delay is twice as long as the previous one), starting at the given initial delay - * and capped at the given maximum delay. - * - * This is a shorthand for {{{RetryConfig(Schedule.Backoff.forever(initialDelay, maxDelay, jitter))}}} - * - * @param initialDelay - * The delay before the first retry. - * @param maxDelay - * The maximum delay between subsequent retries. Defaults to 1 minute. - * @param jitter - * A random factor used for calculating the delay between subsequent retries. See [[Jitter]] for more details. Defaults to no jitter, - * i.e. an exponential backoff with no adjustments. 
- */ - def backoffForever[E, T]( - initialDelay: FiniteDuration, - maxDelay: FiniteDuration = 1.minute, - jitter: Jitter = Jitter.None - ): StandardRetryConfig[E, T] = - StandardRetryConfig(Schedule.Backoff.forever(initialDelay, maxDelay, jitter)) -end StandardRetryConfig diff --git a/core/src/main/scala/ox/resilience/StartTimeRateLimiterAlgorithm.scala b/core/src/main/scala/ox/resilience/StartTimeRateLimiterAlgorithm.scala index c979c740..ed9cec17 100644 --- a/core/src/main/scala/ox/resilience/StartTimeRateLimiterAlgorithm.scala +++ b/core/src/main/scala/ox/resilience/StartTimeRateLimiterAlgorithm.scala @@ -102,13 +102,13 @@ object StartTimeRateLimiterAlgorithm: case class LeakyBucket(rate: Int, per: FiniteDuration) extends RateLimiterAlgorithm: private val refillInterval = per.toNanos private val lastRefillTime = new AtomicLong(System.nanoTime()) - private val semaphore = new Semaphore(1) + private val bucket = TokenBucket(rate, Some(1)) def acquire(permits: Int): Unit = - semaphore.acquire(permits) + bucket.acquire(permits) def tryAcquire(permits: Int): Boolean = - semaphore.tryAcquire(permits) + bucket.tryAcquire(permits) def getNextUpdate: Long = val waitTime = lastRefillTime.get() + refillInterval - System.nanoTime() @@ -117,7 +117,7 @@ object StartTimeRateLimiterAlgorithm: def update(): Unit = val now = System.nanoTime() lastRefillTime.set(now) - if semaphore.availablePermits() < rate then semaphore.release() + bucket.release(1) def runOperation[T](operation: => T, permits: Int): T = operation diff --git a/core/src/main/scala/ox/resilience/TokenBucket.scala b/core/src/main/scala/ox/resilience/TokenBucket.scala index 8d334f73..324f5a0e 100644 --- a/core/src/main/scala/ox/resilience/TokenBucket.scala +++ b/core/src/main/scala/ox/resilience/TokenBucket.scala @@ -2,12 +2,15 @@ package ox.resilience import java.util.concurrent.Semaphore -case class TokenBucket(bucketSize: Int): - private val semaphore = Semaphore(bucketSize) +case class TokenBucket(bucketSize: 
Int, initSize: Option[Int] = None): + private val semaphore = Semaphore(initSize.getOrElse(bucketSize)) def tryAcquire(permits: Int): Boolean = semaphore.tryAcquire(permits) + def acquire(permits: Int): Unit = + semaphore.acquire(permits) + def release(permits: Int): Unit = val availablePermits = semaphore.availablePermits() val toRelease = if availablePermits + permits >= bucketSize then bucketSize - availablePermits else permits diff --git a/core/src/test/scala/ox/resilience/BackoffRetryTest.scala b/core/src/test/scala/ox/resilience/BackoffRetryTest.scala index 8310083c..9c379311 100644 --- a/core/src/test/scala/ox/resilience/BackoffRetryTest.scala +++ b/core/src/test/scala/ox/resilience/BackoffRetryTest.scala @@ -23,7 +23,7 @@ class BackoffRetryTest extends AnyFlatSpec with Matchers with EitherValues with if true then throw new RuntimeException("boom") // when - val (result, elapsedTime) = measure(the[RuntimeException] thrownBy retry(StandardRetryConfig.backoff(maxRetries, initialDelay))(f)) + val (result, elapsedTime) = measure(the[RuntimeException] thrownBy retry(RetryConfig.backoff(maxRetries, initialDelay))(f)) // then result should have message "boom" @@ -43,7 +43,7 @@ class BackoffRetryTest extends AnyFlatSpec with Matchers with EitherValues with if counter <= retriesUntilSuccess then throw new RuntimeException("boom") else successfulResult // when - val result = retry(StandardRetryConfig.backoffForever(initialDelay, maxDelay = 2.millis))(f) + val result = retry(RetryConfig.backoffForever(initialDelay, maxDelay = 2.millis))(f) // then result shouldBe successfulResult @@ -62,7 +62,7 @@ class BackoffRetryTest extends AnyFlatSpec with Matchers with EitherValues with // when val (result, elapsedTime) = - measure(the[RuntimeException] thrownBy retry(StandardRetryConfig.backoff(maxRetries, initialDelay, maxDelay))(f)) + measure(the[RuntimeException] thrownBy retry(RetryConfig.backoff(maxRetries, initialDelay, maxDelay))(f)) // then result should have message 
"boom" @@ -83,7 +83,7 @@ class BackoffRetryTest extends AnyFlatSpec with Matchers with EitherValues with // when val (result, elapsedTime) = - measure(the[RuntimeException] thrownBy retry(StandardRetryConfig.backoff(maxRetries, initialDelay, maxDelay, Jitter.Equal))(f)) + measure(the[RuntimeException] thrownBy retry(RetryConfig.backoff(maxRetries, initialDelay, maxDelay, Jitter.Equal))(f)) // then result should have message "boom" @@ -104,7 +104,7 @@ class BackoffRetryTest extends AnyFlatSpec with Matchers with EitherValues with Left(errorMessage) // when - val (result, elapsedTime) = measure(retryEither(StandardRetryConfig.backoff(maxRetries, initialDelay))(f)) + val (result, elapsedTime) = measure(retryEither(RetryConfig.backoff(maxRetries, initialDelay))(f)) // then result.left.value shouldBe errorMessage diff --git a/core/src/test/scala/ox/resilience/DelayedRetryTest.scala b/core/src/test/scala/ox/resilience/DelayedRetryTest.scala index 18273073..1b41257a 100644 --- a/core/src/test/scala/ox/resilience/DelayedRetryTest.scala +++ b/core/src/test/scala/ox/resilience/DelayedRetryTest.scala @@ -22,7 +22,7 @@ class DelayedRetryTest extends AnyFlatSpec with Matchers with EitherValues with if true then throw new RuntimeException("boom") // when - val (result, elapsedTime) = measure(the[RuntimeException] thrownBy retry(StandardRetryConfig.delay(maxRetries, sleep))(f)) + val (result, elapsedTime) = measure(the[RuntimeException] thrownBy retry(RetryConfig.delay(maxRetries, sleep))(f)) // then result should have message "boom" @@ -42,7 +42,7 @@ class DelayedRetryTest extends AnyFlatSpec with Matchers with EitherValues with if counter <= retriesUntilSuccess then throw new RuntimeException("boom") else successfulResult // when - val result = retry(StandardRetryConfig.delayForever(sleep))(f) + val result = retry(RetryConfig.delayForever(sleep))(f) // then result shouldBe successfulResult @@ -61,7 +61,7 @@ class DelayedRetryTest extends AnyFlatSpec with Matchers with 
EitherValues with Left(errorMessage) // when - val (result, elapsedTime) = measure(retryEither(StandardRetryConfig.delay(maxRetries, sleep))(f)) + val (result, elapsedTime) = measure(retryEither(RetryConfig.delay(maxRetries, sleep))(f)) // then result.left.value shouldBe errorMessage diff --git a/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala b/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala index ded1b60a..f5ff5944 100644 --- a/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala +++ b/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala @@ -20,7 +20,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi successfulResult // when - val result = retry(StandardRetryConfig.immediate(3))(f) + val result = retry(RetryConfig.immediate(3))(f) // then result shouldBe successfulResult @@ -31,7 +31,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi // given var counter = 0 val errorMessage = "boom" - val policy = StandardRetryConfig[Throwable, Unit](Schedule.Immediate(3), ResultPolicy.retryWhen(_.getMessage != errorMessage)) + val policy = RetryConfig[Throwable, Unit](Schedule.Immediate(3), ResultPolicy.retryWhen(_.getMessage != errorMessage)) def f = counter += 1 @@ -46,7 +46,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi // given var counter = 0 val unsuccessfulResult = -1 - val policy = StandardRetryConfig[Throwable, Int](Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)) + val policy = RetryConfig[Throwable, Int](Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)) def f = counter += 1 @@ -70,7 +70,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi if true then throw new RuntimeException(errorMessage) // when/then - the[RuntimeException] thrownBy retry(StandardRetryConfig.immediate(3))(f) should have message errorMessage + the[RuntimeException] thrownBy 
retry(RetryConfig.immediate(3))(f) should have message errorMessage counter shouldBe 4 } @@ -85,7 +85,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi if counter <= retriesUntilSuccess then throw new RuntimeException("boom") else successfulResult // when - val result = retry(StandardRetryConfig.immediateForever)(f) + val result = retry(RetryConfig.immediateForever)(f) // then result shouldBe successfulResult @@ -102,7 +102,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi Right(successfulResult) // when - val result = retryEither(StandardRetryConfig.immediate(3))(f) + val result = retryEither(RetryConfig.immediate(3))(f) // then result.value shouldBe successfulResult @@ -113,7 +113,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi // given var counter = 0 val errorMessage = "boom" - val policy: StandardRetryConfig[String, Int] = StandardRetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != errorMessage)) + val policy: RetryConfig[String, Int] = RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != errorMessage)) def f: Either[String, Int] = counter += 1 @@ -131,7 +131,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi // given var counter = 0 val unsuccessfulResult = -1 - val policy: StandardRetryConfig[String, Int] = StandardRetryConfig(Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)) + val policy: RetryConfig[String, Int] = RetryConfig(Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)) def f: Either[String, Int] = counter += 1 @@ -155,7 +155,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi Left(errorMessage) // when - val result = retryEither(StandardRetryConfig.immediate(3))(f) + val result = retryEither(RetryConfig.immediate(3))(f) // then result.left.value shouldBe errorMessage @@ -173,7 +173,7 @@ class ImmediateRetryTest extends AnyFlatSpec with 
EitherValues with TryValues wi else Right("Success") // when - val result = retryEither(AdaptiveRetryConfig.immediate(5, 3))(f) + val result = retryEither(RetryConfig.immediate(5).adaptive(TokenBucket(5), 1))(f) // then result.value shouldBe "Success" @@ -190,7 +190,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi Left(errorMessage) // when - val result = retryEither(AdaptiveRetryConfig.immediate(2, 5))(f) + val result = retryEither(RetryConfig.immediate(5).adaptive(TokenBucket(2), 1))(f) // then result.left.value shouldBe errorMessage diff --git a/core/src/test/scala/ox/resilience/OnRetryTest.scala b/core/src/test/scala/ox/resilience/OnRetryTest.scala index 31a2a99b..900a60fe 100644 --- a/core/src/test/scala/ox/resilience/OnRetryTest.scala +++ b/core/src/test/scala/ox/resilience/OnRetryTest.scala @@ -26,7 +26,7 @@ class OnRetryTest extends AnyFlatSpec with Matchers with EitherValues with TryVa returnedResult = result // when - val result = retry(StandardRetryConfig(Schedule.Immediate(3), onRetry = onRetry))(f) + val result = retry(RetryConfig(Schedule.Immediate(3), onRetry = onRetry))(f) // then result shouldBe successfulResult @@ -53,7 +53,7 @@ class OnRetryTest extends AnyFlatSpec with Matchers with EitherValues with TryVa returnedResult = result // when - val result = the[RuntimeException] thrownBy retry(StandardRetryConfig(Schedule.Immediate(3), onRetry = onRetry))(f) + val result = the[RuntimeException] thrownBy retry(RetryConfig(Schedule.Immediate(3), onRetry = onRetry))(f) // then result shouldBe failedResult diff --git a/core/src/test/scala/ox/resilience/ScheduleFallingBackRetryTest.scala b/core/src/test/scala/ox/resilience/ScheduleFallingBackRetryTest.scala index 9bb1ecca..e67cc7b0 100644 --- a/core/src/test/scala/ox/resilience/ScheduleFallingBackRetryTest.scala +++ b/core/src/test/scala/ox/resilience/ScheduleFallingBackRetryTest.scala @@ -24,7 +24,7 @@ class ScheduleFallingBackRetryTest extends AnyFlatSpec with 
Matchers with Elapse val schedule = Schedule.Immediate(immediateRetries).andThen(Schedule.Fixed(delayedRetries, sleep)) // when - val (result, elapsedTime) = measure(the[RuntimeException] thrownBy retry(StandardRetryConfig(schedule))(f)) + val (result, elapsedTime) = measure(the[RuntimeException] thrownBy retry(RetryConfig(schedule))(f)) // then result should have message "boom" @@ -45,7 +45,7 @@ class ScheduleFallingBackRetryTest extends AnyFlatSpec with Matchers with Elapse val schedule = Schedule.Immediate(100).andThen(Schedule.Fixed.forever(2.millis)) // when - val result = retry(StandardRetryConfig(schedule))(f) + val result = retry(RetryConfig(schedule))(f) // then result shouldBe successfulResult diff --git a/generated-doc/out/utils/repeat.md b/generated-doc/out/utils/repeat.md index 0322d7b2..2ed4843e 100644 --- a/generated-doc/out/utils/repeat.md +++ b/generated-doc/out/utils/repeat.md @@ -51,7 +51,7 @@ See [scheduled](scheduled.md) for details on how to create custom schedules. ```scala import ox.UnionMode import ox.scheduling.{Schedule, repeat, repeatEither, repeatWithErrorMode, RepeatConfig} -import ox.resilience.{retry, StandardRetryConfig} +import ox.resilience.{retry, RetryConfig} import scala.concurrent.duration.* def directOperation: Int = ??? 
@@ -75,6 +75,6 @@ repeatWithErrorMode(UnionMode[String])(RepeatConfig.fixedRate(3, 100.millis))(un // repeat with retry inside repeat(RepeatConfig.fixedRate(3, 100.millis)) { - retry(StandardRetryConfig.backoff(3, 100.millis))(directOperation) + retry(RetryConfig.backoff(3, 100.millis))(directOperation) } ``` diff --git a/generated-doc/out/utils/retries.md b/generated-doc/out/utils/retries.md index 1d07e5c9..e22dcd0c 100644 --- a/generated-doc/out/utils/retries.md +++ b/generated-doc/out/utils/retries.md @@ -92,7 +92,7 @@ If you want to customize a part of the result policy, you can use the following ```scala import ox.UnionMode -import ox.resilience.{retry, retryEither, retryWithErrorMode, ResultPolicy, StandardRetryConfig} +import ox.resilience.{retry, retryEither, retryWithErrorMode, ResultPolicy, RetryConfig} import ox.scheduling.{Jitter, Schedule} import scala.concurrent.duration.* @@ -101,27 +101,27 @@ def eitherOperation: Either[String, Int] = ??? def unionOperation: String | Int = ??? 
// various operation definitions - same syntax -retry(StandardRetryConfig.immediate(3))(directOperation) -retryEither(StandardRetryConfig.immediate(3))(eitherOperation) +retry(RetryConfig.immediate(3))(directOperation) +retryEither(RetryConfig.immediate(3))(eitherOperation) // various configs with custom schedules and default ResultPolicy -retry(StandardRetryConfig.delay(3, 100.millis))(directOperation) -retry(StandardRetryConfig.backoff(3, 100.millis))(directOperation) // defaults: maxDelay = 1.minute, jitter = Jitter.None -retry(StandardRetryConfig.backoff(3, 100.millis, 5.minutes, Jitter.Equal))(directOperation) +retry(RetryConfig.delay(3, 100.millis))(directOperation) +retry(RetryConfig.backoff(3, 100.millis))(directOperation) // defaults: maxDelay = 1.minute, jitter = Jitter.None +retry(RetryConfig.backoff(3, 100.millis, 5.minutes, Jitter.Equal))(directOperation) // infinite retries with a default ResultPolicy -retry(StandardRetryConfig.delayForever(100.millis))(directOperation) -retry(StandardRetryConfig.backoffForever(100.millis, 5.minutes, Jitter.Full))(directOperation) +retry(RetryConfig.delayForever(100.millis))(directOperation) +retry(RetryConfig.backoffForever(100.millis, 5.minutes, Jitter.Full))(directOperation) // result policies // custom success -retry[Int](StandardRetryConfig(Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)))(directOperation) +retry[Int](RetryConfig(Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)))(directOperation) // fail fast on certain errors -retry(StandardRetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_.getMessage != "fatal error")))(directOperation) -retryEither(StandardRetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(eitherOperation) +retry(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_.getMessage != "fatal error")))(directOperation) +retryEither(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(eitherOperation) // custom 
error mode -retryWithErrorMode(UnionMode[String])(StandardRetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(unionOperation) +retryWithErrorMode(UnionMode[String])(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(unionOperation) ``` See the tests in `ox.resilience.*` for more. From 0032a968ecc657b7c80290aaeea828ffe8eaf840 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski Date: Fri, 20 Dec 2024 15:56:52 +0100 Subject: [PATCH 03/15] AdaptiveRetry class --- .../scala/ox/resilience/AdaptiveRetry.scala | 50 +++++++++++++++++++ .../scala/ox/resilience/ResultPolicy.scala | 26 +--------- .../scala/ox/resilience/RetryConfig.scala | 7 --- core/src/main/scala/ox/resilience/retry.scala | 12 ++--- .../ox/resilience/ImmediateRetryTest.scala | 6 ++- 5 files changed, 61 insertions(+), 40 deletions(-) create mode 100644 core/src/main/scala/ox/resilience/AdaptiveRetry.scala diff --git a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala new file mode 100644 index 00000000..9cf86294 --- /dev/null +++ b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala @@ -0,0 +1,50 @@ +package ox.resilience + +import ox.{EitherMode, ErrorMode} +import ox.scheduling.scheduledWithErrorMode + +import scala.util.Try + +case class AdaptiveRetry( + tokenBucket: TokenBucket +): + def apply[E, T, F[_]]( + config: RetryConfig[E, T], + errorMode: ErrorMode[E, F], + failureCost: E => Int, + successReward: T => Int, + isFailure: E => Boolean + )(operation: => F[T]): F[T] = + val isWorthRetrying: E => Boolean = (error: E) => + // if we cannot acquire token we short circuit and stop retrying + val isWorth = config.resultPolicy.isWorthRetrying(error) + if isWorth && isFailure(error) then tokenBucket.tryAcquire(failureCost(error)) + else isWorth + + val isSuccess: T => Boolean = (result: T) => + // if we consider this result as success token are given back to bucket + if 
config.resultPolicy.isSuccess(result) then + tokenBucket.release(successReward(result)) + true + else false + end isSuccess + + val resultPolicy = ResultPolicy(isSuccess, isWorthRetrying) + scheduledWithErrorMode(errorMode)(config.copy(resultPolicy = resultPolicy).toScheduledConfig)(operation) + end apply + + def apply[E, T]( + config: RetryConfig[E, T], + failureCost: E => Int = (_: E) => 1, + successReward: T => Int = (_: T) => 1, + isFailure: E => Boolean = (_: E) => true + )(operation: => Either[E, T]): Either[E, T] = + apply(config, EitherMode[E], failureCost, successReward, isFailure)(operation) + + def apply[T]( + config: RetryConfig[Throwable, T] + )(operation: => T): T = + apply(config, EitherMode[Throwable], (_: Throwable) => 1, (_: T) => 1, (_: Throwable) => true)(Try(operation).toEither) + .fold(throw _, identity) + +end AdaptiveRetry diff --git a/core/src/main/scala/ox/resilience/ResultPolicy.scala b/core/src/main/scala/ox/resilience/ResultPolicy.scala index a3aec0d9..d230b735 100644 --- a/core/src/main/scala/ox/resilience/ResultPolicy.scala +++ b/core/src/main/scala/ox/resilience/ResultPolicy.scala @@ -14,31 +14,7 @@ package ox.resilience * @tparam T * The successful result type for the operation. */ -case class ResultPolicy[E, T](isSuccess: T => Boolean = (_: T) => true, isWorthRetrying: E => Boolean = (_: E) => true): - /** @param tokenBucket - * [[TokenBucket]] used by this policy. Can be shared by multiple policies. Default token size is 100. - * @param onFailureCost - * Cost of tokens for failure. Default is 1 - * @return - * New adaptive [[ResultPolicy]] backed by [[TokenBucket]]. For every retry we try to acquire [[onFailureCost]] tokens. If we succeed - * we continue, if not we short circuit and stop. For every successful attempt we release [[onFailureCost]] tokens. 
- */ - def adaptive(tokenBucket: TokenBucket = TokenBucket(100), onFailureCost: Int = 1): ResultPolicy[E, T] = - val onError: E => Boolean = (e: E) => - // if we cannot acquire token we short circuit and stop retrying - isWorthRetrying(e) && tokenBucket.tryAcquire(onFailureCost) - - val onSuccess: T => Boolean = (result: T) => - // if we consider this result as success token are given back to bucket - if isSuccess(result) then - tokenBucket.release(onFailureCost) - true - else false - end onSuccess - ResultPolicy(isSuccess = onSuccess, isWorthRetrying = onError) - end adaptive - -end ResultPolicy +case class ResultPolicy[E, T](isSuccess: T => Boolean = (_: T) => true, isWorthRetrying: E => Boolean = (_: E) => true) object ResultPolicy: /** A policy that considers every non-erroneous result successful and retries on any error. */ diff --git a/core/src/main/scala/ox/resilience/RetryConfig.scala b/core/src/main/scala/ox/resilience/RetryConfig.scala index 93ca1142..ef89408e 100644 --- a/core/src/main/scala/ox/resilience/RetryConfig.scala +++ b/core/src/main/scala/ox/resilience/RetryConfig.scala @@ -35,13 +35,6 @@ case class RetryConfig[E, T]( shouldContinueOnResult = t => !resultPolicy.isSuccess(t), sleepMode = SleepMode.Delay ) - - def adaptive(tokenBucket: TokenBucket, onFailureCost: Int): RetryConfig[E, T] = - copy( - resultPolicy = resultPolicy.adaptive(tokenBucket: TokenBucket, onFailureCost: Int) - ) - - end adaptive end RetryConfig object RetryConfig: diff --git a/core/src/main/scala/ox/resilience/retry.scala b/core/src/main/scala/ox/resilience/retry.scala index b93a7ff3..cb0c190e 100644 --- a/core/src/main/scala/ox/resilience/retry.scala +++ b/core/src/main/scala/ox/resilience/retry.scala @@ -7,10 +7,10 @@ import scala.util.Try /** Retries an operation returning a direct result until it succeeds or the config decides to stop. * - * [[retry]] is a special case of [[scheduled]] with a given set of defaults. See implementations of[[RetryConfig]] for more details. 
+ * [[retry]] is a special case of [[scheduled]] with a given set of defaults. See [[RetryConfig]]. * * @param config - * The retry config - see implementations of [[RetryConfig]]. + * The retry config - see [[RetryConfig]]. * @param operation * The operation to retry. * @return @@ -30,7 +30,7 @@ def retry[T](config: RetryConfig[Throwable, T])(operation: => T): T = * details. * * @param config - * The retry config - see implementations of [[RetryConfig]]. + * The retry config - see [[RetryConfig]]. * @param operation * The operation to retry. * @return @@ -44,13 +44,13 @@ def retryEither[E, T](config: RetryConfig[E, T])(operation: => Either[E, T]): Ei /** Retries an operation using the given error mode until it succeeds or the config decides to stop. Note that any exceptions thrown by the * operation aren't caught (unless the operation catches them as part of its implementation) and don't cause a retry to happen. * - * [[retryWithErrorMode]] is a special case of [[scheduledWithErrorMode]] with a given set of defaults. See implementations of - * [[RetryConfig]] for more details. + * [[retryWithErrorMode]] is a special case of [[scheduledWithErrorMode]] with a given set of defaults. See [[RetryConfig]] for more + * details. * * @param em * The error mode to use, which specifies when a result value is considered success, and when a failure. * @param config - * The retry config - See implementations of [[RetryConfig]]. + * The retry config - See [[RetryConfig]]. * @param operation * The operation to retry. 
* @return diff --git a/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala b/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala index f5ff5944..5d0b108e 100644 --- a/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala +++ b/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala @@ -172,8 +172,9 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi if counter <= 2 then Left(errorMessage) else Right("Success") + val adaptive = AdaptiveRetry(TokenBucket(5)) // when - val result = retryEither(RetryConfig.immediate(5).adaptive(TokenBucket(5), 1))(f) + val result = adaptive[String, String](RetryConfig.immediate(5))(f) // then result.value shouldBe "Success" @@ -189,8 +190,9 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi counter += 1 Left(errorMessage) + val adaptive = AdaptiveRetry(TokenBucket(2)) // when - val result = retryEither(RetryConfig.immediate(5).adaptive(TokenBucket(2), 1))(f) + val result = adaptive(RetryConfig.immediate[String, String](5))(f) // then result.left.value shouldBe errorMessage From e2616e62144cad1c4b7f4dd2edda6823769f27bc Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski Date: Mon, 23 Dec 2024 14:58:09 +0100 Subject: [PATCH 04/15] AdaptiveRetry refinement, docs --- .../scala/ox/resilience/AdaptiveRetry.scala | 109 ++++++++++++++---- core/src/main/scala/ox/resilience/retry.scala | 3 +- .../ox/resilience/DelayedRetryTest.scala | 26 +++++ .../ox/resilience/ImmediateRetryTest.scala | 26 ++++- doc/utils/retries.md | 54 +++++++++ 5 files changed, 193 insertions(+), 25 deletions(-) diff --git a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala index 9cf86294..3ef56963 100644 --- a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala +++ b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala @@ -5,26 +5,64 @@ import ox.scheduling.scheduledWithErrorMode import scala.util.Try +/** Provides mechanism of 
"adaptive" retries. For every retry we take [[failureCost]] from token bucket and for every success we add back to + * the bucket [[successReward]] tokens. One instance can be "shared" across multiple operations against constrained resource. This allows + * to retry in case of transient failures and at the same time doesn't produce more load on systemic failure of a resource. + * + * @param tokenBucket + * instance of [[TokenBucket]]. Provided instance is thread safe and can be "shared" between different instances of [[AdaptiveRetry]] + * with different [[failureCost]] for example. + * @param failureCost + * Number of tokens to take from [[tokenBucket]] when retrying. + * @param successReward + * Number of tokens to add back to [[tokenBucket]] after successful operation. + */ case class AdaptiveRetry( - tokenBucket: TokenBucket + tokenBucket: TokenBucket, + failureCost: Int = 1, + successReward: Int = 1 ): + /** Retries an operation using the given error mode until it succeeds or the config decides to stop. Note that any exceptions thrown by + * the operation aren't caught (unless the operation catches them as part of its implementation) and don't cause a retry to happen. + * + * This is a special case of [[scheduledWithErrorMode]] with a given set of defaults. See [[RetryConfig]] for more details. + * + * @param config + * The retry config - See [[RetryConfig]]. + * @param isFailure + * Function to decide if returned [[E]] should be considered failure. + * @param errorMode + * The error mode to use, which specifies when a result value is considered success, and when a failure. + * @param operation + * The operation to retry. + * @tparam E + * type of error. + * @tparam T + * type of result of an operation. + * @tparam F + * the context inside which [[E]] or [[T]] are returned. + * @return + * Either: + * - the result of the function if it eventually succeeds, in the context of `F`, as dictated by the error mode. 
+ * - the error `E` in context `F` as returned by the last attempt if the config decides to stop. + * @see + * [[scheduledWithErrorMode]] + */ def apply[E, T, F[_]]( config: RetryConfig[E, T], - errorMode: ErrorMode[E, F], - failureCost: E => Int, - successReward: T => Int, - isFailure: E => Boolean + isFailure: E => Boolean = (_: E) => true, + errorMode: ErrorMode[E, F] )(operation: => F[T]): F[T] = val isWorthRetrying: E => Boolean = (error: E) => // if we cannot acquire token we short circuit and stop retrying val isWorth = config.resultPolicy.isWorthRetrying(error) - if isWorth && isFailure(error) then tokenBucket.tryAcquire(failureCost(error)) + if isWorth && isFailure(error) then tokenBucket.tryAcquire(failureCost) else isWorth val isSuccess: T => Boolean = (result: T) => // if we consider this result as success token are given back to bucket if config.resultPolicy.isSuccess(result) then - tokenBucket.release(successReward(result)) + tokenBucket.release(successReward) true else false end isSuccess @@ -33,18 +71,49 @@ case class AdaptiveRetry( scheduledWithErrorMode(errorMode)(config.copy(resultPolicy = resultPolicy).toScheduledConfig)(operation) end apply - def apply[E, T]( - config: RetryConfig[E, T], - failureCost: E => Int = (_: E) => 1, - successReward: T => Int = (_: T) => 1, - isFailure: E => Boolean = (_: E) => true - )(operation: => Either[E, T]): Either[E, T] = - apply(config, EitherMode[E], failureCost, successReward, isFailure)(operation) - - def apply[T]( - config: RetryConfig[Throwable, T] - )(operation: => T): T = - apply(config, EitherMode[Throwable], (_: Throwable) => 1, (_: T) => 1, (_: Throwable) => true)(Try(operation).toEither) - .fold(throw _, identity) + /** Retries an operation returning an [[scala.util.Either]] until it succeeds or the config decides to stop. Note that any exceptions + * thrown by the operation aren't caught and don't cause a retry to happen. 
+ * + * [[retryEither]] is a special case of [[scheduledWithErrorMode]] with a given set of defaults. See implementations of [[RetryConfig]] + * for more details. + * + * @param config + * The retry config - see [[RetryConfig]]. + * @param isFailure + * Function to decide if returned [[E]] should be considered failure. + * @param operation + * The operation to retry. + * @tparam E + * type of error. + * @tparam T + * type of result of an operation. + * @return + * A [[scala.util.Right]] if the function eventually succeeds, or, otherwise, a [[scala.util.Left]] with the error from the last + * attempt. + * @see + * [[scheduledEither]] + */ + def retryEither[E, T](config: RetryConfig[E, T], isFailure: E => Boolean = (_: E) => true)(operation: => Either[E, T]): Either[E, T] = + apply(config, isFailure, EitherMode[E])(operation) + + /** Retries an operation returning a direct result until it succeeds or the config decides to stop. + * + * [[retry]] is a special case of [[scheduledWithErrorMode]] with a given set of defaults. See [[RetryConfig]]. + * + * @param config + * The retry config - see [[RetryConfig]]. + * @param isFailure + * Function to decide if returned [[Throwable]] should be considered failure. + * @param operation + * The operation to retry. + * @return + * The result of the function if it eventually succeeds. + * @throws anything + * The exception thrown by the last attempt if the config decides to stop. 
+ * @see + * [[scheduled]] + */ + def retry[T](config: RetryConfig[Throwable, T], isFailure: Throwable => Boolean = _ => true)(operation: => T): T = + apply(config, isFailure, EitherMode[Throwable])(Try(operation).toEither).fold(throw _, identity) end AdaptiveRetry diff --git a/core/src/main/scala/ox/resilience/retry.scala b/core/src/main/scala/ox/resilience/retry.scala index cb0c190e..6573221e 100644 --- a/core/src/main/scala/ox/resilience/retry.scala +++ b/core/src/main/scala/ox/resilience/retry.scala @@ -26,8 +26,7 @@ def retry[T](config: RetryConfig[Throwable, T])(operation: => T): T = /** Retries an operation returning an [[scala.util.Either]] until it succeeds or the config decides to stop. Note that any exceptions thrown * by the operation aren't caught and don't cause a retry to happen. * - * [[retryEither]] is a special case of [[scheduledEither]] with a given set of defaults. See implementations of [[RetryConfig]] for more - * details. + * [[retryEither]] is a special case of [[scheduledEither]] with a given set of defaults. See [[RetryConfig]] for more details. * * @param config * The retry config - see [[RetryConfig]]. 
diff --git a/core/src/test/scala/ox/resilience/DelayedRetryTest.scala b/core/src/test/scala/ox/resilience/DelayedRetryTest.scala index 1b41257a..c0b0b741 100644 --- a/core/src/test/scala/ox/resilience/DelayedRetryTest.scala +++ b/core/src/test/scala/ox/resilience/DelayedRetryTest.scala @@ -7,6 +7,7 @@ import ox.util.ElapsedTime import ox.resilience.* import scala.concurrent.duration.* +import scala.util.Try class DelayedRetryTest extends AnyFlatSpec with Matchers with EitherValues with TryValues with ElapsedTime: @@ -68,4 +69,29 @@ class DelayedRetryTest extends AnyFlatSpec with Matchers with EitherValues with elapsedTime.toMillis should be >= maxRetries * sleep.toMillis counter shouldBe 4 } + + behavior of "adaptive retry with delayed config" + + it should "retry a failing function forever or until adaptive retry blocks it" in { + // given + var counter = 0 + val sleep = 2.millis + val retriesUntilSuccess = 1_000 + val successfulResult = 42 + val bucketSize = 500 + val errorMessage = "boom" + + def f = + counter += 1 + if counter <= retriesUntilSuccess then throw RuntimeException(errorMessage) else successfulResult + + // when + val adaptive = AdaptiveRetry(TokenBucket(bucketSize)) + val result = the[RuntimeException] thrownBy adaptive.retry(RetryConfig.delayForever(sleep))(f) + + // then + result should have message errorMessage + counter shouldBe bucketSize + 1 + } + end DelayedRetryTest diff --git a/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala b/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala index 5d0b108e..8e382b9e 100644 --- a/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala +++ b/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala @@ -162,6 +162,8 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi counter shouldBe 4 } + behavior of "Adaptive retry with immediate config" + it should "retry a failing adaptive" in { // given var counter = 0 @@ -174,14 +176,14 @@ class ImmediateRetryTest 
extends AnyFlatSpec with EitherValues with TryValues wi val adaptive = AdaptiveRetry(TokenBucket(5)) // when - val result = adaptive[String, String](RetryConfig.immediate(5))(f) + val result = adaptive.retryEither(RetryConfig.immediate(5))(f) // then result.value shouldBe "Success" counter shouldBe 3 } - it should "retry a failing adaptive 2" in { + it should "stop retrying after emptying bucket" in { // given var counter = 0 val errorMessage = "boom" @@ -192,7 +194,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi val adaptive = AdaptiveRetry(TokenBucket(2)) // when - val result = adaptive(RetryConfig.immediate[String, String](5))(f) + val result = adaptive.retryEither(RetryConfig.immediate[String, String](5))(f) // then result.left.value shouldBe errorMessage @@ -200,4 +202,22 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi counter shouldBe 3 } + it should "not take tokens if isFailure returns false" in { + // given + var counter = 0 + val errorMessage = "boom" + + def f = + counter += 1 + Left(errorMessage) + + val adaptive = AdaptiveRetry(TokenBucket(2)) + // when + val result = adaptive.retryEither[String, String](RetryConfig.immediate(5), _ => false)(f) + + // then + result.left.value shouldBe errorMessage + counter shouldBe 6 + } + end ImmediateRetryTest diff --git a/doc/utils/retries.md b/doc/utils/retries.md index 6706f096..3839976e 100644 --- a/doc/utils/retries.md +++ b/doc/utils/retries.md @@ -125,3 +125,57 @@ retryWithErrorMode(UnionMode[String])(RetryConfig(Schedule.Immediate(3), ResultP ``` See the tests in `ox.resilience.*` for more. + +# Adaptive retries +This retry mechanism by implementing part of [AdaptiveRetryStrategy](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/retries/AdaptiveRetryStrategy.html) from `aws-sdk-java-v2`. Class `AdaptiveRetry` contains thread safe `TokenBucket` that acts as a circuit breaker for instance of this class. 
+For every retry, tokens are taken from bucket, if there is not enough we stop retrying. For every successful operations tokens are added back to bucket. +This allows for "normal" retry mechanism in case of transient failures, but does not allow to generate for example 4 times the load in case of systemic failure (if we retry every operation 3 times). + +## Configuration +Instance of `AdaptiveRetry` consists of three parts: +- `tokenBucket: TokenBucket` - instance of `TokenBucket`, can be shared across multiple instances of `AdaptiveRetry`. +- `failureCost: Int` - cost of tokens that are needed for retry in case of failure. +- `successReward: Int` - number of tokens that are added back to token bucket after successful attempt. + +`RetryConfig` and `ResultPolicy` are defined the same as with "normal" retry mechanism, all the information above also applies here. + +## API +To retry operation on `AdaptiveRetry` instance you can use one of three operations: +- `def apply[E, T, F[_]](config: RetryConfig[E, T], isFailure: E => Boolean = (_: E) => true, errorMode: ErrorMode[E, F])(operation: => F[T]): F[T]` - where `E` represents error type, `T` result type, and `F[_]` context in which they are returned. This method is similar to `retryWithErrorMode`. +- `def retryEither[E, T](config: RetryConfig[E, T], isFailure: E => Boolean = (_: E) => true)(operation: => Either[E, T]): Either[E, T]` - This method is the equivalent of `retryEither`. +- `def retry[T](config: RetryConfig[Throwable, T], isFailure: Throwable => Boolean = _ => true)(operation: => T): T` - This method is the equivalent of `retry`. + +`retry` and `retryEither` are implemented in terms of `apply` method. 
+ +## Examples + +If you want to use this mechanism you need to run operation through instance of `AdaptiveRetry`: +```scala +import ox.resilience.{AdaptiveRetry, TokenBucket} + +val tokenBucket = TokenBucket(bucketSize = 500) +val adaptive = AdaptiveRetry(tokenBucket, failureCost = 5, successReward = 4) + +// various configs with custom schedules and default ResultPolicy +adaptive.retry(RetryConfig.immediate(3))(directOperation) +adaptive.retry(RetryConfig.delay(3, 100.millis))(directOperation) +adaptive.retry(RetryConfig.backoff(3, 100.millis))(directOperation) // defaults: maxDelay = 1.minute, jitter = Jitter.None +adaptive.retry(RetryConfig.backoff(3, 100.millis, 5.minutes, Jitter.Equal))(directOperation) + +// result policies +// custom success +adaptive.retry(RetryConfig(Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)))(directOperation) +// fail fast on certain errors +adaptive.retry(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_.getMessage != "fatal error")))(directOperation) +adaptive.retryEither(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(eitherOperation) + +// custom error mode +adaptive(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")), errorMode = UnionMode[String])(unionOperation) + +// consider "throttling error" not as a failure that should incur the retry penalty +adaptive(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")), isFailure = _ != "throttling error", errorMode = UnionMode[String])(unionOperation) +``` + +Instance of `AdaptiveRetry` can be shared for different operation, for example different operations on the same constrained resource. + +See the tests in `ox.resilience.*` for more. 
\ No newline at end of file From 04980f2c5f6b9554f2605212e1dc3b2871971b52 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski Date: Fri, 27 Dec 2024 11:54:21 +0100 Subject: [PATCH 05/15] changes after review --- .../scala/ox/resilience/AdaptiveRetry.scala | 41 +++++++++++-------- .../ox/resilience/DelayedRetryTest.scala | 2 +- .../ox/resilience/ImmediateRetryTest.scala | 16 ++++---- doc/utils/retries.md | 6 +-- 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala index 3ef56963..edbc4b2f 100644 --- a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala +++ b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala @@ -5,9 +5,11 @@ import ox.scheduling.scheduledWithErrorMode import scala.util.Try -/** Provides mechanism of "adaptive" retries. For every retry we take [[failureCost]] from token bucket and for every success we add back to - * the bucket [[successReward]] tokens. One instance can be "shared" across multiple operations against constrained resource. This allows - * to retry in case of transient failures and at the same time doesn't produce more load on systemic failure of a resource. +/** Provides mechanism of "adaptive" retries. Inspired by `AdaptiveRetryStrategy` from `aws-sdk-java-v2` and the talk "AWS re:Invent 2024 - + * Try again: The tools and techniques behind resilient systems". For every retry we take [[failureCost]] from token bucket and for every + * success we add back to the bucket [[successReward]] tokens. Instance of this class is thread-safe and can be "shared" across multiple + * operations against constrained resource. This allows to retry in case of transient failures and at the same time doesn't produce more + * load on systemic failure of a resource. * * @param tokenBucket * instance of [[TokenBucket]]. 
Provided instance is thread safe and can be "shared" between different instances of [[AdaptiveRetry]] @@ -19,8 +21,8 @@ import scala.util.Try */ case class AdaptiveRetry( tokenBucket: TokenBucket, - failureCost: Int = 1, - successReward: Int = 1 + failureCost: Int, + successReward: Int ): /** Retries an operation using the given error mode until it succeeds or the config decides to stop. Note that any exceptions thrown by * the operation aren't caught (unless the operation catches them as part of its implementation) and don't cause a retry to happen. @@ -30,7 +32,7 @@ case class AdaptiveRetry( * @param config * The retry config - See [[RetryConfig]]. * @param isFailure - * Function to decide if returned [[E]] should be considered failure. + * Function to decide if returned result [[T]] should be considered failure. * @param errorMode * The error mode to use, which specifies when a result value is considered success, and when a failure. * @param operation @@ -48,20 +50,20 @@ case class AdaptiveRetry( * @see * [[scheduledWithErrorMode]] */ - def apply[E, T, F[_]]( + def retryWithErrorMode[E, T, F[_]]( config: RetryConfig[E, T], - isFailure: E => Boolean = (_: E) => true, + isFailure: T => Boolean = (_: T) => true, errorMode: ErrorMode[E, F] )(operation: => F[T]): F[T] = val isWorthRetrying: E => Boolean = (error: E) => // if we cannot acquire token we short circuit and stop retrying val isWorth = config.resultPolicy.isWorthRetrying(error) - if isWorth && isFailure(error) then tokenBucket.tryAcquire(failureCost) - else isWorth + if isWorth then tokenBucket.tryAcquire(failureCost) + else false val isSuccess: T => Boolean = (result: T) => // if we consider this result as success token are given back to bucket - if config.resultPolicy.isSuccess(result) then + if config.resultPolicy.isSuccess(result) && !isFailure(result) then tokenBucket.release(successReward) true else false @@ -69,7 +71,7 @@ case class AdaptiveRetry( val resultPolicy = ResultPolicy(isSuccess, 
isWorthRetrying) scheduledWithErrorMode(errorMode)(config.copy(resultPolicy = resultPolicy).toScheduledConfig)(operation) - end apply + end retryWithErrorMode /** Retries an operation returning an [[scala.util.Either]] until it succeeds or the config decides to stop. Note that any exceptions * thrown by the operation aren't caught and don't cause a retry to happen. @@ -80,7 +82,7 @@ case class AdaptiveRetry( * @param config * The retry config - see [[RetryConfig]]. * @param isFailure - * Function to decide if returned [[E]] should be considered failure. + * Function to decide if returned result [[T]] should be considered failure. * @param operation * The operation to retry. * @tparam E @@ -93,8 +95,8 @@ case class AdaptiveRetry( * @see * [[scheduledEither]] */ - def retryEither[E, T](config: RetryConfig[E, T], isFailure: E => Boolean = (_: E) => true)(operation: => Either[E, T]): Either[E, T] = - apply(config, isFailure, EitherMode[E])(operation) + def retryEither[E, T](config: RetryConfig[E, T], isFailure: T => Boolean = (_: T) => true)(operation: => Either[E, T]): Either[E, T] = + retryWithErrorMode(config, isFailure, EitherMode[E])(operation) /** Retries an operation returning a direct result until it succeeds or the config decides to stop. * @@ -103,7 +105,7 @@ case class AdaptiveRetry( * @param config * The retry config - see [[RetryConfig]]. * @param isFailure - * Function to decide if returned [[Throwable]] should be considered failure. + * Function to decide if returned result [[T]] should be considered failure. * @param operation * The operation to retry. 
* @return @@ -113,7 +115,10 @@ case class AdaptiveRetry( * @see * [[scheduled]] */ - def retry[T](config: RetryConfig[Throwable, T], isFailure: Throwable => Boolean = _ => true)(operation: => T): T = - apply(config, isFailure, EitherMode[Throwable])(Try(operation).toEither).fold(throw _, identity) + def retry[T](config: RetryConfig[Throwable, T], isFailure: T => Boolean = (_: T) => true)(operation: => T): T = + retryWithErrorMode(config, isFailure, EitherMode[Throwable])(Try(operation).toEither).fold(throw _, identity) end AdaptiveRetry + +object AdaptiveRetry: + def default: AdaptiveRetry = AdaptiveRetry(TokenBucket(500), 5, 1) diff --git a/core/src/test/scala/ox/resilience/DelayedRetryTest.scala b/core/src/test/scala/ox/resilience/DelayedRetryTest.scala index c0b0b741..278e294b 100644 --- a/core/src/test/scala/ox/resilience/DelayedRetryTest.scala +++ b/core/src/test/scala/ox/resilience/DelayedRetryTest.scala @@ -86,7 +86,7 @@ class DelayedRetryTest extends AnyFlatSpec with Matchers with EitherValues with if counter <= retriesUntilSuccess then throw RuntimeException(errorMessage) else successfulResult // when - val adaptive = AdaptiveRetry(TokenBucket(bucketSize)) + val adaptive = AdaptiveRetry(TokenBucket(bucketSize), 1, 1) val result = the[RuntimeException] thrownBy adaptive.retry(RetryConfig.delayForever(sleep))(f) // then diff --git a/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala b/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala index 8e382b9e..4d56759f 100644 --- a/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala +++ b/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala @@ -174,7 +174,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi if counter <= 2 then Left(errorMessage) else Right("Success") - val adaptive = AdaptiveRetry(TokenBucket(5)) + val adaptive = AdaptiveRetry(TokenBucket(5), 1, 1) // when val result = adaptive.retryEither(RetryConfig.immediate(5))(f) @@ -192,7 +192,7 @@ 
class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi counter += 1 Left(errorMessage) - val adaptive = AdaptiveRetry(TokenBucket(2)) + val adaptive = AdaptiveRetry(TokenBucket(2), 1, 1) // when val result = adaptive.retryEither(RetryConfig.immediate[String, String](5))(f) @@ -202,21 +202,21 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi counter shouldBe 3 } - it should "not take tokens if isFailure returns false" in { + it should "not succeed if isFailure returns true" in { // given var counter = 0 - val errorMessage = "boom" + val message = "success" def f = counter += 1 - Left(errorMessage) + Right(message) - val adaptive = AdaptiveRetry(TokenBucket(2)) + val adaptive = AdaptiveRetry(TokenBucket(2), 1, 1) // when - val result = adaptive.retryEither[String, String](RetryConfig.immediate(5), _ => false)(f) + val result = adaptive.retryEither[String, String](RetryConfig.immediate(5), _ => true)(f) // then - result.left.value shouldBe errorMessage + result.value shouldBe message counter shouldBe 6 } diff --git a/doc/utils/retries.md b/doc/utils/retries.md index 3839976e..0ae01cf5 100644 --- a/doc/utils/retries.md +++ b/doc/utils/retries.md @@ -127,7 +127,7 @@ retryWithErrorMode(UnionMode[String])(RetryConfig(Schedule.Immediate(3), ResultP See the tests in `ox.resilience.*` for more. # Adaptive retries -This retry mechanism by implementing part of [AdaptiveRetryStrategy](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/retries/AdaptiveRetryStrategy.html) from `aws-sdk-java-v2`. Class `AdaptiveRetry` contains thread safe `TokenBucket` that acts as a circuit breaker for instance of this class. +This retry mechanism is inspired by the talk `AWS re:Invent 2024 - Try again: The tools and techniques behind resilient systems` and [AdaptiveRetryStrategy](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/retries/AdaptiveRetryStrategy.html) from `aws-sdk-java-v2`. 
Class `AdaptiveRetry` contains thread safe `TokenBucket` that acts as a circuit breaker for instance of this class. For every retry, tokens are taken from bucket, if there is not enough we stop retrying. For every successful operations tokens are added back to bucket. This allows for "normal" retry mechanism in case of transient failures, but does not allow to generate for example 4 times the load in case of systemic failure (if we retry every operation 3 times). @@ -170,10 +170,10 @@ adaptive.retry(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_.getMe adaptive.retryEither(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(eitherOperation) // custom error mode -adaptive(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")), errorMode = UnionMode[String])(unionOperation) +adaptive.retryWithErrorMode(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")), errorMode = UnionMode[String])(unionOperation) // consider "throttling error" not as a failure that should incur the retry penalty -adaptive(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")), isFailure = _ != "throttling error", errorMode = UnionMode[String])(unionOperation) +adaptive.retryWithErrorMode(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")), isFailure = _ != "throttling error", errorMode = UnionMode[String])(unionOperation) ``` Instance of `AdaptiveRetry` can be shared for different operation, for example different operations on the same constrained resource. 
From bd97a6467436bd441a278bcd9ad864bc26cec0bb Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski Date: Fri, 27 Dec 2024 12:43:58 +0100 Subject: [PATCH 06/15] changes after review --- core/src/main/scala/ox/resilience/AdaptiveRetry.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala index edbc4b2f..d2287f6a 100644 --- a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala +++ b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala @@ -52,7 +52,7 @@ case class AdaptiveRetry( */ def retryWithErrorMode[E, T, F[_]]( config: RetryConfig[E, T], - isFailure: T => Boolean = (_: T) => true, + isFailure: T => Boolean = (_: T) => false, errorMode: ErrorMode[E, F] )(operation: => F[T]): F[T] = val isWorthRetrying: E => Boolean = (error: E) => @@ -95,7 +95,7 @@ case class AdaptiveRetry( * @see * [[scheduledEither]] */ - def retryEither[E, T](config: RetryConfig[E, T], isFailure: T => Boolean = (_: T) => true)(operation: => Either[E, T]): Either[E, T] = + def retryEither[E, T](config: RetryConfig[E, T], isFailure: T => Boolean = (_: T) => false)(operation: => Either[E, T]): Either[E, T] = retryWithErrorMode(config, isFailure, EitherMode[E])(operation) /** Retries an operation returning a direct result until it succeeds or the config decides to stop. 
@@ -115,7 +115,7 @@ case class AdaptiveRetry( * @see * [[scheduled]] */ - def retry[T](config: RetryConfig[Throwable, T], isFailure: T => Boolean = (_: T) => true)(operation: => T): T = + def retry[T](config: RetryConfig[Throwable, T], isFailure: T => Boolean = (_: T) => false)(operation: => T): T = retryWithErrorMode(config, isFailure, EitherMode[Throwable])(Try(operation).toEither).fold(throw _, identity) end AdaptiveRetry From de125eeb147621717b62454820dc5a5febdbbdb8 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski Date: Fri, 27 Dec 2024 15:24:46 +0100 Subject: [PATCH 07/15] changes after review --- .../scala/ox/resilience/AdaptiveRetry.scala | 60 +++++++++++-------- .../main/scala/ox/scheduling/scheduled.scala | 16 ++++- .../ox/resilience/ImmediateRetryTest.scala | 5 +- doc/utils/retries.md | 14 +++-- 4 files changed, 59 insertions(+), 36 deletions(-) diff --git a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala index d2287f6a..a4b49510 100644 --- a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala +++ b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala @@ -1,7 +1,7 @@ package ox.resilience import ox.{EitherMode, ErrorMode} -import ox.scheduling.scheduledWithErrorMode +import ox.scheduling.{ScheduledConfig, SleepMode, scheduledWithErrorMode} import scala.util.Try @@ -31,8 +31,9 @@ case class AdaptiveRetry( * * @param config * The retry config - See [[RetryConfig]]. - * @param isFailure - * Function to decide if returned result [[T]] should be considered failure. + * @param shouldPayPenaltyCost + * Function to decide if returned result [[T]] should be considered failure in terms of paying cost for retry. Penalty is paid only if + * it is decided to retry operation, the penalty will not be paid for successful operation. Defaults to `true`. * @param errorMode * The error mode to use, which specifies when a result value is considered success, and when a failure. 
* @param operation @@ -52,25 +53,28 @@ case class AdaptiveRetry( */ def retryWithErrorMode[E, T, F[_]]( config: RetryConfig[E, T], - isFailure: T => Boolean = (_: T) => false, + shouldPayPenaltyCost: T => Boolean = (_: T) => true, errorMode: ErrorMode[E, F] )(operation: => F[T]): F[T] = - val isWorthRetrying: E => Boolean = (error: E) => - // if we cannot acquire token we short circuit and stop retrying - val isWorth = config.resultPolicy.isWorthRetrying(error) - if isWorth then tokenBucket.tryAcquire(failureCost) - else false + val shouldAttempt: Either[E, T] => Boolean = result => + // for result T check if penalty should be paid, in case of E we always pay. + // If shouldPayPenaltyCost return false, we always attempt + if result.map(shouldPayPenaltyCost).getOrElse(true) then tokenBucket.tryAcquire(failureCost) + else true - val isSuccess: T => Boolean = (result: T) => - // if we consider this result as success token are given back to bucket - if config.resultPolicy.isSuccess(result) && !isFailure(result) then - tokenBucket.release(successReward) - true - else false - end isSuccess + val afterSuccess: T => Unit = _ => tokenBucket.release(successReward) - val resultPolicy = ResultPolicy(isSuccess, isWorthRetrying) - scheduledWithErrorMode(errorMode)(config.copy(resultPolicy = resultPolicy).toScheduledConfig)(operation) + val scheduledConfig = ScheduledConfig( + config.schedule, + config.onRetry, + shouldContinueOnError = config.resultPolicy.isWorthRetrying, + shouldContinueOnResult = t => !config.resultPolicy.isSuccess(t), + shouldAttempt = shouldAttempt, + afterSuccess = afterSuccess, + sleepMode = SleepMode.Delay + ) + + scheduledWithErrorMode(errorMode)(scheduledConfig)(operation) end retryWithErrorMode /** Retries an operation returning an [[scala.util.Either]] until it succeeds or the config decides to stop. Note that any exceptions @@ -81,8 +85,9 @@ case class AdaptiveRetry( * * @param config * The retry config - see [[RetryConfig]]. 
- * @param isFailure - * Function to decide if returned result [[T]] should be considered failure. + * @param shouldPayPenaltyCost + * Function to decide if returned result [[T]] should be considered failure in terms of paying cost for retry. Penalty is paid only if + * it is decided to retry operation, the penalty will not be paid for successful operation. * @param operation * The operation to retry. * @tparam E @@ -95,8 +100,10 @@ case class AdaptiveRetry( * @see * [[scheduledEither]] */ - def retryEither[E, T](config: RetryConfig[E, T], isFailure: T => Boolean = (_: T) => false)(operation: => Either[E, T]): Either[E, T] = - retryWithErrorMode(config, isFailure, EitherMode[E])(operation) + def retryEither[E, T](config: RetryConfig[E, T], shouldPayPenaltyCost: T => Boolean = (_: T) => false)( + operation: => Either[E, T] + ): Either[E, T] = + retryWithErrorMode(config, shouldPayPenaltyCost, EitherMode[E])(operation) /** Retries an operation returning a direct result until it succeeds or the config decides to stop. * @@ -104,8 +111,9 @@ case class AdaptiveRetry( * * @param config * The retry config - see [[RetryConfig]]. - * @param isFailure - * Function to decide if returned result [[T]] should be considered failure. + * @param shouldPayPenaltyCost + * Function to decide if returned result [[T]] should be considered failure in terms of paying cost for retry. Penalty is paid only if + * it is decided to retry operation, the penalty will not be paid for successful operation. * @param operation * The operation to retry. 
* @return @@ -115,8 +123,8 @@ case class AdaptiveRetry( * @see * [[scheduled]] */ - def retry[T](config: RetryConfig[Throwable, T], isFailure: T => Boolean = (_: T) => false)(operation: => T): T = - retryWithErrorMode(config, isFailure, EitherMode[Throwable])(Try(operation).toEither).fold(throw _, identity) + def retry[T](config: RetryConfig[Throwable, T], shouldPayPenaltyCost: T => Boolean = (_: T) => false)(operation: => T): T = + retryWithErrorMode(config, shouldPayPenaltyCost, EitherMode[Throwable])(Try(operation).toEither).fold(throw _, identity) end AdaptiveRetry diff --git a/core/src/main/scala/ox/scheduling/scheduled.scala b/core/src/main/scala/ox/scheduling/scheduled.scala index 37c853ec..340e0acb 100644 --- a/core/src/main/scala/ox/scheduling/scheduled.scala +++ b/core/src/main/scala/ox/scheduling/scheduled.scala @@ -33,6 +33,12 @@ end SleepMode * @param shouldContinueOnResult * A function that determines whether to continue the loop after a success. The function receives the value that was emitted by the last * invocation. Defaults to [[_ => true]]. + * + * @param shouldAttempt + * A function that determines whether to attempt a retry. This function is called after shouldContinueOnError or shouldContinueOnResult + * returns true and the result is considered for retry, it may perform side effects to determine if attempt should be made. + * @param afterSuccess + * A function that is invoked after every successful attempt. Performs side effects. * @param sleepMode * The mode that specifies how to interpret the duration provided by the schedule. See [[SleepMode]] for more details. 
* @tparam E @@ -46,6 +52,8 @@ case class ScheduledConfig[E, T]( onOperationResult: (Int, Either[E, T]) => Unit = (_: Int, _: Either[E, T]) => (), shouldContinueOnError: E => Boolean = (_: E) => false, shouldContinueOnResult: T => Boolean = (_: T) => true, + shouldAttempt: Either[E, T] => Boolean = (_: Either[E, T]) => true, + afterSuccess: T => Unit = (_: T) => (), sleepMode: SleepMode = SleepMode.Interval ) @@ -109,7 +117,7 @@ def scheduledWithErrorMode[E, F[_], T](em: ErrorMode[E, F])(config: ScheduledCon val error = em.getError(v) config.onOperationResult(invocation, Left(error)) - if config.shouldContinueOnError(error) && remainingInvocations.forall(_ > 0) then + if config.shouldContinueOnError(error) && remainingInvocations.forall(_ > 0) && config.shouldAttempt(Left(error)) then val delay = sleepIfNeeded(startTimestamp) loop(invocation + 1, remainingInvocations.map(_ - 1), Some(delay)) else v @@ -117,10 +125,12 @@ def scheduledWithErrorMode[E, F[_], T](em: ErrorMode[E, F])(config: ScheduledCon val result = em.getT(v) config.onOperationResult(invocation, Right(result)) - if config.shouldContinueOnResult(result) && remainingInvocations.forall(_ > 0) then + if config.shouldContinueOnResult(result) && remainingInvocations.forall(_ > 0) && config.shouldAttempt(Right(result)) then val delay = sleepIfNeeded(startTimestamp) loop(invocation + 1, remainingInvocations.map(_ - 1), Some(delay)) - else v + else + config.afterSuccess(result) + v end match end loop diff --git a/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala b/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala index 4d56759f..91e9aa5e 100644 --- a/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala +++ b/core/src/test/scala/ox/resilience/ImmediateRetryTest.scala @@ -202,7 +202,7 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi counter shouldBe 3 } - it should "not succeed if isFailure returns true" in { + it should "not pay exceptionCost if result T 
is going to be retried and shouldPayPenaltyCost returns false" in { // given var counter = 0 val message = "success" @@ -212,8 +212,9 @@ class ImmediateRetryTest extends AnyFlatSpec with EitherValues with TryValues wi Right(message) val adaptive = AdaptiveRetry(TokenBucket(2), 1, 1) + val retryConfig = RetryConfig.immediate(5).copy(resultPolicy = ResultPolicy.successfulWhen[String, String](_ => false)) // when - val result = adaptive.retryEither[String, String](RetryConfig.immediate(5), _ => true)(f) + val result = adaptive.retryEither(retryConfig, _ => false)(f) // then result.value shouldBe message diff --git a/doc/utils/retries.md b/doc/utils/retries.md index 0ae01cf5..d5fcbae1 100644 --- a/doc/utils/retries.md +++ b/doc/utils/retries.md @@ -127,7 +127,8 @@ retryWithErrorMode(UnionMode[String])(RetryConfig(Schedule.Immediate(3), ResultP See the tests in `ox.resilience.*` for more. # Adaptive retries -This retry mechanism is inspired by the talk `AWS re:Invent 2024 - Try again: The tools and techniques behind resilient systems` and [AdaptiveRetryStrategy](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/retries/AdaptiveRetryStrategy.html) from `aws-sdk-java-v2`. Class `AdaptiveRetry` contains thread safe `TokenBucket` that acts as a circuit breaker for instance of this class. +This retry mechanism is inspired by the talk `AWS re:Invent 2024 - Try again: The tools and techniques behind resilient systems` and [AdaptiveRetryStrategy](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/retries/AdaptiveRetryStrategy.html) from `aws-sdk-java-v2`. +Class `AdaptiveRetry` contains thread-safe `TokenBucket` that acts as a circuit breaker for instance of this class. For every retry, tokens are taken from bucket, if there is not enough we stop retrying. For every successful operations tokens are added back to bucket. 
This allows for "normal" retry mechanism in case of transient failures, but does not allow to generate for example 4 times the load in case of systemic failure (if we retry every operation 3 times). @@ -141,11 +142,14 @@ Instance of `AdaptiveRetry` consists of three parts: ## API To retry operation on `AdaptiveRetry` instance you can use one of three operations: -- `def apply[E, T, F[_]](config: RetryConfig[E, T], isFailure: E => Boolean = (_: E) => true, errorMode: ErrorMode[E, F])(operation: => F[T]): F[T]` - where `E` represents error type, `T` result type, and `F[_]` context in which they are returned. This method is similar to `retryWithErrorMode` -- `def retryEither[E, T](config: RetryConfig[E, T], isFailure: E => Boolean = (_: E) => true)(operation: => Either[E, T]): Either[E, T]` - This method is equivalent of `retryEither`. -- `def retry[T](config: RetryConfig[Throwable, T], isFailure: Throwable => Boolean = _ => true)(operation: => T): T` - This method is equivalent of `retry` +- `def retryWithErrorMode[E, T, F[_]](config: RetryConfig[E, T], shouldPayPenaltyCost: T => Boolean = (_: T) => true, errorMode: ErrorMode[E, F])(operation: => F[T]): F[T]` - where `E` represents error type, `T` result type, and `F[_]` context in which they are returned. This method is similar to `retryWithErrorMode` +- `def retryEither[E, T](config: RetryConfig[E, T], shouldPayPenaltyCost: T => Boolean = (_: T) => true)(operation: => Either[E, T]): Either[E, T]` - This method is equivalent of `retryEither`. +- `def retry[T](config: RetryConfig[Throwable, T], shouldPayPenaltyCost: T => Boolean = (_: T) => true)(operation: => T): T` - This method is equivalent of `retry` -`retry` and `retryEither` are implemented in terms of `apply` method. +`retry` and `retryEither` are implemented in terms of `retryWithErrorMode` method. + +`shouldPayPenaltyCost` determines if result `T` should be considered failure in terms of paying cost for retry. 
+Penalty is paid only if it is decided to retry operation, the penalty will not be paid for successful operation. ## Examples From 08310b0841230897e862ff04935d4fa3921d386b Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski Date: Mon, 30 Dec 2024 15:56:52 +0100 Subject: [PATCH 08/15] change callbacks in ScheduledConfig --- .../scala/ox/resilience/AdaptiveRetry.scala | 33 ++++++++----- .../scala/ox/resilience/RetryConfig.scala | 20 +++++--- .../scala/ox/scheduling/RepeatConfig.scala | 14 ++++-- .../main/scala/ox/scheduling/scheduled.scala | 49 +++++++++---------- 4 files changed, 63 insertions(+), 53 deletions(-) diff --git a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala index a4b49510..b70a9dc8 100644 --- a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala +++ b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala @@ -1,7 +1,7 @@ package ox.resilience -import ox.{EitherMode, ErrorMode} -import ox.scheduling.{ScheduledConfig, SleepMode, scheduledWithErrorMode} +import ox.scheduling.{ScheduleContinue, ScheduledConfig, SleepMode, scheduledWithErrorMode} +import ox.* import scala.util.Try @@ -56,21 +56,28 @@ case class AdaptiveRetry( shouldPayPenaltyCost: T => Boolean = (_: T) => true, errorMode: ErrorMode[E, F] )(operation: => F[T]): F[T] = - val shouldAttempt: Either[E, T] => Boolean = result => - // for result T check if penalty should be paid, in case of E we always pay. 
- // If shouldPayPenaltyCost return false, we always attempt - if result.map(shouldPayPenaltyCost).getOrElse(true) then tokenBucket.tryAcquire(failureCost) - else true - val afterSuccess: T => Unit = _ => tokenBucket.release(successReward) + val afterAttempt: (Int, Either[E, T]) => ScheduleContinue = (attemptNum, attempt) => + config.onRetry(attemptNum, attempt) + attempt match + case Left(value) => + // If we want to retry we try to acquire tokens from bucket + if config.resultPolicy.isWorthRetrying(value) then ScheduleContinue.fromBool(tokenBucket.tryAcquire(failureCost)) + else ScheduleContinue.No + case Right(value) => + // If we are successful, we release tokens to bucket and end schedule + if config.resultPolicy.isSuccess(value) then + tokenBucket.release(successReward) + ScheduleContinue.No + // If it is not success we check if we need to acquire tokens, then we check bucket, otherwise we continue + else if shouldPayPenaltyCost(value) then ScheduleContinue.fromBool(tokenBucket.tryAcquire(failureCost)) + else ScheduleContinue.Yes + end match + end afterAttempt val scheduledConfig = ScheduledConfig( config.schedule, - config.onRetry, - shouldContinueOnError = config.resultPolicy.isWorthRetrying, - shouldContinueOnResult = t => !config.resultPolicy.isSuccess(t), - shouldAttempt = shouldAttempt, - afterSuccess = afterSuccess, + afterAttempt, sleepMode = SleepMode.Delay ) diff --git a/core/src/main/scala/ox/resilience/RetryConfig.scala b/core/src/main/scala/ox/resilience/RetryConfig.scala index ef89408e..4abb8a45 100644 --- a/core/src/main/scala/ox/resilience/RetryConfig.scala +++ b/core/src/main/scala/ox/resilience/RetryConfig.scala @@ -1,6 +1,7 @@ package ox.resilience -import ox.scheduling.{Jitter, Schedule, ScheduledConfig, SleepMode} +import ox.scheduling.{Jitter, Schedule, ScheduleContinue, ScheduledConfig, SleepMode} + import scala.concurrent.duration.* /** A config that defines how to retry a failed operation. 
@@ -28,13 +29,16 @@ case class RetryConfig[E, T]( resultPolicy: ResultPolicy[E, T] = ResultPolicy.default[E, T], onRetry: (Int, Either[E, T]) => Unit = (_: Int, _: Either[E, T]) => () ): - def toScheduledConfig: ScheduledConfig[E, T] = ScheduledConfig( - schedule, - onRetry, - shouldContinueOnError = resultPolicy.isWorthRetrying, - shouldContinueOnResult = t => !resultPolicy.isSuccess(t), - sleepMode = SleepMode.Delay - ) + def toScheduledConfig: ScheduledConfig[E, T] = + val afterAttempt: (Int, Either[E, T]) => ScheduleContinue = (attemptNum, attempt) => + onRetry(attemptNum, attempt) + attempt match + case Left(value) => ScheduleContinue.fromBool(resultPolicy.isWorthRetrying(value)) + case Right(value) => ScheduleContinue.fromBool(!resultPolicy.isSuccess(value)) + end afterAttempt + + ScheduledConfig(schedule, afterAttempt, SleepMode.Delay) + end toScheduledConfig end RetryConfig object RetryConfig: diff --git a/core/src/main/scala/ox/scheduling/RepeatConfig.scala b/core/src/main/scala/ox/scheduling/RepeatConfig.scala index 7959abbf..2863198f 100644 --- a/core/src/main/scala/ox/scheduling/RepeatConfig.scala +++ b/core/src/main/scala/ox/scheduling/RepeatConfig.scala @@ -28,11 +28,15 @@ case class RepeatConfig[E, T]( schedule: Schedule, shouldContinueOnResult: T => Boolean = (_: T) => true ): - def toScheduledConfig: ScheduledConfig[E, T] = ScheduledConfig( - schedule, - shouldContinueOnResult = shouldContinueOnResult, - sleepMode = SleepMode.Interval - ) + def toScheduledConfig: ScheduledConfig[E, T] = + val afterAttempt: (Int, Either[E, T]) => ScheduleContinue = (_, attempt) => + attempt match + case Left(_) => ScheduleContinue.Yes + case Right(value) => ScheduleContinue.fromBool(shouldContinueOnResult(value)) + end afterAttempt + + ScheduledConfig(schedule, afterAttempt, SleepMode.Interval) + end toScheduledConfig end RepeatConfig object RepeatConfig: diff --git a/core/src/main/scala/ox/scheduling/scheduled.scala 
b/core/src/main/scala/ox/scheduling/scheduled.scala index 340e0acb..841ec9ae 100644 --- a/core/src/main/scala/ox/scheduling/scheduled.scala +++ b/core/src/main/scala/ox/scheduling/scheduled.scala @@ -19,26 +19,26 @@ enum SleepMode: case Delay end SleepMode +enum ScheduleContinue(val continue: Boolean): + case Yes extends ScheduleContinue(true) + case No extends ScheduleContinue(false) + +end ScheduleContinue + +object ScheduleContinue: + def fromBool(predicate: Boolean): ScheduleContinue = + if predicate then Yes + else No +end ScheduleContinue + /** A config that defines how to schedule an operation. * * @param schedule * The schedule which determines the maximum number of invocations and the duration between subsequent invocations. See [[Schedule]] for * more details. - * @param onOperationResult - * A function that is invoked after each invocation. The callback receives the number of the current invocations number (starting from 1) - * and the result of the operation. The result is either a successful value or an error. - * @param shouldContinueOnError - * A function that determines whether to continue the loop after an error. The function receives the error that was emitted by the last - * invocation. Defaults to [[_ => false]]. - * @param shouldContinueOnResult - * A function that determines whether to continue the loop after a success. The function receives the value that was emitted by the last - * invocation. Defaults to [[_ => true]]. - * - * @param shouldAttempt - * A function that determines whether to attempt a retry. This function is called after shouldContinueOnError or shouldContinueOnResult - * returns true and the result is considered for retry, it may perform side effects to determine if attempt should be made. - * @param afterSuccess - * A function that is invoked after every successful attempt. Performs side effects. + * @param afterAttempt + * A function that determines if schedule should continue. 
It is invoked after every attempt with current invocations number (starting + * from 1) and the result of an operation. It can contain side effects. * @param sleepMode * The mode that specifies how to interpret the duration provided by the schedule. See [[SleepMode]] for more details. * @tparam E @@ -49,11 +49,8 @@ end SleepMode */ case class ScheduledConfig[E, T]( schedule: Schedule, - onOperationResult: (Int, Either[E, T]) => Unit = (_: Int, _: Either[E, T]) => (), - shouldContinueOnError: E => Boolean = (_: E) => false, - shouldContinueOnResult: T => Boolean = (_: T) => true, - shouldAttempt: Either[E, T] => Boolean = (_: Either[E, T]) => true, - afterSuccess: T => Unit = (_: T) => (), + afterAttempt: (Int, Either[E, T]) => ScheduleContinue = (_, attempt: Either[E, T]) => + attempt.map(_ => ScheduleContinue.Yes).getOrElse(ScheduleContinue.No), sleepMode: SleepMode = SleepMode.Interval ) @@ -115,22 +112,20 @@ def scheduledWithErrorMode[E, F[_], T](em: ErrorMode[E, F])(config: ScheduledCon operation match case v if em.isError(v) => val error = em.getError(v) - config.onOperationResult(invocation, Left(error)) + val shouldContinue = config.afterAttempt(invocation, Left(error)) - if config.shouldContinueOnError(error) && remainingInvocations.forall(_ > 0) && config.shouldAttempt(Left(error)) then + if remainingInvocations.forall(_ > 0) && shouldContinue.continue then val delay = sleepIfNeeded(startTimestamp) loop(invocation + 1, remainingInvocations.map(_ - 1), Some(delay)) else v case v => val result = em.getT(v) - config.onOperationResult(invocation, Right(result)) + val shouldContinue = config.afterAttempt(invocation, Right(result)) - if config.shouldContinueOnResult(result) && remainingInvocations.forall(_ > 0) && config.shouldAttempt(Right(result)) then + if remainingInvocations.forall(_ > 0) && shouldContinue.continue then val delay = sleepIfNeeded(startTimestamp) loop(invocation + 1, remainingInvocations.map(_ - 1), Some(delay)) - else - 
config.afterSuccess(result) - v + else v end match end loop From c901e3ce256668e7694c00af4eed35341209b57a Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski Date: Mon, 30 Dec 2024 16:13:30 +0100 Subject: [PATCH 09/15] update docs --- doc/utils/retries.md | 2 ++ doc/utils/scheduled.md | 7 +------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/doc/utils/retries.md b/doc/utils/retries.md index d5fcbae1..789f4a96 100644 --- a/doc/utils/retries.md +++ b/doc/utils/retries.md @@ -140,6 +140,8 @@ Instance of `AdaptiveRetry` consists of three parts: `RetryConfig` and `ResultPolicy` are defined the same as with "normal" retry mechanism, all the information from above apply also here. +Instance with default configuration can be obtained with `AdaptiveRetry.default` with bucket size = 500, cost for failure = 5 and reward for success = 1. + ## API To retry operation on `AdaptiveRetry` instance you can use one of three operations: - `def retryWithErrorMode[E, T, F[_]](config: RetryConfig[E, T], shouldPayPenaltyCost: T => Boolean = (_: T) => true, errorMode: ErrorMode[E, F])(operation: => F[T]): F[T]` - where `E` represents error type, `T` result type, and `F[_]` context in which they are returned. This method is similar to `retryWithErrorMode` diff --git a/doc/utils/scheduled.md b/doc/utils/scheduled.md index c7f26be6..06df5abd 100644 --- a/doc/utils/scheduled.md +++ b/doc/utils/scheduled.md @@ -20,13 +20,8 @@ The `scheduled` config consists of: - `Interval` - default for `repeat` operations, where the sleep is calculated as the duration provided by schedule minus the duration of the last operation (can be negative, in which case the next operation occurs immediately). - `Delay` - default for `retry` operations, where the sleep is just the duration provided by schedule. -- `onOperationResult` - a callback function that is invoked after each operation. Used primarily for `onRetry` in `retry` API. 
+- `afterAttempt` - a callback function that is invoked after each operation and determines if scheduler loop should continue. Used for `onRetry`, `shouldContinueOnError`, `shouldContinueOnSuccess` and adaptive retries in `retry` API. Defaults to continuing on error and stopping on result `T` -In addition, it is possible to define strategies for handling the results and errors returned by the `operation`: -- `shouldContinueOnError` - defaults to `_: E => false`, which allows to decide if the scheduler loop should continue - after an error returned by the previous operation. -- `shouldContinueOnSuccess` - defaults to `_: T => true`, which allows to decide if the scheduler loop should continue - after a successful result returned by the previous operation. ## Schedule From 6b9b99e80529bc1fbddb9274f7ebdc7b2deaacd2 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski Date: Mon, 30 Dec 2024 19:11:17 +0100 Subject: [PATCH 10/15] RepeatConfig - stop on error --- core/src/main/scala/ox/scheduling/RepeatConfig.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/ox/scheduling/RepeatConfig.scala b/core/src/main/scala/ox/scheduling/RepeatConfig.scala index 2863198f..d1bbbd77 100644 --- a/core/src/main/scala/ox/scheduling/RepeatConfig.scala +++ b/core/src/main/scala/ox/scheduling/RepeatConfig.scala @@ -31,7 +31,7 @@ case class RepeatConfig[E, T]( def toScheduledConfig: ScheduledConfig[E, T] = val afterAttempt: (Int, Either[E, T]) => ScheduleContinue = (_, attempt) => attempt match - case Left(_) => ScheduleContinue.Yes + case Left(_) => ScheduleContinue.No case Right(value) => ScheduleContinue.fromBool(shouldContinueOnResult(value)) end afterAttempt From 1f4872642409658ee9f4b09f9e2445b2d7c66f18 Mon Sep 17 00:00:00 2001 From: adamw Date: Tue, 31 Dec 2024 10:44:19 +0100 Subject: [PATCH 11/15] Docs --- .../scala/ox/resilience/AdaptiveRetry.scala | 37 +++++--- .../scala/ox/resilience/TokenBucket.scala | 1 + 
core/src/main/scala/ox/resilience/retry.scala | 2 +- .../ox/resilience/DelayedRetryTest.scala | 1 - doc/utils/retries.md | 91 +++++++++++-------- doc/utils/scheduled.md | 2 +- 6 files changed, 82 insertions(+), 52 deletions(-) diff --git a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala index b70a9dc8..aba053c9 100644 --- a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala +++ b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala @@ -5,19 +5,31 @@ import ox.* import scala.util.Try -/** Provides mechanism of "adaptive" retries. Inspired by `AdaptiveRetryStrategy` from `aws-sdk-java-v2` and the talk "AWS re:Invent 2024 - - * Try again: The tools and techniques behind resilient systems". For every retry we take [[failureCost]] from token bucket and for every - * success we add back to the bucket [[successReward]] tokens. Instance of this class is thread-safe and can be "shared" across multiple - * operations against constrained resource. This allows to retry in case of transient failures and at the same time doesn't produce more - * load on systemic failure of a resource. +/** Implements "adaptive" retries: every retry costs [[failureCost]] tokens from the bucket, and every success causes [[successReward]] + * tokens to be added to the bucket. If there are not enough tokens, retry is not attempted. + * + * This way retries don't overload a system that is down due to a systemic failure (such as a bug in the code, excessive load etc.): + * retries will be attempted only as long as there are enough tokens in the bucket, then the load on the downstream system will be reduced + * so that it can recover. For transient failures (component failure, infrastructure issues etc.), retries still work as expected, as the + * bucket has enough tokens to cover the cost of multiple retries. + * + * Instances of this class are thread-safe and are designed to be shared.
Typically, a single instance should be used to proxy access to a + * single constrained resource. + * + * An instance with default parameters can be created using [[AdaptiveRetry.default]]. + * + * Inspired by: + * - [`AdaptiveRetryStrategy`](https://github.com/aws/aws-sdk-java-v2/blob/master/core/retries/src/main/java/software/amazon/awssdk/retries/AdaptiveRetryStrategy.java) + * from `aws-sdk-java-v2` + * - ["Try again: The tools and techniques behind resilient systems" from re:Invent 2024](https://www.youtube.com/watch?v=rvHd4Y76-fs) * * @param tokenBucket - * instance of [[TokenBucket]]. Provided instance is thread safe and can be "shared" between different instances of [[AdaptiveRetry]] - * with different [[failureCost]] for example. + * Instance of [[TokenBucket]]. As a token bucket is thread-safe, it can be shared between different instances of [[AdaptiveRetry]], e.g. + * with a different [[failureCost]]. * @param failureCost * Number of tokens to take from [[tokenBucket]] when retrying. * @param successReward - * Number of tokens to add back to [[tokenBucket]] after successful operation. + * Number of tokens to add back to [[tokenBucket]] after a successful operation. 
*/ case class AdaptiveRetry( tokenBucket: TokenBucket, @@ -51,10 +63,9 @@ case class AdaptiveRetry( * @see * [[scheduledWithErrorMode]] */ - def retryWithErrorMode[E, T, F[_]]( + def retryWithErrorMode[E, T, F[_]](errorMode: ErrorMode[E, F])( config: RetryConfig[E, T], - shouldPayPenaltyCost: T => Boolean = (_: T) => true, - errorMode: ErrorMode[E, F] + shouldPayPenaltyCost: T => Boolean = (_: T) => true )(operation: => F[T]): F[T] = val afterAttempt: (Int, Either[E, T]) => ScheduleContinue = (attemptNum, attempt) => @@ -110,7 +121,7 @@ case class AdaptiveRetry( def retryEither[E, T](config: RetryConfig[E, T], shouldPayPenaltyCost: T => Boolean = (_: T) => false)( operation: => Either[E, T] ): Either[E, T] = - retryWithErrorMode(config, shouldPayPenaltyCost, EitherMode[E])(operation) + retryWithErrorMode(EitherMode[E])(config, shouldPayPenaltyCost)(operation) /** Retries an operation returning a direct result until it succeeds or the config decides to stop. * @@ -131,7 +142,7 @@ case class AdaptiveRetry( * [[scheduled]] */ def retry[T](config: RetryConfig[Throwable, T], shouldPayPenaltyCost: T => Boolean = (_: T) => false)(operation: => T): T = - retryWithErrorMode(config, shouldPayPenaltyCost, EitherMode[Throwable])(Try(operation).toEither).fold(throw _, identity) + retryWithErrorMode(EitherMode[Throwable])(config, shouldPayPenaltyCost)(Try(operation).toEither).fold(throw _, identity) end AdaptiveRetry diff --git a/core/src/main/scala/ox/resilience/TokenBucket.scala b/core/src/main/scala/ox/resilience/TokenBucket.scala index 324f5a0e..f29bc40a 100644 --- a/core/src/main/scala/ox/resilience/TokenBucket.scala +++ b/core/src/main/scala/ox/resilience/TokenBucket.scala @@ -2,6 +2,7 @@ package ox.resilience import java.util.concurrent.Semaphore +/** Used by the leaky bucket rate limiter & [[AdaptiveRetry]], to limit the rate of operations. 
*/ case class TokenBucket(bucketSize: Int, initSize: Option[Int] = None): private val semaphore = Semaphore(initSize.getOrElse(bucketSize)) diff --git a/core/src/main/scala/ox/resilience/retry.scala b/core/src/main/scala/ox/resilience/retry.scala index 6573221e..3cb582d6 100644 --- a/core/src/main/scala/ox/resilience/retry.scala +++ b/core/src/main/scala/ox/resilience/retry.scala @@ -49,7 +49,7 @@ def retryEither[E, T](config: RetryConfig[E, T])(operation: => Either[E, T]): Ei * @param em * The error mode to use, which specifies when a result value is considered success, and when a failure. * @param config - * The retry config - See [[RetryConfig]]. + * The retry config - see [[RetryConfig]]. * @param operation * The operation to retry. * @return diff --git a/core/src/test/scala/ox/resilience/DelayedRetryTest.scala b/core/src/test/scala/ox/resilience/DelayedRetryTest.scala index 278e294b..e7681f67 100644 --- a/core/src/test/scala/ox/resilience/DelayedRetryTest.scala +++ b/core/src/test/scala/ox/resilience/DelayedRetryTest.scala @@ -7,7 +7,6 @@ import ox.util.ElapsedTime import ox.resilience.* import scala.concurrent.duration.* -import scala.util.Try class DelayedRetryTest extends AnyFlatSpec with Matchers with EitherValues with TryValues with ElapsedTime: diff --git a/doc/utils/retries.md b/doc/utils/retries.md index 789f4a96..316a8ef7 100644 --- a/doc/utils/retries.md +++ b/doc/utils/retries.md @@ -121,67 +121,86 @@ retry(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_.getMessage != retryEither(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(eitherOperation) // custom error mode -retryWithErrorMode(UnionMode[String])(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(unionOperation) +retryWithErrorMode(UnionMode[String])( + RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(unionOperation) ``` See the tests in `ox.resilience.*` for more. 
-# Adaptive retries -This retry mechanism is inspired by the talk `AWS re:Invent 2024 - Try again: The tools and techniques behind resilient systems` and [AdaptiveRetryStrategy](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/retries/AdaptiveRetryStrategy.html) from `aws-sdk-java-v2`. -Class `AdaptiveRetry` contains thread-safe `TokenBucket` that acts as a circuit breaker for instance of this class. -For every retry, tokens are taken from bucket, if there is not enough we stop retrying. For every successful operations tokens are added back to bucket. -This allows for "normal" retry mechanism in case of transient failures, but does not allow to generate for example 4 times the load in case of systemic failure (if we retry every operation 3 times). +## Adaptive retries -## Configuration -Instance of `AdaptiveRetry` consists of three parts: -- `tokenBucket: Tokenbucket` - instance of `TokenBucket`, can be shared across multiple instances of `AdaptiveRetry`. -- `failureCost: Int` - cost of tokens that are needed for retry in case of failure. -- `successReward: Int` - number of tokens that are added back to token bucket after successful attempt. +A retry strategy, backed by a token bucket. Every retry costs a certain amount of tokens from the bucket, and every success causes some tokens to be added back to the bucket. If there are not enough tokens, retry is not attempted. + +This way retries don't overload a system that is down due to a systemic failure (such as a bug in the code, excessive load etc.): retries will be attempted only as long as there are enough tokens in the bucket, then the load on the downstream system will be reduced so that it can recover. In contrast, using a "normal" retry strategy, where every operation is retried up to 3 times, a failure causes the load on the system to increase 4 times. -`RetryConfig` and `ResultPolicy` are defined the same as with "normal" retry mechanism, all the information from above apply also here. 
+For transient failures (component failure, infrastructure issues etc.), retries still work "normally", as the bucket has enough tokens to cover the cost of multiple retries. -Instance with default configuration can be obtained with `AdaptiveRetry.default` with bucket size = 500, cost for failure = 5 and reward for success = 1. +### Inspiration -## API -To retry operation on `AdaptiveRetry` instance you can use one of three operations: -- `def retryWithErrorMode[E, T, F[_]](config: RetryConfig[E, T], shouldPayPenaltyCost: T => Boolean = (_: T) => true, errorMode: ErrorMode[E, F])(operation: => F[T]): F[T]` - where `E` represents error type, `T` result type, and `F[_]` context in which they are returned. This method is similar to `retryWithErrorMode` -- `def retryEither[E, T](config: RetryConfig[E, T], shouldPayPenaltyCost: T => Boolean = (_: T) => true)(operation: => Either[E, T]): Either[E, T]` - This method is equivalent of `retryEither`. -- `def retry[T](config: RetryConfig[Throwable, T], shouldPayPenaltyCost: T => Boolean = (_: T) => true)(operation: => T): T` - This method is equivalent of `retry` +* [`AdaptiveRetryStrategy`](https://github.com/aws/aws-sdk-java-v2/blob/master/core/retries/src/main/java/software/amazon/awssdk/retries/AdaptiveRetryStrategy.java) from `aws-sdk-java-v2` +* ["Try again: The tools and techniques behind resilient systems" from re:Invent 2024](https://www.youtube.com/watch?v=rvHd4Y76-fs) -`retry` and `retryEither` are implemented in terms of `retryWithErrorMode` method. +### Configuration -`shouldPayPenaltyCost` determines if result `T` should be considered failure in terms of paying cost for retry. -Penalty is paid only if it is decided to retry operation, the penalty will not be paid for successful operation. +To use adaptive retries, create an instance of `AdaptiveRetry`. These instances are thread-safe and are designed to be shared. Typically, a single instance should be used to proxy access to a single constrained resource. 
-## Examples +`AdaptiveRetry` is parametrized with: + +* `tokenBucket: TokenBucket`: instances of `TokenBucket` can be shared across multiple instances of `AdaptiveRetry` +* `failureCost: Int`: number of tokens that are needed for retry in case of failure +* `successReward: Int`: number of tokens that are added back to token bucket after success + +`RetryConfig` and `ResultPolicy` are defined the same as with "normal" retry mechanism, all the configuration from above also applies here. + +Instance with default configuration can be obtained with `AdaptiveRetry.default` (bucket size = 500, cost for failure = 5 and reward for success = 1). + +### API + +`AdaptiveRetry` exposes three variants of retrying, which correspond to the three variants discussed above: `retry`, `retryEither` and `retryWithErrorMode`. + +`retry` will attempt to retry an operation if it throws an exception; `retryEither` will additionally retry, if the result is a `Left`. Finally `retryWithErrorMode` is the most flexible, and allows retrying operations using custom failure modes (such as union types). + +The methods have an additional parameter, `shouldPayPenaltyCost`, which determines if result `T` should be considered failure in terms of paying cost for retry. Penalty is paid only if it is decided to retry operation, the penalty will not be paid for successful operation. + +### Examples If you want to use this mechanism you need to run operation through instance of `AdaptiveRetry`: -```scala -import ox.resilience.{AdaptiveRetry, TokenBucket} -val tokenBucket = TokenBucket(bucketSize = 500) -val adaptive = AdaptiveRetry(tokenBucket, failureCost = 5, successReward = 4) +```scala mdoc:compile-only +import ox.UnionMode +import ox.resilience.AdaptiveRetry +import ox.resilience.{ResultPolicy, RetryConfig} +import ox.scheduling.{Jitter, Schedule} +import scala.concurrent.duration.* + +def directOperation: Int = ??? +def eitherOperation: Either[String, Int] = ??? +def unionOperation: String | Int = ??? 
+ +val adaptive = AdaptiveRetry.default // various configs with custom schedules and default ResultPolicy -adaptive.retry(RetryConfig.immediate(sleep))(directOperation) +adaptive.retry(RetryConfig.immediate(3))(directOperation) adaptive.retry(RetryConfig.delay(3, 100.millis))(directOperation) adaptive.retry(RetryConfig.backoff(3, 100.millis))(directOperation) // defaults: maxDelay = 1.minute, jitter = Jitter.None adaptive.retry(RetryConfig.backoff(3, 100.millis, 5.minutes, Jitter.Equal))(directOperation) // result policies // custom success -adaptive.retry(RetryConfig(Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)))(directOperation) +adaptive.retry[Int]( + RetryConfig(Schedule.Immediate(3), ResultPolicy.successfulWhen(_ > 0)))(directOperation) // fail fast on certain errors -adaptive.retry(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_.getMessage != "fatal error")))(directOperation) -adaptive.retryEither(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(eitherOperation) +adaptive.retry( + RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_.getMessage != "fatal error")))(directOperation) +adaptive.retryEither( + RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(eitherOperation) // custom error mode -adaptive.retryWithErrorMode(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")), errorMode = UnionMode[String])(unionOperation) +adaptive.retryWithErrorMode(UnionMode[String])( + RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")))(unionOperation) // consider "throttling error" not as a failure that should incur the retry penalty -adaptive.retryWithErrorMode(RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")), isFailure = _ != "throttling error", errorMode = UnionMode[String])(unionOperation) +adaptive.retryWithErrorMode(UnionMode[String])( + RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ 
!= "fatal error")), + shouldPayPenaltyCost = _ != "throttling error")(unionOperation) ``` - -Instance of `AdaptiveRetry` can be shared for different operation, for example different operations on the same constrained resource. - -See the tests in `ox.resilience.*` for more. \ No newline at end of file diff --git a/doc/utils/scheduled.md b/doc/utils/scheduled.md index 06df5abd..c209535b 100644 --- a/doc/utils/scheduled.md +++ b/doc/utils/scheduled.md @@ -20,7 +20,7 @@ The `scheduled` config consists of: - `Interval` - default for `repeat` operations, where the sleep is calculated as the duration provided by schedule minus the duration of the last operation (can be negative, in which case the next operation occurs immediately). - `Delay` - default for `retry` operations, where the sleep is just the duration provided by schedule. -- `afterAttempt` - a callback function that is invoked after each operation and determines if scheduler loop should continue. Used for `onRetry`, `shouldContinueOnError`, `shouldContinueOnSuccess` and adaptive retries in `retry` API. Defaults to continuing on error and stopping on result `T` +- `afterAttempt` - a callback function that is invoked after each operation and determines if the scheduler loop should continue. Used for `onRetry`, `shouldContinueOnError`, `shouldContinueOnSuccess` and adaptive retries in `retry` API. Defaults to continuing on error and stopping on result `T`. 
## Schedule From 05136b14417a985dec9e19bc2e2df68a58b201f9 Mon Sep 17 00:00:00 2001 From: adamw Date: Tue, 31 Dec 2024 10:50:12 +0100 Subject: [PATCH 12/15] Simplify ScheduleContinue --- .../main/scala/ox/resilience/AdaptiveRetry.scala | 4 ++-- .../main/scala/ox/resilience/RetryConfig.scala | 4 ++-- .../main/scala/ox/scheduling/RepeatConfig.scala | 2 +- core/src/main/scala/ox/scheduling/scheduled.scala | 15 +++++---------- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala index aba053c9..efc85c29 100644 --- a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala +++ b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala @@ -73,7 +73,7 @@ case class AdaptiveRetry( attempt match case Left(value) => // If we want to retry we try to acquire tokens from bucket - if config.resultPolicy.isWorthRetrying(value) then ScheduleContinue.fromBool(tokenBucket.tryAcquire(failureCost)) + if config.resultPolicy.isWorthRetrying(value) then ScheduleContinue(tokenBucket.tryAcquire(failureCost)) else ScheduleContinue.No case Right(value) => // If we are successful, we release tokens to bucket and end schedule @@ -81,7 +81,7 @@ case class AdaptiveRetry( tokenBucket.release(successReward) ScheduleContinue.No // If it is not success we check if we need to acquire tokens, then we check bucket, otherwise we continue - else if shouldPayPenaltyCost(value) then ScheduleContinue.fromBool(tokenBucket.tryAcquire(failureCost)) + else if shouldPayPenaltyCost(value) then ScheduleContinue(tokenBucket.tryAcquire(failureCost)) else ScheduleContinue.Yes end match end afterAttempt diff --git a/core/src/main/scala/ox/resilience/RetryConfig.scala b/core/src/main/scala/ox/resilience/RetryConfig.scala index 4abb8a45..67cc2e61 100644 --- a/core/src/main/scala/ox/resilience/RetryConfig.scala +++ b/core/src/main/scala/ox/resilience/RetryConfig.scala @@ -33,8 +33,8 @@ case class 
RetryConfig[E, T]( val afterAttempt: (Int, Either[E, T]) => ScheduleContinue = (attemptNum, attempt) => onRetry(attemptNum, attempt) attempt match - case Left(value) => ScheduleContinue.fromBool(resultPolicy.isWorthRetrying(value)) - case Right(value) => ScheduleContinue.fromBool(!resultPolicy.isSuccess(value)) + case Left(value) => ScheduleContinue(resultPolicy.isWorthRetrying(value)) + case Right(value) => ScheduleContinue(!resultPolicy.isSuccess(value)) end afterAttempt ScheduledConfig(schedule, afterAttempt, SleepMode.Delay) diff --git a/core/src/main/scala/ox/scheduling/RepeatConfig.scala b/core/src/main/scala/ox/scheduling/RepeatConfig.scala index d1bbbd77..4ea26503 100644 --- a/core/src/main/scala/ox/scheduling/RepeatConfig.scala +++ b/core/src/main/scala/ox/scheduling/RepeatConfig.scala @@ -32,7 +32,7 @@ case class RepeatConfig[E, T]( val afterAttempt: (Int, Either[E, T]) => ScheduleContinue = (_, attempt) => attempt match case Left(_) => ScheduleContinue.No - case Right(value) => ScheduleContinue.fromBool(shouldContinueOnResult(value)) + case Right(value) => ScheduleContinue(shouldContinueOnResult(value)) end afterAttempt ScheduledConfig(schedule, afterAttempt, SleepMode.Interval) diff --git a/core/src/main/scala/ox/scheduling/scheduled.scala b/core/src/main/scala/ox/scheduling/scheduled.scala index 841ec9ae..09b92af7 100644 --- a/core/src/main/scala/ox/scheduling/scheduled.scala +++ b/core/src/main/scala/ox/scheduling/scheduled.scala @@ -14,22 +14,17 @@ enum SleepMode: */ case Interval - /** Delay (since the end of the last operation), i.e. sleeps the duration provided by the schedule before the next operation starts. - */ + /** Delay (since the end of the last operation), i.e. sleeps the duration provided by the schedule before the next operation starts. 
*/ case Delay end SleepMode +/** @see [[ScheduleConfig.afterAttempt]] */ enum ScheduleContinue(val continue: Boolean): case Yes extends ScheduleContinue(true) case No extends ScheduleContinue(false) -end ScheduleContinue - object ScheduleContinue: - def fromBool(predicate: Boolean): ScheduleContinue = - if predicate then Yes - else No -end ScheduleContinue + def apply(predicate: Boolean): ScheduleContinue = if predicate then Yes else No /** A config that defines how to schedule an operation. * @@ -37,8 +32,8 @@ end ScheduleContinue * The schedule which determines the maximum number of invocations and the duration between subsequent invocations. See [[Schedule]] for * more details. * @param afterAttempt - * A function that determines if schedule should continue. It is invoked after every attempt with current invocations number (starting - * from 1) and the result of an operation. It can contain side effects. + * A function that determines if schedule should continue. It is invoked after every attempt with current invocation number (starting + * from 1) and the result of the operation. * @param sleepMode * The mode that specifies how to interpret the duration provided by the schedule. See [[SleepMode]] for more details. 
* @tparam E From 70e44ec4c928a07253c52e56e33dc395f95a7848 Mon Sep 17 00:00:00 2001 From: adamw Date: Tue, 31 Dec 2024 11:16:36 +0100 Subject: [PATCH 13/15] Change ScheduleContinue to ScheduleStop --- .../scala/ox/resilience/AdaptiveRetry.scala | 21 +++++---------- .../scala/ox/resilience/RetryConfig.scala | 9 +++---- .../scala/ox/scheduling/RepeatConfig.scala | 7 +++-- .../main/scala/ox/scheduling/scheduled.scala | 26 +++++++++---------- 4 files changed, 27 insertions(+), 36 deletions(-) diff --git a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala index efc85c29..3484f76b 100644 --- a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala +++ b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala @@ -1,6 +1,6 @@ package ox.resilience -import ox.scheduling.{ScheduleContinue, ScheduledConfig, SleepMode, scheduledWithErrorMode} +import ox.scheduling.{ScheduleStop, ScheduledConfig, SleepMode, scheduledWithErrorMode} import ox.* import scala.util.Try @@ -39,8 +39,6 @@ case class AdaptiveRetry( /** Retries an operation using the given error mode until it succeeds or the config decides to stop. Note that any exceptions thrown by * the operation aren't caught (unless the operation catches them as part of its implementation) and don't cause a retry to happen. * - * This is a special case of [[scheduledWithErrorMode]] with a given set of defaults. See [[RetryConfig]] for more details. - * * @param config * The retry config - See [[RetryConfig]]. 
* @param shouldPayPenaltyCost @@ -68,21 +66,21 @@ case class AdaptiveRetry( shouldPayPenaltyCost: T => Boolean = (_: T) => true )(operation: => F[T]): F[T] = - val afterAttempt: (Int, Either[E, T]) => ScheduleContinue = (attemptNum, attempt) => + val afterAttempt: (Int, Either[E, T]) => ScheduleStop = (attemptNum, attempt) => config.onRetry(attemptNum, attempt) attempt match case Left(value) => // If we want to retry we try to acquire tokens from bucket - if config.resultPolicy.isWorthRetrying(value) then ScheduleContinue(tokenBucket.tryAcquire(failureCost)) - else ScheduleContinue.No + if config.resultPolicy.isWorthRetrying(value) then ScheduleStop(!tokenBucket.tryAcquire(failureCost)) + else ScheduleStop.Yes case Right(value) => // If we are successful, we release tokens to bucket and end schedule if config.resultPolicy.isSuccess(value) then tokenBucket.release(successReward) - ScheduleContinue.No + ScheduleStop.Yes // If it is not success we check if we need to acquire tokens, then we check bucket, otherwise we continue - else if shouldPayPenaltyCost(value) then ScheduleContinue(tokenBucket.tryAcquire(failureCost)) - else ScheduleContinue.Yes + else if shouldPayPenaltyCost(value) then ScheduleStop(!tokenBucket.tryAcquire(failureCost)) + else ScheduleStop.No end match end afterAttempt @@ -98,9 +96,6 @@ case class AdaptiveRetry( /** Retries an operation returning an [[scala.util.Either]] until it succeeds or the config decides to stop. Note that any exceptions * thrown by the operation aren't caught and don't cause a retry to happen. * - * [[retryEither]] is a special case of [[scheduledWithErrorMode]] with a given set of defaults. See implementations of [[RetryConfig]] - * for more details. - * * @param config * The retry config - see [[RetryConfig]]. 
* @param shouldPayPenaltyCost @@ -124,8 +119,6 @@ case class AdaptiveRetry( retryWithErrorMode(EitherMode[E])(config, shouldPayPenaltyCost)(operation) /** Retries an operation returning a direct result until it succeeds or the config decides to stop. - * - * [[retry]] is a special case of [[scheduledWithErrorMode]] with a given set of defaults. See [[RetryConfig]]. * * @param config * The retry config - see [[RetryConfig]]. diff --git a/core/src/main/scala/ox/resilience/RetryConfig.scala b/core/src/main/scala/ox/resilience/RetryConfig.scala index 67cc2e61..c0c0af2c 100644 --- a/core/src/main/scala/ox/resilience/RetryConfig.scala +++ b/core/src/main/scala/ox/resilience/RetryConfig.scala @@ -1,6 +1,6 @@ package ox.resilience -import ox.scheduling.{Jitter, Schedule, ScheduleContinue, ScheduledConfig, SleepMode} +import ox.scheduling.{Jitter, Schedule, ScheduleStop, ScheduledConfig, SleepMode} import scala.concurrent.duration.* @@ -30,12 +30,11 @@ case class RetryConfig[E, T]( onRetry: (Int, Either[E, T]) => Unit = (_: Int, _: Either[E, T]) => () ): def toScheduledConfig: ScheduledConfig[E, T] = - val afterAttempt: (Int, Either[E, T]) => ScheduleContinue = (attemptNum, attempt) => + val afterAttempt: (Int, Either[E, T]) => ScheduleStop = (attemptNum, attempt) => onRetry(attemptNum, attempt) attempt match - case Left(value) => ScheduleContinue(resultPolicy.isWorthRetrying(value)) - case Right(value) => ScheduleContinue(!resultPolicy.isSuccess(value)) - end afterAttempt + case Left(value) => ScheduleStop(!resultPolicy.isWorthRetrying(value)) + case Right(value) => ScheduleStop(resultPolicy.isSuccess(value)) ScheduledConfig(schedule, afterAttempt, SleepMode.Delay) end toScheduledConfig diff --git a/core/src/main/scala/ox/scheduling/RepeatConfig.scala b/core/src/main/scala/ox/scheduling/RepeatConfig.scala index 4ea26503..d19d0130 100644 --- a/core/src/main/scala/ox/scheduling/RepeatConfig.scala +++ b/core/src/main/scala/ox/scheduling/RepeatConfig.scala @@ -29,11 +29,10 @@ 
case class RepeatConfig[E, T]( shouldContinueOnResult: T => Boolean = (_: T) => true ): def toScheduledConfig: ScheduledConfig[E, T] = - val afterAttempt: (Int, Either[E, T]) => ScheduleContinue = (_, attempt) => + val afterAttempt: (Int, Either[E, T]) => ScheduleStop = (_, attempt) => attempt match - case Left(_) => ScheduleContinue.No - case Right(value) => ScheduleContinue(shouldContinueOnResult(value)) - end afterAttempt + case Left(_) => ScheduleStop.Yes + case Right(value) => ScheduleStop(!shouldContinueOnResult(value)) ScheduledConfig(schedule, afterAttempt, SleepMode.Interval) end toScheduledConfig diff --git a/core/src/main/scala/ox/scheduling/scheduled.scala b/core/src/main/scala/ox/scheduling/scheduled.scala index 09b92af7..8736bb58 100644 --- a/core/src/main/scala/ox/scheduling/scheduled.scala +++ b/core/src/main/scala/ox/scheduling/scheduled.scala @@ -19,12 +19,12 @@ enum SleepMode: end SleepMode /** @see [[ScheduleConfig.afterAttempt]] */ -enum ScheduleContinue(val continue: Boolean): - case Yes extends ScheduleContinue(true) - case No extends ScheduleContinue(false) +enum ScheduleStop(val stop: Boolean): + case Yes extends ScheduleStop(true) + case No extends ScheduleStop(false) -object ScheduleContinue: - def apply(predicate: Boolean): ScheduleContinue = if predicate then Yes else No +object ScheduleStop: + def apply(stop: Boolean): ScheduleStop = if stop then Yes else No /** A config that defines how to schedule an operation. * @@ -32,8 +32,9 @@ object ScheduleContinue: * The schedule which determines the maximum number of invocations and the duration between subsequent invocations. See [[Schedule]] for * more details. * @param afterAttempt - * A function that determines if schedule should continue. It is invoked after every attempt with current invocation number (starting - * from 1) and the result of the operation. + * A callback invoked after every attempt, with the current invocation number (starting from 1) and the result of the operation. 
Might + * decide to short-circuit further attempts, and stop the schedule. Schedule configuration (e.g. max number of attempts) takes + * precedence. * @param sleepMode * The mode that specifies how to interpret the duration provided by the schedule. See [[SleepMode]] for more details. * @tparam E @@ -44,8 +45,7 @@ object ScheduleContinue: */ case class ScheduledConfig[E, T]( schedule: Schedule, - afterAttempt: (Int, Either[E, T]) => ScheduleContinue = (_, attempt: Either[E, T]) => - attempt.map(_ => ScheduleContinue.Yes).getOrElse(ScheduleContinue.No), + afterAttempt: (Int, Either[E, T]) => ScheduleStop = (_, _: Either[E, T]) => ScheduleStop.No, sleepMode: SleepMode = SleepMode.Interval ) @@ -107,17 +107,17 @@ def scheduledWithErrorMode[E, F[_], T](em: ErrorMode[E, F])(config: ScheduledCon operation match case v if em.isError(v) => val error = em.getError(v) - val shouldContinue = config.afterAttempt(invocation, Left(error)) + val shouldStop = config.afterAttempt(invocation, Left(error)) - if remainingInvocations.forall(_ > 0) && shouldContinue.continue then + if remainingInvocations.forall(_ > 0) && !shouldStop.stop then val delay = sleepIfNeeded(startTimestamp) loop(invocation + 1, remainingInvocations.map(_ - 1), Some(delay)) else v case v => val result = em.getT(v) - val shouldContinue = config.afterAttempt(invocation, Right(result)) + val shouldStop = config.afterAttempt(invocation, Right(result)) - if remainingInvocations.forall(_ > 0) && shouldContinue.continue then + if remainingInvocations.forall(_ > 0) && !shouldStop.stop then val delay = sleepIfNeeded(startTimestamp) loop(invocation + 1, remainingInvocations.map(_ - 1), Some(delay)) else v From 1d88f1e5b539e2e47a6eea1a7a235c11a8895b38 Mon Sep 17 00:00:00 2001 From: Kamil-Lontkowski Date: Tue, 31 Dec 2024 11:55:55 +0100 Subject: [PATCH 14/15] change shouldPayFailureCost --- .../scala/ox/resilience/AdaptiveRetry.scala | 41 +++++++++++-------- doc/utils/retries.md | 4 +- 2 files changed, 25 insertions(+),
20 deletions(-) diff --git a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala index 3484f76b..88075d1a 100644 --- a/core/src/main/scala/ox/resilience/AdaptiveRetry.scala +++ b/core/src/main/scala/ox/resilience/AdaptiveRetry.scala @@ -6,10 +6,10 @@ import ox.* import scala.util.Try /** Implements "adaptive" retries: every retry costs [[failureCost]] tokens from the bucket, and every success causes [[successReward]] - * tokens to be added to the bucket. If there are not enought tokens, retry is not attempted. + * tokens to be added to the bucket. If there are not enough tokens, retry is not attempted. * * This way retries don't overload a system that is down due to a systemic failure (such as a bug in the code, excessive load etc.): - * retries will be attempted only as long as there are enought tokens in the bucket, then the load on the downstream system will be reduced + * retries will be attempted only as long as there are enough tokens in the bucket, then the load on the downstream system will be reduced * so that it can recover. For transient failures (component failure, infrastructure issues etc.), retries still work as expected, as the * bucket has enough tokens to cover the cost of multiple retries. * @@ -41,9 +41,9 @@ case class AdaptiveRetry( * * @param config * The retry config - See [[RetryConfig]]. - * @param shouldPayPenaltyCost - * Function to decide if returned result [[T]] should be considered failure in terms of paying cost for retry. Penalty is paid only if - * it is decided to retry operation, the penalty will not be paid for successful operation. Defaults to `true`. + * @param shouldPayFailureCost + * Function to decide if returned result Either[E, T] should be considered failure in terms of paying cost for retry. Penalty is paid + * only if it is decided to retry operation, the penalty will not be paid for successful operation. Defaults to `true`. 
* @param errorMode * The error mode to use, which specifies when a result value is considered success, and when a failure. * @param operation @@ -63,7 +63,7 @@ case class AdaptiveRetry( */ def retryWithErrorMode[E, T, F[_]](errorMode: ErrorMode[E, F])( config: RetryConfig[E, T], - shouldPayPenaltyCost: T => Boolean = (_: T) => true + shouldPayFailureCost: Either[E, T] => Boolean = (_: Either[E, T]) => true )(operation: => F[T]): F[T] = val afterAttempt: (Int, Either[E, T]) => ScheduleStop = (attemptNum, attempt) => @@ -71,7 +71,9 @@ case class AdaptiveRetry( attempt match case Left(value) => // If we want to retry we try to acquire tokens from bucket - if config.resultPolicy.isWorthRetrying(value) then ScheduleStop(!tokenBucket.tryAcquire(failureCost)) + if config.resultPolicy.isWorthRetrying(value) then + if shouldPayFailureCost(Left(value)) then ScheduleStop(!tokenBucket.tryAcquire(failureCost)) + else ScheduleStop.Yes else ScheduleStop.Yes case Right(value) => // If we are successful, we release tokens to bucket and end schedule @@ -79,7 +81,7 @@ case class AdaptiveRetry( tokenBucket.release(successReward) ScheduleStop.Yes // If it is not success we check if we need to acquire tokens, then we check bucket, otherwise we continue - else if shouldPayPenaltyCost(value) then ScheduleStop(!tokenBucket.tryAcquire(failureCost)) + else if shouldPayFailureCost(Right(value)) then ScheduleStop(!tokenBucket.tryAcquire(failureCost)) else ScheduleStop.No end match end afterAttempt @@ -98,9 +100,9 @@ case class AdaptiveRetry( * * @param config * The retry config - see [[RetryConfig]]. - * @param shouldPayPenaltyCost - * Function to decide if returned result [[T]] should be considered failure in terms of paying cost for retry. Penalty is paid only if - * it is decided to retry operation, the penalty will not be paid for successful operation. 
+ * @param shouldPayFailureCost + * Function to decide if returned result Either[E, T] should be considered failure in terms of paying cost for retry. Penalty is paid + * only if it is decided to retry operation, the penalty will not be paid for successful operation. Defaults to `true`. * @param operation * The operation to retry. * @tparam E @@ -113,18 +115,18 @@ case class AdaptiveRetry( * @see * [[scheduledEither]] */ - def retryEither[E, T](config: RetryConfig[E, T], shouldPayPenaltyCost: T => Boolean = (_: T) => false)( + def retryEither[E, T](config: RetryConfig[E, T], shouldPayFailureCost: Either[E, T] => Boolean = (_: Either[E, T]) => true)( operation: => Either[E, T] ): Either[E, T] = - retryWithErrorMode(EitherMode[E])(config, shouldPayPenaltyCost)(operation) + retryWithErrorMode(EitherMode[E])(config, shouldPayFailureCost)(operation) /** Retries an operation returning a direct result until it succeeds or the config decides to stop. * * @param config * The retry config - see [[RetryConfig]]. - * @param shouldPayPenaltyCost - * Function to decide if returned result [[T]] should be considered failure in terms of paying cost for retry. Penalty is paid only if - * it is decided to retry operation, the penalty will not be paid for successful operation. + * @param shouldPayFailureCost + * Function to decide if returned result Either[Throwable, T] should be considered failure in terms of paying cost for retry. Penalty + * is paid only if it is decided to retry operation, the penalty will not be paid for successful operation. Defaults to `true`. * @param operation * The operation to retry. 
* @return @@ -134,8 +136,11 @@ case class AdaptiveRetry( * @see * [[scheduled]] */ - def retry[T](config: RetryConfig[Throwable, T], shouldPayPenaltyCost: T => Boolean = (_: T) => false)(operation: => T): T = - retryWithErrorMode(EitherMode[Throwable])(config, shouldPayPenaltyCost)(Try(operation).toEither).fold(throw _, identity) + def retry[T]( + config: RetryConfig[Throwable, T], + shouldPayFailureCost: Either[Throwable, T] => Boolean = (_: Either[Throwable, T]) => true + )(operation: => T): T = + retryWithErrorMode(EitherMode[Throwable])(config, shouldPayFailureCost)(Try(operation).toEither).fold(throw _, identity) end AdaptiveRetry diff --git a/doc/utils/retries.md b/doc/utils/retries.md index 316a8ef7..622d99b1 100644 --- a/doc/utils/retries.md +++ b/doc/utils/retries.md @@ -201,6 +201,6 @@ adaptive.retryWithErrorMode(UnionMode[String])( // consider "throttling error" not as a failure that should incur the retry penalty adaptive.retryWithErrorMode(UnionMode[String])( - RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")), - shouldPayPenaltyCost = _ != "throttling error")(unionOperation) + RetryConfig(Schedule.Immediate(3), ResultPolicy.retryWhen(_ != "fatal error")), + shouldPayFailureCost = _.fold(_ != "throttling error", _ => true))(unionOperation) ``` From 45000a70d96dfc0abd30a7595742bc0f95c01513 Mon Sep 17 00:00:00 2001 From: adamw Date: Tue, 31 Dec 2024 12:12:51 +0100 Subject: [PATCH 15/15] Fix docs --- core/src/main/scala/ox/scheduling/RepeatConfig.scala | 4 ++-- doc/utils/repeat.md | 2 +- doc/utils/scheduled.md | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/ox/scheduling/RepeatConfig.scala b/core/src/main/scala/ox/scheduling/RepeatConfig.scala index d19d0130..b15cb458 100644 --- a/core/src/main/scala/ox/scheduling/RepeatConfig.scala +++ b/core/src/main/scala/ox/scheduling/RepeatConfig.scala @@ -9,8 +9,8 @@ import scala.concurrent.duration.DurationInt * the specified duration after the 
previous operations has finished. If the previous operation takes longer than the interval, the next * operation will start immediately after the previous one has finished. * - * It is a special case of [[ScheduledConfig]] with [[ScheduledConfig.sleepMode]] always set to [[SleepMode.Interval]] and - * [[ScheduledConfig.shouldContinueOnError]] always returning `false`. + * It is a special case of [[ScheduledConfig]] with [[ScheduledConfig.sleepMode]] always set to [[SleepMode.Interval]] and a + * [[ScheduledConfig.afterAttempt]] callback which checks if the result was successful. * * @param schedule * The repeat schedule which determines the maximum number of invocations and the interval between subsequent invocations. See diff --git a/doc/utils/repeat.md b/doc/utils/repeat.md index 528e930d..0e6cd4d3 100644 --- a/doc/utils/repeat.md +++ b/doc/utils/repeat.md @@ -27,7 +27,7 @@ Similarly to the `retry` API, the `operation` can be defined: The `repeat` config requires a `Schedule`, which indicates how many times and with what interval should the `operation` be repeated. -In addition, it is possible to define a custom `shouldContinueOnSuccess` strategy for deciding if the operation +In addition, it is possible to define a custom `shouldContinueOnResult` strategy for deciding if the operation should continue to be repeated after a successful result returned by the previous operation (defaults to `_: T => true`). If an operation returns an error, the repeat loop will always be stopped. If an error handling within the operation diff --git a/doc/utils/scheduled.md b/doc/utils/scheduled.md index c209535b..7086fa6e 100644 --- a/doc/utils/scheduled.md +++ b/doc/utils/scheduled.md @@ -20,8 +20,7 @@ The `scheduled` config consists of: - `Interval` - default for `repeat` operations, where the sleep is calculated as the duration provided by schedule minus the duration of the last operation (can be negative, in which case the next operation occurs immediately). 
- `Delay` - default for `retry` operations, where the sleep is just the duration provided by schedule. -- `afterAttempt` - a callback function that is invoked after each operation and determines if the scheduler loop should continue. Used for `onRetry`, `shouldContinueOnError`, `shouldContinueOnSuccess` and adaptive retries in `retry` API. Defaults to continuing on error and stopping on result `T`. - +- `afterAttempt` - a callback function that is invoked after each operation and determines if the scheduler loop should continue. Used for `onRetry`, `shouldContinueOnError`, `shouldContinueOnResult` and adaptive retries in the `retry` API. Defaults to always continuing. ## Schedule