diff --git a/README.md b/README.md index 9dd39a7f..25a698d8 100644 --- a/README.md +++ b/README.md @@ -647,7 +647,15 @@ To enable this behavior set `MERGE_DOMAIN_CONFIG` to `true`. xDS Management Server is a gRPC server which implements the [Aggregated Discovery Service (ADS)](https://github.com/envoyproxy/data-plane-api/blob/97b6dae39046f7da1331a4dc57830d20e842fc26/envoy/service/discovery/v3/ads.proto). The xDS Management server serves [Discovery Response](https://github.com/envoyproxy/data-plane-api/blob/97b6dae39046f7da1331a4dc57830d20e842fc26/envoy/service/discovery/v3/discovery.proto#L69) with [Ratelimit Configuration Resources](api/ratelimit/config/ratelimit/v3/rls_conf.proto) and with Type URL `"type.googleapis.com/ratelimit.config.ratelimit.v3.RateLimitConfig"`. + The xDS client in the Rate limit service configure Rate limit service with the provided configuration. +In case of connection failures, the xDS client retries the connection to the xDS server with exponential backoff. The backoff parameters are configurable: + +1. `XDS_CLIENT_BACKOFF_JITTER`: Set to `"true"` to add jitter to the exponential backoff. Default: "true" +2. `XDS_CLIENT_BACKOFF_INITIAL_INTERVAL`: The base amount of time the xDS client waits before retrying the connection after a failure. Default: "10s" +3. `XDS_CLIENT_BACKOFF_MAX_INTERVAL`: The upper limit on the amount of time the xDS client will wait between retries. After reaching the max backoff interval, subsequent retries continue using the max interval. Default: "60s" +4. `XDS_CLIENT_BACKOFF_RANDOM_FACTOR`: The factor by which the current backoff interval is multiplied to calculate the next backoff interval. Default: "0.5" + For more information on xDS protocol please refer to the [envoy proxy documentation](https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol). You can refer to [the sample xDS configuration management server](examples/xds-sotw-config-server/README.md). 
diff --git a/go.mod b/go.mod index fe94a09f..45ef3a66 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( github.com/google/uuid v1.4.0 github.com/gorilla/mux v1.8.1 github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 + github.com/jpillora/backoff v1.0.0 github.com/kavu/go_reuseport v1.5.0 github.com/kelseyhightower/envconfig v1.4.0 github.com/lyft/goruntime v0.3.0 diff --git a/go.sum b/go.sum index 9b62a6f4..2b8a48b3 100644 --- a/go.sum +++ b/go.sum @@ -77,6 +77,8 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDa github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8= github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1 h1:6UKoz5ujsI55KNpsJH3UwCq3T8kKbZwNZBNPuTTje8U= github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1/go.mod h1:YvJ2f6MplWDhfxiUC3KpyTy76kYUZA4W3pTv/wdKQ9Y= +github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= +github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/kavu/go_reuseport v1.5.0 h1:UNuiY2OblcqAtVDE8Gsg1kZz8zbBWg907sP1ceBV+bk= github.com/kavu/go_reuseport v1.5.0/go.mod h1:CG8Ee7ceMFSMnx/xr25Vm0qXaj2Z4i5PWoUx+JZ5/CU= github.com/kelseyhightower/envconfig v1.4.0 h1:Im6hONhd3pLkfDFsbRgu68RDNkGF1r3dvMUtDTo2cv8= diff --git a/src/provider/xds_grpc_sotw_provider.go b/src/provider/xds_grpc_sotw_provider.go index fcacc212..9dc9faee 100644 --- a/src/provider/xds_grpc_sotw_provider.go +++ b/src/provider/xds_grpc_sotw_provider.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strings" + "time" "google.golang.org/grpc/metadata" @@ -11,6 +12,7 @@ import ( "github.com/envoyproxy/go-control-plane/pkg/resource/v3" "github.com/golang/protobuf/ptypes/any" grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" + "github.com/jpillora/backoff" logger "github.com/sirupsen/logrus" "google.golang.org/grpc" "google.golang.org/grpc/credentials" @@ -67,6 +69,12 @@ func (p *XdsGrpcSotwProvider) Stop() { 
func (p *XdsGrpcSotwProvider) initXdsClient() { logger.Info("Starting xDS client connection for rate limit configurations") conn := p.initializeAndWatch() + b := &backoff.Backoff{ + Min: p.settings.XdsClientBackoffInitialInterval, + Max: p.settings.XdsClientBackoffMaxInterval, + Factor: p.settings.XdsClientBackoffRandomFactor, + Jitter: p.settings.XdsClientBackoffJitter, + } for retryEvent := range p.connectionRetryChannel { if conn != nil { @@ -76,10 +84,18 @@ func (p *XdsGrpcSotwProvider) initXdsClient() { logger.Info("Stopping xDS client watch for rate limit configurations") break } + d := p.getJitteredExponentialBackOffDuration(b) + logger.Debugf("Sleeping for %s using exponential backoff\n", d) + time.Sleep(d) conn = p.initializeAndWatch() } } +func (p *XdsGrpcSotwProvider) getJitteredExponentialBackOffDuration(b *backoff.Backoff) time.Duration { + logger.Debugf("Retry attempt# %f", b.Attempt()) + return b.Duration() +} + func (p *XdsGrpcSotwProvider) initializeAndWatch() *grpc.ClientConn { conn, err := p.getGrpcConnection() if err != nil { @@ -99,11 +115,8 @@ func (p *XdsGrpcSotwProvider) watchConfigs() { resp, err := p.adsClient.Fetch() if err != nil { logger.Errorf("Failed to receive configuration from xDS Management Server: %s", err.Error()) - if sotw.IsConnError(err) { - p.retryGrpcConn() - return - } - p.adsClient.Nack(err.Error()) + p.retryGrpcConn() + return } else { logger.Tracef("Response received from xDS Management Server: %v", resp) p.sendConfigs(resp.Resources) diff --git a/src/settings/settings.go b/src/settings/settings.go index 09704781..f3c3721d 100644 --- a/src/settings/settings.go +++ b/src/settings/settings.go @@ -69,6 +69,12 @@ type Settings struct { // GrpcClientTlsSAN is the SAN to validate from the client cert during mTLS auth ConfigGrpcXdsServerTlsSAN string `envconfig:"CONFIG_GRPC_XDS_SERVER_TLS_SAN" default:""` + // xDS client backoff configuration + XdsClientBackoffInitialInterval time.Duration 
`envconfig:"XDS_CLIENT_BACKOFF_INITIAL_INTERVAL" default:"10s"` + XdsClientBackoffMaxInterval time.Duration `envconfig:"XDS_CLIENT_BACKOFF_MAX_INTERVAL" default:"60s"` + XdsClientBackoffRandomFactor float64 `envconfig:"XDS_CLIENT_BACKOFF_RANDOM_FACTOR" default:"0.5"` + XdsClientBackoffJitter bool `envconfig:"XDS_CLIENT_BACKOFF_JITTER" default:"true"` + // Stats-related settings UseStatsd bool `envconfig:"USE_STATSD" default:"true"` StatsdHost string `envconfig:"STATSD_HOST" default:"localhost"`