Skip to content

Commit

Permalink
Retry the grpc connection when there's an error (#503)
Browse files Browse the repository at this point in the history
Signed-off-by: alekhya.kondapuram <[email protected]>
  • Loading branch information
akondapuram authored Feb 23, 2024
1 parent f3b6730 commit 19f2079
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 5 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,15 @@ To enable this behavior set `MERGE_DOMAIN_CONFIG` to `true`.
xDS Management Server is a gRPC server which implements the [Aggregated Discovery Service (ADS)](https://github.com/envoyproxy/data-plane-api/blob/97b6dae39046f7da1331a4dc57830d20e842fc26/envoy/service/discovery/v3/ads.proto).
The xDS Management server serves [Discovery Response](https://github.com/envoyproxy/data-plane-api/blob/97b6dae39046f7da1331a4dc57830d20e842fc26/envoy/service/discovery/v3/discovery.proto#L69) with [Ratelimit Configuration Resources](api/ratelimit/config/ratelimit/v3/rls_conf.proto)
and with Type URL `"type.googleapis.com/ratelimit.config.ratelimit.v3.RateLimitConfig"`.
The xDS client in the Rate limit service configures the Rate limit service with the provided configuration.
In case of connection failures, the xDS Client retries the connection to the xDS server with exponential backoff and the backoff parameters are configurable.
1. `XDS_CLIENT_BACKOFF_JITTER`: set to `"true"` to add jitter to the exponential backoff.
2. `XDS_CLIENT_BACKOFF_INITIAL_INTERVAL`: The base amount of time the xDS client waits before retrying the connection after failure. Default: "10s"
3. `XDS_CLIENT_BACKOFF_MAX_INTERVAL`: The max backoff interval is the upper limit on the amount of time the xDS client will wait between retries. After reaching the max backoff interval, the next retries will continue using the max interval. Default: "60s"
4. `XDS_CLIENT_BACKOFF_RANDOM_FACTOR`: This is a factor by which the initial interval is multiplied to calculate the next backoff interval. Default: "0.5"
For more information on xDS protocol please refer to the [envoy proxy documentation](https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol).
You can refer to [the sample xDS configuration management server](examples/xds-sotw-config-server/README.md).
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ require (
github.com/google/uuid v1.4.0
github.com/gorilla/mux v1.8.1
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0
github.com/jpillora/backoff v1.0.0
github.com/kavu/go_reuseport v1.5.0
github.com/kelseyhightower/envconfig v1.4.0
github.com/lyft/goruntime v0.3.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDa
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1 h1:6UKoz5ujsI55KNpsJH3UwCq3T8kKbZwNZBNPuTTje8U=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1/go.mod h1:YvJ2f6MplWDhfxiUC3KpyTy76kYUZA4W3pTv/wdKQ9Y=
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/kavu/go_reuseport v1.5.0 h1:UNuiY2OblcqAtVDE8Gsg1kZz8zbBWg907sP1ceBV+bk=
github.com/kavu/go_reuseport v1.5.0/go.mod h1:CG8Ee7ceMFSMnx/xr25Vm0qXaj2Z4i5PWoUx+JZ5/CU=
github.com/kelseyhightower/envconfig v1.4.0 h1:Im6hONhd3pLkfDFsbRgu68RDNkGF1r3dvMUtDTo2cv8=
Expand Down
23 changes: 18 additions & 5 deletions src/provider/xds_grpc_sotw_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@ import (
"context"
"fmt"
"strings"
"time"

"google.golang.org/grpc/metadata"

corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
"github.com/envoyproxy/go-control-plane/pkg/resource/v3"
"github.com/golang/protobuf/ptypes/any"
grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
"github.com/jpillora/backoff"
logger "github.com/sirupsen/logrus"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
Expand Down Expand Up @@ -67,6 +69,12 @@ func (p *XdsGrpcSotwProvider) Stop() {
func (p *XdsGrpcSotwProvider) initXdsClient() {
logger.Info("Starting xDS client connection for rate limit configurations")
conn := p.initializeAndWatch()
b := &backoff.Backoff{
Min: p.settings.XdsClientBackoffInitialInterval,
Max: p.settings.XdsClientBackoffMaxInterval,
Factor: p.settings.XdsClientBackoffRandomFactor,
Jitter: p.settings.XdsClientBackoffJitter,
}

for retryEvent := range p.connectionRetryChannel {
if conn != nil {
Expand All @@ -76,10 +84,18 @@ func (p *XdsGrpcSotwProvider) initXdsClient() {
logger.Info("Stopping xDS client watch for rate limit configurations")
break
}
d := p.getJitteredExponentialBackOffDuration(b)
logger.Debugf("Sleeping for %s using exponential backoff\n", d)
time.Sleep(d)
conn = p.initializeAndWatch()
}
}

// getJitteredExponentialBackOffDuration reports how long the xDS client should
// wait before its next connection attempt.
//
// It writes the backoff's current attempt counter to the debug log and then
// returns whatever b.Duration() yields. The receiver p is not consulted; all
// state lives in b.
//
// NOTE(review): b.Duration() presumably advances b's attempt counter as a side
// effect (see the jpillora/backoff documentation) — confirm before reordering
// the two calls below.
func (p *XdsGrpcSotwProvider) getJitteredExponentialBackOffDuration(b *backoff.Backoff) time.Duration {
	// Read the attempt counter before asking for the duration so the logged
	// value is the one in effect when this retry was requested.
	attempt := b.Attempt()
	logger.Debugf("Retry attempt# %f", attempt)

	wait := b.Duration()
	return wait
}

func (p *XdsGrpcSotwProvider) initializeAndWatch() *grpc.ClientConn {
conn, err := p.getGrpcConnection()
if err != nil {
Expand All @@ -99,11 +115,8 @@ func (p *XdsGrpcSotwProvider) watchConfigs() {
resp, err := p.adsClient.Fetch()
if err != nil {
logger.Errorf("Failed to receive configuration from xDS Management Server: %s", err.Error())
if sotw.IsConnError(err) {
p.retryGrpcConn()
return
}
p.adsClient.Nack(err.Error())
p.retryGrpcConn()
return
} else {
logger.Tracef("Response received from xDS Management Server: %v", resp)
p.sendConfigs(resp.Resources)
Expand Down
6 changes: 6 additions & 0 deletions src/settings/settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ type Settings struct {
// GrpcClientTlsSAN is the SAN to validate from the client cert during mTLS auth
ConfigGrpcXdsServerTlsSAN string `envconfig:"CONFIG_GRPC_XDS_SERVER_TLS_SAN" default:""`

// xDS client backoff configuration
XdsClientBackoffInitialInterval time.Duration `envconfig:"XDS_CLIENT_BACKOFF_INITIAL_INTERVAL" default:"10s"`
XdsClientBackoffMaxInterval time.Duration `envconfig:"XDS_CLIENT_BACKOFF_MAX_INTERVAL" default:"60s"`
XdsClientBackoffRandomFactor float64 `envconfig:"XDS_CLIENT_BACKOFF_RANDOM_FACTOR" default:"0.5"`
XdsClientBackoffJitter bool `envconfig:"XDS_CLIENT_BACKOFF_JITTER" default:"true"`

// Stats-related settings
UseStatsd bool `envconfig:"USE_STATSD" default:"true"`
StatsdHost string `envconfig:"STATSD_HOST" default:"localhost"`
Expand Down

0 comments on commit 19f2079

Please sign in to comment.