Skip to content

Commit

Permalink
add global outlier detection
Browse files Browse the repository at this point in the history
Signed-off-by: yangyang <[email protected]>
  • Loading branch information
yangyy93 committed Oct 18, 2023
1 parent c0d066a commit 1d23f20
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 3 deletions.
15 changes: 15 additions & 0 deletions apis/projectcontour/v1/httpproxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -1037,11 +1037,19 @@ type Service struct {
// +optional
SlowStartPolicy *SlowStartPolicy `json:"slowStartPolicy,omitempty"`
// The policy for managing outlier detection on a service.
// If not specified, the global OutlierDetection policy will be used.
// +optional
OutlierDetection *OutlierDetection `json:"outlierDetection,omitempty"`
}

// OutlierDetection defines the configuration for outlier detection on a service.
type OutlierDetection struct {
// Disabled configures the Service to not use
// the default global OutlierDetection policy defined by the Contour configuration.
// Defaults to false.
// +optional
Disabled bool `json:"disabled,omitempty"`

// ConsecutiveServerErrors defines The number of consecutive server-side error responses before a consecutive 5xx ejection occurs.
// When the backend host encounters consecutive
// errors greater than or equal to ConsecutiveServerErrors, it will be
Expand Down Expand Up @@ -1092,6 +1100,13 @@ type OutlierDetection struct {
// +optional
// +kubebuilder:validation:Maximum=100
MaxEjectionPercent *uint32 `json:"maxEjectionPercent,omitempty"`

// MaxEjectionTimeJitter is The maximum amount of jitter to add to the ejection time,
// in order to prevent a ‘thundering herd’ effect where all proxies try to reconnect to host at the same time.
// Defaults to 0s.
// +optional
// +kubebuilder:validation:Pattern=`^(((\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$`
MaxEjectionTimeJitter *string `json:"maxEjectionTimeJitter,omitempty"`
}

// HTTPHealthCheckPolicy defines health checks on the upstream service.
Expand Down
5 changes: 5 additions & 0 deletions apis/projectcontour/v1alpha1/contourconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ type ContourConfigurationSpec struct {

// Tracing defines properties for exporting trace data to OpenTelemetry.
Tracing *TracingConfig `json:"tracing,omitempty"`

// GlobalOutlierDetection defines the configuration for outlier detection on all services.
// If defined, this will be used as the default for all services.
// +optional
GlobalOutlierDetection *contour_api_v1.OutlierDetection `json:"outlierDetection,omitempty"`
}

// XDSServerType is the type of xDS server implementation.
Expand Down
3 changes: 3 additions & 0 deletions cmd/contour/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,7 @@ func (s *Server) doServe() error {
globalRateLimitService: contourConfiguration.RateLimitService,
maxRequestsPerConnection: contourConfiguration.Envoy.Cluster.MaxRequestsPerConnection,
perConnectionBufferLimitBytes: contourConfiguration.Envoy.Cluster.PerConnectionBufferLimitBytes,
globalOutlierDetection: contourConfiguration.GlobalOutlierDetection,
})

// Build the core Kubernetes event handler.
Expand Down Expand Up @@ -1085,6 +1086,7 @@ type dagBuilderConfig struct {
maxRequestsPerConnection *uint32
perConnectionBufferLimitBytes *uint32
globalRateLimitService *contour_api_v1alpha1.RateLimitServiceConfig
globalOutlierDetection *contour_api_v1.OutlierDetection
}

func (s *Server) getDAGBuilder(dbc dagBuilderConfig) *dag.Builder {
Expand Down Expand Up @@ -1177,6 +1179,7 @@ func (s *Server) getDAGBuilder(dbc dagBuilderConfig) *dag.Builder {
GlobalRateLimitService: dbc.globalRateLimitService,
PerConnectionBufferLimitBytes: dbc.perConnectionBufferLimitBytes,
SetSourceMetadataOnRoutes: true,
GlobalOutlierDetection: dbc.globalOutlierDetection,
},
}

Expand Down
1 change: 1 addition & 0 deletions internal/dag/dag.go
Original file line number Diff line number Diff line change
Expand Up @@ -1273,4 +1273,5 @@ type OutlierDetectionPolicy struct {
SplitExternalLocalOriginErrors bool
ConsecutiveLocalOriginFailure uint32
MaxEjectionPercent uint32
MaxEjectionTimeJitter time.Duration
}
31 changes: 29 additions & 2 deletions internal/dag/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -810,16 +810,35 @@ func loadBalancerRequestHashPolicies(lbp *contour_api_v1.LoadBalancerPolicy, val

}

func outlierDetectionPolicy(outlierDetection *contour_api_v1.OutlierDetection) (*OutlierDetectionPolicy, error) {
func mergeOutlierDetectionPolicy(globalOutlierDetection, serviceOutlierDetection *contour_api_v1.OutlierDetection) *contour_api_v1.OutlierDetection {
if serviceOutlierDetection == nil {
if globalOutlierDetection == nil || globalOutlierDetection.Disabled {
return nil
}
return globalOutlierDetection
}

if serviceOutlierDetection.Disabled {
return nil
}

return serviceOutlierDetection
}

func outlierDetectionPolicy(globalOutlierDetection, serviceOutlierDetection *contour_api_v1.OutlierDetection) (*OutlierDetectionPolicy, error) {
var outlierDetection *contour_api_v1.OutlierDetection
outlierDetection = mergeOutlierDetectionPolicy(globalOutlierDetection, serviceOutlierDetection)

if outlierDetection == nil {
return nil, nil
}

out := &OutlierDetectionPolicy{
SplitExternalLocalOriginErrors: outlierDetection.SplitExternalLocalOriginErrors,
}

var err error
var interval, baseEjectionTime, maxEjectionTime time.Duration
var interval, baseEjectionTime, maxEjectionTime, maxEjectionTimeJitter time.Duration

if outlierDetection.Interval != nil {
interval, err = time.ParseDuration(ref.Val(outlierDetection.Interval, "10s"))
Expand Down Expand Up @@ -857,5 +876,13 @@ func outlierDetectionPolicy(outlierDetection *contour_api_v1.OutlierDetection) (
out.MaxEjectionPercent = ref.Val(outlierDetection.MaxEjectionPercent, 10)
}

if outlierDetection.MaxEjectionTimeJitter != nil {
maxEjectionTimeJitter, err = time.ParseDuration(ref.Val(outlierDetection.MaxEjectionTimeJitter, "0s"))
if err != nil {
return nil, fmt.Errorf("error parsing maxEjectionTimeJitter: %w", err)
}
out.MaxEjectionTimeJitter = maxEjectionTimeJitter
}

return out, nil
}
77 changes: 76 additions & 1 deletion internal/dag/policy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1271,6 +1271,81 @@ func TestValidateHeaderAlteration(t *testing.T) {
}
}

func TestMergeOutlierDetectionPolicy(t *testing.T) {
tests := map[string]struct {
globalPolicy *contour_api_v1.OutlierDetection
servicePolicy *contour_api_v1.OutlierDetection
want *contour_api_v1.OutlierDetection
}{
"globalPolicy is nil and servicePolicy is nil": {
globalPolicy: nil,
servicePolicy: nil,
want: nil,
},
"globalPolicy is nil and servicePolicy is not nil and servicePolicy is enabled": {
globalPolicy: nil,
servicePolicy: &contour_api_v1.OutlierDetection{},
want: &contour_api_v1.OutlierDetection{},
},
"globalPolicy is nil and servicePolicy is not nil and servicePolicy is disabled": {
globalPolicy: nil,
servicePolicy: &contour_api_v1.OutlierDetection{
Disabled: true,
},
want: nil,
},
"globalPolicy is not nil and globalPolicy is enabled and servicePolicy is nil": {
globalPolicy: &contour_api_v1.OutlierDetection{},
servicePolicy: nil,
want: &contour_api_v1.OutlierDetection{},
},
"globalPolicy is not nil and globalPolicy is disabled and servicePolicy is nil": {
globalPolicy: &contour_api_v1.OutlierDetection{
Disabled: true,
},
servicePolicy: nil,
want: nil,
},
"globalPolicy is not nil and globalPolicy is enabled and servicePolicy is not nil and servicePolicy is enabled": {
globalPolicy: &contour_api_v1.OutlierDetection{
ConsecutiveServerErrors: ref.To(uint32(5)),
},
servicePolicy: &contour_api_v1.OutlierDetection{
ConsecutiveServerErrors: ref.To(uint32(10)),
},
want: &contour_api_v1.OutlierDetection{
ConsecutiveServerErrors: ref.To(uint32(10)),
},
},
"globalPolicy is not nil and globalPolicy is enabled and servicePolicy is not nil and servicePolicy is disabled": {
globalPolicy: &contour_api_v1.OutlierDetection{
ConsecutiveServerErrors: ref.To(uint32(5)),
},
servicePolicy: &contour_api_v1.OutlierDetection{
Disabled: true,
},
want: nil,
},
"globalPolicy is not nil and globalPolicy is disabled and servicePolicy is not nil and servicePolicy is enabled": {
globalPolicy: &contour_api_v1.OutlierDetection{
Disabled: true,
},
servicePolicy: &contour_api_v1.OutlierDetection{
ConsecutiveServerErrors: ref.To(uint32(10)),
},
want: &contour_api_v1.OutlierDetection{
ConsecutiveServerErrors: ref.To(uint32(10)),
},
},
}
for name, test := range tests {
t.Run(name, func(t *testing.T) {
got := mergeOutlierDetectionPolicy(test.globalPolicy, test.servicePolicy)
assert.Equal(t, test.want, got)
})
}
}

func TestOutlierDetectionPolicy(t *testing.T) {
tests := map[string]struct {
in *contour_api_v1.OutlierDetection
Expand Down Expand Up @@ -1338,7 +1413,7 @@ func TestOutlierDetectionPolicy(t *testing.T) {
}
for name, tc := range tests {
t.Run(name, func(t *testing.T) {
got, gotErr := outlierDetectionPolicy(tc.in)
got, gotErr := outlierDetectionPolicy(nil, tc.in)
if tc.wantErr {
assert.Error(t, gotErr)
} else {
Expand Down
3 changes: 3 additions & 0 deletions internal/envoy/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ func Clustername(cluster *dag.Cluster) string {
if od.MaxEjectionPercent > 0 {
buf += strconv.Itoa(int(od.MaxEjectionPercent))
}
if od.MaxEjectionTimeJitter > 0 {
buf += od.MaxEjectionTimeJitter.String()
}
}
if uv := cluster.UpstreamValidation; uv != nil {
buf += uv.CACertificate.Object.ObjectMeta.Name
Expand Down
4 changes: 4 additions & 0 deletions internal/envoy/v3/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,5 +415,9 @@ func outlierDetection(policy *dag.OutlierDetectionPolicy) *envoy_cluster_v3.Outl
out.EnforcingLocalOriginSuccessRate = protobuf.UInt32Zero()
}

if policy.MaxEjectionTimeJitter > 0 {
out.MaxEjectionTimeJitter = durationpb.New(policy.MaxEjectionTimeJitter)
}

return out
}

0 comments on commit 1d23f20

Please sign in to comment.