From c0d066a46bfe4f2515fba4e026d31d848c61148b Mon Sep 17 00:00:00 2001 From: yy Date: Mon, 24 Jul 2023 16:38:11 +0800 Subject: [PATCH] add service outlier detection Signed-off-by: yy add changelog Signed-off-by: yy --- apis/projectcontour/v1/detailedconditions.go | 4 + apis/projectcontour/v1/httpproxy.go | 56 +++++++ .../v1/zz_generated.deepcopy.go | 50 ++++++ changelogs/unreleased/5575-yangyy93-minor.md | 3 + examples/contour/01-crds.yaml | 126 ++++++++++++++++ examples/render/contour-deployment.yaml | 126 ++++++++++++++++ .../render/contour-gateway-provisioner.yaml | 126 ++++++++++++++++ examples/render/contour-gateway.yaml | 126 ++++++++++++++++ examples/render/contour.yaml | 126 ++++++++++++++++ internal/dag/dag.go | 14 ++ internal/dag/httpproxy_processor.go | 8 + internal/dag/policy.go | 50 ++++++ internal/dag/policy_test.go | 79 ++++++++++ internal/envoy/cluster.go | 24 +++ internal/envoy/v3/cluster.go | 45 ++++++ internal/envoy/v3/cluster_test.go | 104 +++++++++++++ internal/protobuf/helpers.go | 5 + .../docs/main/config/api-reference.html | 142 ++++++++++++++++++ 18 files changed, 1214 insertions(+) create mode 100644 changelogs/unreleased/5575-yangyy93-minor.md diff --git a/apis/projectcontour/v1/detailedconditions.go b/apis/projectcontour/v1/detailedconditions.go index d7ac6e13856..2a2ebe08977 100644 --- a/apis/projectcontour/v1/detailedconditions.go +++ b/apis/projectcontour/v1/detailedconditions.go @@ -158,6 +158,10 @@ const ( // with an HTTPProxy resource which is not part of a delegation chain. ConditionTypeOrphanedError = "Orphaned" + // ConditionTypeOutlierDetectionError describes an error condition with + // an HTTPProxy Outlier Detection issue. + ConditionTypeOutlierDetectionError = "OutlierDetectionError" + // ConditionTypePrefixReplaceError describes an error condition with // an HTTPProxy path prefix replacement issue. 
ConditionTypePrefixReplaceError = "PrefixReplaceError" diff --git a/apis/projectcontour/v1/httpproxy.go b/apis/projectcontour/v1/httpproxy.go index 498be837a8b..8c141238d14 100644 --- a/apis/projectcontour/v1/httpproxy.go +++ b/apis/projectcontour/v1/httpproxy.go @@ -1036,6 +1036,62 @@ type Service struct { // Slow start will gradually increase amount of traffic to a newly added endpoint. // +optional SlowStartPolicy *SlowStartPolicy `json:"slowStartPolicy,omitempty"` + // The policy for managing outlier detection on a service. + OutlierDetection *OutlierDetection `json:"outlierDetection,omitempty"` +} + +// OutlierDetection defines the configuration for outlier detection on a service. +type OutlierDetection struct { + // ConsecutiveServerErrors defines The number of consecutive server-side error responses before a consecutive 5xx ejection occurs. + // When the backend host encounters consecutive + // errors greater than or equal to ConsecutiveServerErrors, it will be + // ejected from the load balancing pool. + // for HTTP services, a 5xx counts as an error and for TCP services + // connection failures and connection timeouts count as an error. + // It can be disabled by setting the value to 0. + // Defaults to 5. + // +optional + ConsecutiveServerErrors *uint32 `json:"consecutiveServerErrors,omitempty"` + + // Interval is the interval at which host status is evaluated. + // Defaults to 10s. + // +optional + // +kubebuilder:validation:Pattern=`^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$` + Interval *string `json:"interval,omitempty"` + + // BaseEjectionTime is the base time that a host is ejected for. + // A host will remain ejected for a period of time equal to the + // product of the ejection base duration and the number of times the host has been ejected. + // Defaults to 30s. 
+ // +optional + // +kubebuilder:validation:Pattern=`^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$` + BaseEjectionTime *string `json:"baseEjectionTime,omitempty"` + + // MaxEjectionTime is the maximum time a host will be ejected for. + // After this amount of time, a host will be returned to normal operation. + // If not specified, the default value (300s) or BaseEjectionTime value is applied, whatever is larger. + // Defaults to 300s. + // +optional + // +kubebuilder:validation:Pattern=`^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$` + MaxEjectionTime *string `json:"maxEjectionTime,omitempty"` + + // SplitExternalLocalOriginErrors defines whether to split the local origin errors from the external origin errors. + // Defaults to false. + // +optional + // +kubebuilder:default=false + SplitExternalLocalOriginErrors bool `json:"splitExternalLocalOriginErrors"` + + // ConsecutiveLocalOriginFailure defines the number of consecutive local origin failures before a consecutive local origin ejection occurs. + // Parameters take effect only when SplitExternalLocalOriginErrors is true. + // Defaults to 5. + ConsecutiveLocalOriginFailure *uint32 `json:"consecutiveLocalOriginFailure,omitempty"` + + // MaxEjectionPercent is the max percentage of hosts in the load balancing pool for the upstream service that can be ejected. + // But will eject at least one host regardless of the value here. + // Defaults to 10%. + // +optional + // +kubebuilder:validation:Maximum=100 + MaxEjectionPercent *uint32 `json:"maxEjectionPercent,omitempty"` } // HTTPHealthCheckPolicy defines health checks on the upstream service. 
diff --git a/apis/projectcontour/v1/zz_generated.deepcopy.go b/apis/projectcontour/v1/zz_generated.deepcopy.go index 3c3537ef24e..22f244dd568 100644 --- a/apis/projectcontour/v1/zz_generated.deepcopy.go +++ b/apis/projectcontour/v1/zz_generated.deepcopy.go @@ -766,6 +766,51 @@ func (in *MatchCondition) DeepCopy() *MatchCondition { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OutlierDetection) DeepCopyInto(out *OutlierDetection) { + *out = *in + if in.ConsecutiveServerErrors != nil { + in, out := &in.ConsecutiveServerErrors, &out.ConsecutiveServerErrors + *out = new(uint32) + **out = **in + } + if in.Interval != nil { + in, out := &in.Interval, &out.Interval + *out = new(string) + **out = **in + } + if in.BaseEjectionTime != nil { + in, out := &in.BaseEjectionTime, &out.BaseEjectionTime + *out = new(string) + **out = **in + } + if in.MaxEjectionTime != nil { + in, out := &in.MaxEjectionTime, &out.MaxEjectionTime + *out = new(string) + **out = **in + } + if in.ConsecutiveLocalOriginFailure != nil { + in, out := &in.ConsecutiveLocalOriginFailure, &out.ConsecutiveLocalOriginFailure + *out = new(uint32) + **out = **in + } + if in.MaxEjectionPercent != nil { + in, out := &in.MaxEjectionPercent, &out.MaxEjectionPercent + *out = new(uint32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OutlierDetection. +func (in *OutlierDetection) DeepCopy() *OutlierDetection { + if in == nil { + return nil + } + out := new(OutlierDetection) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *PathRewritePolicy) DeepCopyInto(out *PathRewritePolicy) { *out = *in @@ -1179,6 +1224,11 @@ func (in *Service) DeepCopyInto(out *Service) { *out = new(SlowStartPolicy) **out = **in } + if in.OutlierDetection != nil { + in, out := &in.OutlierDetection, &out.OutlierDetection + *out = new(OutlierDetection) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Service. diff --git a/changelogs/unreleased/5575-yangyy93-minor.md b/changelogs/unreleased/5575-yangyy93-minor.md new file mode 100644 index 00000000000..fb452b0646c --- /dev/null +++ b/changelogs/unreleased/5575-yangyy93-minor.md @@ -0,0 +1,3 @@ +## Add outlier detection configuration for services + +Add [outlier detection](https://www.envoyproxy.io/docs/envoy/v1.26.3/intro/arch_overview/upstream/outlier#arch-overview-outlier-detection) configuration for services, including the consecutiveServerErrors and consecutiveLocalOriginFailure thresholds, so that passive health checks can be performed on clusters. diff --git a/examples/contour/01-crds.yaml b/examples/contour/01-crds.yaml index 940a1b288bf..06521e78065 100644 --- a/examples/contour/01-crds.yaml +++ b/examples/contour/01-crds.yaml @@ -6266,6 +6266,69 @@ spec: up corresponding endpoints which contain the ips to route. type: string + outlierDetection: + description: The policy for managing outlier detection + on a service. + properties: + baseEjectionTime: + description: BaseEjectionTime is the base time that + a host is ejected for. A host will remain ejected + for a period of time equal to the product of the + ejection base duration and the number of times the + host has been ejected. Defaults to 30s. 
+ pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + consecutiveLocalOriginFailure: + description: ConsecutiveLocalOriginFailure defines + the number of consecutive local origin failures + before a consecutive local origin ejection occurs. + Parameters take effect only when SplitExternalLocalOriginErrors + is true. Defaults to 5. + format: int32 + type: integer + consecutiveServerErrors: + description: ConsecutiveServerErrors defines The number + of consecutive server-side error responses before + a consecutive 5xx ejection occurs. When the backend + host encounters consecutive errors greater than + or equal to ConsecutiveServerErrors, it will be + ejected from the load balancing pool. for HTTP services, + a 5xx counts as an error and for TCP services connection + failures and connection timeouts count as an error. + It can be disabled by setting the value to 0. Defaults + to 5. + format: int32 + type: integer + interval: + description: Interval is the interval at which host + status is evaluated. Defaults to 10s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + maxEjectionPercent: + description: MaxEjectionPercent is the max percentage + of hosts in the load balancing pool for the upstream + service that can be ejected. But will eject at least + one host regardless of the value here. Defaults + to 10%. + format: int32 + maximum: 100 + type: integer + maxEjectionTime: + description: MaxEjectionTime is the maximum time a + host will be ejected for. After this amount of time, + a host will be returned to normal operation. If + not specified, the default value (300s) or BaseEjectionTime + value is applied, whatever is larger. Defaults to + 300s. 
+ pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors defines + whether to split the local origin errors from the + external origin errors. Defaults to false. + type: boolean + type: object port: description: Port (defined as Integer) to proxy traffic to since a service can have multiple defined. @@ -6663,6 +6726,69 @@ spec: traffic. Names defined here will be used to look up corresponding endpoints which contain the ips to route. type: string + outlierDetection: + description: The policy for managing outlier detection on + a service. + properties: + baseEjectionTime: + description: BaseEjectionTime is the base time that + a host is ejected for. A host will remain ejected + for a period of time equal to the product of the ejection + base duration and the number of times the host has + been ejected. Defaults to 30s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + consecutiveLocalOriginFailure: + description: ConsecutiveLocalOriginFailure defines the + number of consecutive local origin failures before + a consecutive local origin ejection occurs. Parameters + take effect only when SplitExternalLocalOriginErrors + is true. Defaults to 5. + format: int32 + type: integer + consecutiveServerErrors: + description: ConsecutiveServerErrors defines The number + of consecutive server-side error responses before + a consecutive 5xx ejection occurs. When the backend + host encounters consecutive errors greater than or + equal to ConsecutiveServerErrors, it will be ejected + from the load balancing pool. for HTTP services, a + 5xx counts as an error and for TCP services connection + failures and connection timeouts count as an error. + It can be disabled by setting the value to 0. Defaults + to 5. 
+ format: int32 + type: integer + interval: + description: Interval is the interval at which host + status is evaluated. Defaults to 10s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + maxEjectionPercent: + description: MaxEjectionPercent is the max percentage + of hosts in the load balancing pool for the upstream + service that can be ejected. But will eject at least + one host regardless of the value here. Defaults to + 10%. + format: int32 + maximum: 100 + type: integer + maxEjectionTime: + description: MaxEjectionTime is the maximum time a host + will be ejected for. After this amount of time, a + host will be returned to normal operation. If not + specified, the default value (300s) or BaseEjectionTime + value is applied, whatever is larger. Defaults to + 300s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors defines + whether to split the local origin errors from the + external origin errors. Defaults to false. + type: boolean + type: object port: description: Port (defined as Integer) to proxy traffic to since a service can have multiple defined. diff --git a/examples/render/contour-deployment.yaml b/examples/render/contour-deployment.yaml index 18fab7f0adf..50ca917cb94 100644 --- a/examples/render/contour-deployment.yaml +++ b/examples/render/contour-deployment.yaml @@ -6485,6 +6485,69 @@ spec: up corresponding endpoints which contain the ips to route. type: string + outlierDetection: + description: The policy for managing outlier detection + on a service. + properties: + baseEjectionTime: + description: BaseEjectionTime is the base time that + a host is ejected for. A host will remain ejected + for a period of time equal to the product of the + ejection base duration and the number of times the + host has been ejected. Defaults to 30s. 
+ pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + consecutiveLocalOriginFailure: + description: ConsecutiveLocalOriginFailure defines + the number of consecutive local origin failures + before a consecutive local origin ejection occurs. + Parameters take effect only when SplitExternalLocalOriginErrors + is true. Defaults to 5. + format: int32 + type: integer + consecutiveServerErrors: + description: ConsecutiveServerErrors defines The number + of consecutive server-side error responses before + a consecutive 5xx ejection occurs. When the backend + host encounters consecutive errors greater than + or equal to ConsecutiveServerErrors, it will be + ejected from the load balancing pool. for HTTP services, + a 5xx counts as an error and for TCP services connection + failures and connection timeouts count as an error. + It can be disabled by setting the value to 0. Defaults + to 5. + format: int32 + type: integer + interval: + description: Interval is the interval at which host + status is evaluated. Defaults to 10s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + maxEjectionPercent: + description: MaxEjectionPercent is the max percentage + of hosts in the load balancing pool for the upstream + service that can be ejected. But will eject at least + one host regardless of the value here. Defaults + to 10%. + format: int32 + maximum: 100 + type: integer + maxEjectionTime: + description: MaxEjectionTime is the maximum time a + host will be ejected for. After this amount of time, + a host will be returned to normal operation. If + not specified, the default value (300s) or BaseEjectionTime + value is applied, whatever is larger. Defaults to + 300s. 
+ pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors defines + whether to split the local origin errors from the + external origin errors. Defaults to false. + type: boolean + type: object port: description: Port (defined as Integer) to proxy traffic to since a service can have multiple defined. @@ -6882,6 +6945,69 @@ spec: traffic. Names defined here will be used to look up corresponding endpoints which contain the ips to route. type: string + outlierDetection: + description: The policy for managing outlier detection on + a service. + properties: + baseEjectionTime: + description: BaseEjectionTime is the base time that + a host is ejected for. A host will remain ejected + for a period of time equal to the product of the ejection + base duration and the number of times the host has + been ejected. Defaults to 30s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + consecutiveLocalOriginFailure: + description: ConsecutiveLocalOriginFailure defines the + number of consecutive local origin failures before + a consecutive local origin ejection occurs. Parameters + take effect only when SplitExternalLocalOriginErrors + is true. Defaults to 5. + format: int32 + type: integer + consecutiveServerErrors: + description: ConsecutiveServerErrors defines The number + of consecutive server-side error responses before + a consecutive 5xx ejection occurs. When the backend + host encounters consecutive errors greater than or + equal to ConsecutiveServerErrors, it will be ejected + from the load balancing pool. for HTTP services, a + 5xx counts as an error and for TCP services connection + failures and connection timeouts count as an error. + It can be disabled by setting the value to 0. Defaults + to 5. 
+ format: int32 + type: integer + interval: + description: Interval is the interval at which host + status is evaluated. Defaults to 10s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + maxEjectionPercent: + description: MaxEjectionPercent is the max percentage + of hosts in the load balancing pool for the upstream + service that can be ejected. But will eject at least + one host regardless of the value here. Defaults to + 10%. + format: int32 + maximum: 100 + type: integer + maxEjectionTime: + description: MaxEjectionTime is the maximum time a host + will be ejected for. After this amount of time, a + host will be returned to normal operation. If not + specified, the default value (300s) or BaseEjectionTime + value is applied, whatever is larger. Defaults to + 300s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors defines + whether to split the local origin errors from the + external origin errors. Defaults to false. + type: boolean + type: object port: description: Port (defined as Integer) to proxy traffic to since a service can have multiple defined. diff --git a/examples/render/contour-gateway-provisioner.yaml b/examples/render/contour-gateway-provisioner.yaml index 7bc48cb9ae8..82d57012a32 100644 --- a/examples/render/contour-gateway-provisioner.yaml +++ b/examples/render/contour-gateway-provisioner.yaml @@ -6277,6 +6277,69 @@ spec: up corresponding endpoints which contain the ips to route. type: string + outlierDetection: + description: The policy for managing outlier detection + on a service. + properties: + baseEjectionTime: + description: BaseEjectionTime is the base time that + a host is ejected for. A host will remain ejected + for a period of time equal to the product of the + ejection base duration and the number of times the + host has been ejected. 
Defaults to 30s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + consecutiveLocalOriginFailure: + description: ConsecutiveLocalOriginFailure defines + the number of consecutive local origin failures + before a consecutive local origin ejection occurs. + Parameters take effect only when SplitExternalLocalOriginErrors + is true. Defaults to 5. + format: int32 + type: integer + consecutiveServerErrors: + description: ConsecutiveServerErrors defines The number + of consecutive server-side error responses before + a consecutive 5xx ejection occurs. When the backend + host encounters consecutive errors greater than + or equal to ConsecutiveServerErrors, it will be + ejected from the load balancing pool. for HTTP services, + a 5xx counts as an error and for TCP services connection + failures and connection timeouts count as an error. + It can be disabled by setting the value to 0. Defaults + to 5. + format: int32 + type: integer + interval: + description: Interval is the interval at which host + status is evaluated. Defaults to 10s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + maxEjectionPercent: + description: MaxEjectionPercent is the max percentage + of hosts in the load balancing pool for the upstream + service that can be ejected. But will eject at least + one host regardless of the value here. Defaults + to 10%. + format: int32 + maximum: 100 + type: integer + maxEjectionTime: + description: MaxEjectionTime is the maximum time a + host will be ejected for. After this amount of time, + a host will be returned to normal operation. If + not specified, the default value (300s) or BaseEjectionTime + value is applied, whatever is larger. Defaults to + 300s. 
+ pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors defines + whether to split the local origin errors from the + external origin errors. Defaults to false. + type: boolean + type: object port: description: Port (defined as Integer) to proxy traffic to since a service can have multiple defined. @@ -6674,6 +6737,69 @@ spec: traffic. Names defined here will be used to look up corresponding endpoints which contain the ips to route. type: string + outlierDetection: + description: The policy for managing outlier detection on + a service. + properties: + baseEjectionTime: + description: BaseEjectionTime is the base time that + a host is ejected for. A host will remain ejected + for a period of time equal to the product of the ejection + base duration and the number of times the host has + been ejected. Defaults to 30s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + consecutiveLocalOriginFailure: + description: ConsecutiveLocalOriginFailure defines the + number of consecutive local origin failures before + a consecutive local origin ejection occurs. Parameters + take effect only when SplitExternalLocalOriginErrors + is true. Defaults to 5. + format: int32 + type: integer + consecutiveServerErrors: + description: ConsecutiveServerErrors defines The number + of consecutive server-side error responses before + a consecutive 5xx ejection occurs. When the backend + host encounters consecutive errors greater than or + equal to ConsecutiveServerErrors, it will be ejected + from the load balancing pool. for HTTP services, a + 5xx counts as an error and for TCP services connection + failures and connection timeouts count as an error. + It can be disabled by setting the value to 0. Defaults + to 5. 
+ format: int32 + type: integer + interval: + description: Interval is the interval at which host + status is evaluated. Defaults to 10s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + maxEjectionPercent: + description: MaxEjectionPercent is the max percentage + of hosts in the load balancing pool for the upstream + service that can be ejected. But will eject at least + one host regardless of the value here. Defaults to + 10%. + format: int32 + maximum: 100 + type: integer + maxEjectionTime: + description: MaxEjectionTime is the maximum time a host + will be ejected for. After this amount of time, a + host will be returned to normal operation. If not + specified, the default value (300s) or BaseEjectionTime + value is applied, whatever is larger. Defaults to + 300s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors defines + whether to split the local origin errors from the + external origin errors. Defaults to false. + type: boolean + type: object port: description: Port (defined as Integer) to proxy traffic to since a service can have multiple defined. diff --git a/examples/render/contour-gateway.yaml b/examples/render/contour-gateway.yaml index 2dccfd92371..1b25410c689 100644 --- a/examples/render/contour-gateway.yaml +++ b/examples/render/contour-gateway.yaml @@ -6488,6 +6488,69 @@ spec: up corresponding endpoints which contain the ips to route. type: string + outlierDetection: + description: The policy for managing outlier detection + on a service. + properties: + baseEjectionTime: + description: BaseEjectionTime is the base time that + a host is ejected for. A host will remain ejected + for a period of time equal to the product of the + ejection base duration and the number of times the + host has been ejected. Defaults to 30s. 
+ pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + consecutiveLocalOriginFailure: + description: ConsecutiveLocalOriginFailure defines + the number of consecutive local origin failures + before a consecutive local origin ejection occurs. + Parameters take effect only when SplitExternalLocalOriginErrors + is true. Defaults to 5. + format: int32 + type: integer + consecutiveServerErrors: + description: ConsecutiveServerErrors defines The number + of consecutive server-side error responses before + a consecutive 5xx ejection occurs. When the backend + host encounters consecutive errors greater than + or equal to ConsecutiveServerErrors, it will be + ejected from the load balancing pool. for HTTP services, + a 5xx counts as an error and for TCP services connection + failures and connection timeouts count as an error. + It can be disabled by setting the value to 0. Defaults + to 5. + format: int32 + type: integer + interval: + description: Interval is the interval at which host + status is evaluated. Defaults to 10s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + maxEjectionPercent: + description: MaxEjectionPercent is the max percentage + of hosts in the load balancing pool for the upstream + service that can be ejected. But will eject at least + one host regardless of the value here. Defaults + to 10%. + format: int32 + maximum: 100 + type: integer + maxEjectionTime: + description: MaxEjectionTime is the maximum time a + host will be ejected for. After this amount of time, + a host will be returned to normal operation. If + not specified, the default value (300s) or BaseEjectionTime + value is applied, whatever is larger. Defaults to + 300s. 
+ pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors defines + whether to split the local origin errors from the + external origin errors. Defaults to false. + type: boolean + type: object port: description: Port (defined as Integer) to proxy traffic to since a service can have multiple defined. @@ -6885,6 +6948,69 @@ spec: traffic. Names defined here will be used to look up corresponding endpoints which contain the ips to route. type: string + outlierDetection: + description: The policy for managing outlier detection on + a service. + properties: + baseEjectionTime: + description: BaseEjectionTime is the base time that + a host is ejected for. A host will remain ejected + for a period of time equal to the product of the ejection + base duration and the number of times the host has + been ejected. Defaults to 30s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + consecutiveLocalOriginFailure: + description: ConsecutiveLocalOriginFailure defines the + number of consecutive local origin failures before + a consecutive local origin ejection occurs. Parameters + take effect only when SplitExternalLocalOriginErrors + is true. Defaults to 5. + format: int32 + type: integer + consecutiveServerErrors: + description: ConsecutiveServerErrors defines The number + of consecutive server-side error responses before + a consecutive 5xx ejection occurs. When the backend + host encounters consecutive errors greater than or + equal to ConsecutiveServerErrors, it will be ejected + from the load balancing pool. for HTTP services, a + 5xx counts as an error and for TCP services connection + failures and connection timeouts count as an error. + It can be disabled by setting the value to 0. Defaults + to 5. 
+ format: int32 + type: integer + interval: + description: Interval is the interval at which host + status is evaluated. Defaults to 10s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + maxEjectionPercent: + description: MaxEjectionPercent is the max percentage + of hosts in the load balancing pool for the upstream + service that can be ejected. But will eject at least + one host regardless of the value here. Defaults to + 10%. + format: int32 + maximum: 100 + type: integer + maxEjectionTime: + description: MaxEjectionTime is the maximum time a host + will be ejected for. After this amount of time, a + host will be returned to normal operation. If not + specified, the default value (300s) or BaseEjectionTime + value is applied, whatever is larger. Defaults to + 300s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors defines + whether to split the local origin errors from the + external origin errors. Defaults to false. + type: boolean + type: object port: description: Port (defined as Integer) to proxy traffic to since a service can have multiple defined. diff --git a/examples/render/contour.yaml b/examples/render/contour.yaml index d1e47c9fecb..608e99b9261 100644 --- a/examples/render/contour.yaml +++ b/examples/render/contour.yaml @@ -6485,6 +6485,69 @@ spec: up corresponding endpoints which contain the ips to route. type: string + outlierDetection: + description: The policy for managing outlier detection + on a service. + properties: + baseEjectionTime: + description: BaseEjectionTime is the base time that + a host is ejected for. A host will remain ejected + for a period of time equal to the product of the + ejection base duration and the number of times the + host has been ejected. Defaults to 30s. 
+ pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + consecutiveLocalOriginFailure: + description: ConsecutiveLocalOriginFailure defines + the number of consecutive local origin failures + before a consecutive local origin ejection occurs. + Parameters take effect only when SplitExternalLocalOriginErrors + is true. Defaults to 5. + format: int32 + type: integer + consecutiveServerErrors: + description: ConsecutiveServerErrors defines The number + of consecutive server-side error responses before + a consecutive 5xx ejection occurs. When the backend + host encounters consecutive errors greater than + or equal to ConsecutiveServerErrors, it will be + ejected from the load balancing pool. for HTTP services, + a 5xx counts as an error and for TCP services connection + failures and connection timeouts count as an error. + It can be disabled by setting the value to 0. Defaults + to 5. + format: int32 + type: integer + interval: + description: Interval is the interval at which host + status is evaluated. Defaults to 10s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + maxEjectionPercent: + description: MaxEjectionPercent is the max percentage + of hosts in the load balancing pool for the upstream + service that can be ejected. But will eject at least + one host regardless of the value here. Defaults + to 10%. + format: int32 + maximum: 100 + type: integer + maxEjectionTime: + description: MaxEjectionTime is the maximum time a + host will be ejected for. After this amount of time, + a host will be returned to normal operation. If + not specified, the default value (300s) or BaseEjectionTime + value is applied, whatever is larger. Defaults to + 300s. 
+ pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors defines + whether to split the local origin errors from the + external origin errors. Defaults to false. + type: boolean + type: object port: description: Port (defined as Integer) to proxy traffic to since a service can have multiple defined. @@ -6882,6 +6945,69 @@ spec: traffic. Names defined here will be used to look up corresponding endpoints which contain the ips to route. type: string + outlierDetection: + description: The policy for managing outlier detection on + a service. + properties: + baseEjectionTime: + description: BaseEjectionTime is the base time that + a host is ejected for. A host will remain ejected + for a period of time equal to the product of the ejection + base duration and the number of times the host has + been ejected. Defaults to 30s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + consecutiveLocalOriginFailure: + description: ConsecutiveLocalOriginFailure defines the + number of consecutive local origin failures before + a consecutive local origin ejection occurs. Parameters + take effect only when SplitExternalLocalOriginErrors + is true. Defaults to 5. + format: int32 + type: integer + consecutiveServerErrors: + description: ConsecutiveServerErrors defines The number + of consecutive server-side error responses before + a consecutive 5xx ejection occurs. When the backend + host encounters consecutive errors greater than or + equal to ConsecutiveServerErrors, it will be ejected + from the load balancing pool. for HTTP services, a + 5xx counts as an error and for TCP services connection + failures and connection timeouts count as an error. + It can be disabled by setting the value to 0. Defaults + to 5. 
+ format: int32 + type: integer + interval: + description: Interval is the interval at which host + status is evaluated. Defaults to 10s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + maxEjectionPercent: + description: MaxEjectionPercent is the max percentage + of hosts in the load balancing pool for the upstream + service that can be ejected. But will eject at least + one host regardless of the value here. Defaults to + 10%. + format: int32 + maximum: 100 + type: integer + maxEjectionTime: + description: MaxEjectionTime is the maximum time a host + will be ejected for. After this amount of time, a + host will be returned to normal operation. If not + specified, the default value (300s) or BaseEjectionTime + value is applied, whatever is larger. Defaults to + 300s. + pattern: ^(((\d*(\.\d*)?h)|(\d*(\.\d*)?m)|(\d*(\.\d*)?s)|(\d*(\.\d*)?ms))+)$ + type: string + splitExternalLocalOriginErrors: + default: false + description: SplitExternalLocalOriginErrors defines + whether to split the local origin errors from the + external origin errors. Defaults to false. + type: boolean + type: object port: description: Port (defined as Integer) to proxy traffic to since a service can have multiple defined. diff --git a/internal/dag/dag.go b/internal/dag/dag.go index aea888e4b16..86a94b7091a 100644 --- a/internal/dag/dag.go +++ b/internal/dag/dag.go @@ -1040,6 +1040,9 @@ type Cluster struct { // PerConnectionBufferLimitBytes defines the soft limit on size of the cluster’s new connection read and write buffers. PerConnectionBufferLimitBytes *uint32 + + // OutlierDetection defines how to detect unhealthy hosts in the cluster, and evict them. 
+ OutlierDetectionPolicy *OutlierDetectionPolicy } // WeightedService represents the load balancing weight of a @@ -1260,3 +1263,14 @@ type SlowStartConfig struct { func (s *SlowStartConfig) String() string { return fmt.Sprintf("%s%f%d", s.Window.String(), s.Aggression, s.MinWeightPercent) } + +// OutlierDetectionPolicy holds configuration for outlier detection. +type OutlierDetectionPolicy struct { + ConsecutiveServerErrors uint32 + Interval time.Duration + BaseEjectionTime time.Duration + MaxEjectionTime time.Duration + SplitExternalLocalOriginErrors bool + ConsecutiveLocalOriginFailure uint32 + MaxEjectionPercent uint32 +} diff --git a/internal/dag/httpproxy_processor.go b/internal/dag/httpproxy_processor.go index 255a1fa0b71..af4208c59f8 100644 --- a/internal/dag/httpproxy_processor.go +++ b/internal/dag/httpproxy_processor.go @@ -981,6 +981,13 @@ func (p *HTTPProxyProcessor) computeRoutes( return nil } + outlierDetection, err := outlierDetectionPolicy(service.OutlierDetection) + if err != nil { + validCond.AddErrorf(contour_api_v1.ConditionTypeOutlierDetectionError, "OutlierDetectionInvalid", + "%s on outlier detection", err) + return nil + } + var clientCertSecret *Secret if p.ClientCertificate != nil { // Since the client certificate is configured by admin, explicit delegation is not required. 
@@ -1026,6 +1033,7 @@ func (p *HTTPProxyProcessor) computeRoutes( SlowStartConfig: slowStart, MaxRequestsPerConnection: p.MaxRequestsPerConnection, PerConnectionBufferLimitBytes: p.PerConnectionBufferLimitBytes, + OutlierDetectionPolicy: outlierDetection, } if service.Mirror && len(r.MirrorPolicies) > 0 { validCond.AddError(contour_api_v1.ConditionTypeServiceError, "OnlyOneMirror", diff --git a/internal/dag/policy.go b/internal/dag/policy.go index 85231724c05..2a9a7e7fd89 100644 --- a/internal/dag/policy.go +++ b/internal/dag/policy.go @@ -809,3 +809,53 @@ func loadBalancerRequestHashPolicies(lbp *contour_api_v1.LoadBalancerPolicy, val } } + +func outlierDetectionPolicy(outlierDetection *contour_api_v1.OutlierDetection) (*OutlierDetectionPolicy, error) { + if outlierDetection == nil { + return nil, nil + } + out := &OutlierDetectionPolicy{ + SplitExternalLocalOriginErrors: outlierDetection.SplitExternalLocalOriginErrors, + } + + var err error + var interval, baseEjectionTime, maxEjectionTime time.Duration + + if outlierDetection.Interval != nil { + interval, err = time.ParseDuration(ref.Val(outlierDetection.Interval, "10s")) + if err != nil { + return nil, fmt.Errorf("error parsing interval: %w", err) + } + out.Interval = interval + } + + if outlierDetection.BaseEjectionTime != nil { + baseEjectionTime, err = time.ParseDuration(ref.Val(outlierDetection.BaseEjectionTime, "30s")) + if err != nil { + return nil, fmt.Errorf("error parsing baseEjectionTime: %w", err) + } + out.BaseEjectionTime = baseEjectionTime + } + + if outlierDetection.MaxEjectionTime != nil { + maxEjectionTime, err = time.ParseDuration(ref.Val(outlierDetection.MaxEjectionTime, "300s")) + if err != nil { + return nil, fmt.Errorf("error parsing maxEjectionTime: %w", err) + } + out.MaxEjectionTime = maxEjectionTime + } + + if outlierDetection.ConsecutiveServerErrors != nil { + out.ConsecutiveServerErrors = ref.Val(outlierDetection.ConsecutiveServerErrors, 5) + } + + if 
outlierDetection.ConsecutiveLocalOriginFailure != nil { + out.ConsecutiveLocalOriginFailure = ref.Val(outlierDetection.ConsecutiveLocalOriginFailure, 5) + } + + if outlierDetection.MaxEjectionPercent != nil { + out.MaxEjectionPercent = ref.Val(outlierDetection.MaxEjectionPercent, 10) + } + + return out, nil +} diff --git a/internal/dag/policy_test.go b/internal/dag/policy_test.go index c58f49b7087..cfb32531155 100644 --- a/internal/dag/policy_test.go +++ b/internal/dag/policy_test.go @@ -21,6 +21,7 @@ import ( "time" contour_api_v1 "github.com/projectcontour/contour/apis/projectcontour/v1" + "github.com/projectcontour/contour/internal/ref" "github.com/projectcontour/contour/internal/timeout" "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" @@ -1270,6 +1271,84 @@ func TestValidateHeaderAlteration(t *testing.T) { } } +func TestOutlierDetectionPolicy(t *testing.T) { + tests := map[string]struct { + in *contour_api_v1.OutlierDetection + want *OutlierDetectionPolicy + wantErr bool + }{ + "nil": { + in: nil, + want: nil, + }, + "empty": { + in: &contour_api_v1.OutlierDetection{}, + want: &OutlierDetectionPolicy{}, + }, + "consecutive server errors": { + in: &contour_api_v1.OutlierDetection{ + ConsecutiveServerErrors: ref.To(uint32(5)), + }, + want: &OutlierDetectionPolicy{ + ConsecutiveServerErrors: 5, + }, + }, + "interval no unit": { + in: &contour_api_v1.OutlierDetection{ + Interval: ref.To("10"), + }, + want: nil, + wantErr: true, + }, + "interval bad unit": { + in: &contour_api_v1.OutlierDetection{ + Interval: ref.To("10f"), + }, + want: nil, + wantErr: true, + }, + "interval good": { + in: &contour_api_v1.OutlierDetection{ + Interval: ref.To("10s"), + }, + want: &OutlierDetectionPolicy{ + Interval: 10 * time.Second, + }, + }, + "normal": { + in: &contour_api_v1.OutlierDetection{ + ConsecutiveServerErrors: ref.To(uint32(5)), + Interval: ref.To("10s"), + BaseEjectionTime: ref.To("30s"), + MaxEjectionTime: ref.To("300s"), + 
SplitExternalLocalOriginErrors: true, + ConsecutiveLocalOriginFailure: ref.To(uint32(3)), + MaxEjectionPercent: ref.To(uint32(50)), + }, + want: &OutlierDetectionPolicy{ + ConsecutiveServerErrors: 5, + Interval: 10 * time.Second, + BaseEjectionTime: 30 * time.Second, + MaxEjectionTime: 300 * time.Second, + SplitExternalLocalOriginErrors: true, + ConsecutiveLocalOriginFailure: 3, + MaxEjectionPercent: 50, + }, + }, + } + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + got, gotErr := outlierDetectionPolicy(tc.in) + if tc.wantErr { + assert.Error(t, gotErr) + } else { + assert.Equal(t, tc.want, got) + assert.NoError(t, gotErr) + } + }) + } +} + func TestExtractHeaderValue(t *testing.T) { tests := map[string]string{ "%REQ(X-Header-Name)%": "X-Header-Name", diff --git a/internal/envoy/cluster.go b/internal/envoy/cluster.go index e6d4bf94fbe..4eac619aa34 100644 --- a/internal/envoy/cluster.go +++ b/internal/envoy/cluster.go @@ -46,6 +46,30 @@ func Clustername(cluster *dag.Cluster) string { } buf += hc.Path } + if od := cluster.OutlierDetectionPolicy; od != nil { + if od.ConsecutiveServerErrors > 0 { + buf += strconv.Itoa(int(od.ConsecutiveServerErrors)) + } + if od.Interval > 0 { + buf += od.Interval.String() + } + if od.BaseEjectionTime > 0 { + buf += od.BaseEjectionTime.String() + } + if od.MaxEjectionTime > 0 { + buf += od.MaxEjectionTime.String() + } + if od.MaxEjectionPercent > 0 { + buf += strconv.Itoa(int(od.MaxEjectionPercent)) + } + buf += strconv.FormatBool(od.SplitExternalLocalOriginErrors) + if od.SplitExternalLocalOriginErrors { + buf += strconv.Itoa(int(od.ConsecutiveLocalOriginFailure)) + } + if od.MaxEjectionPercent > 0 { + buf += strconv.Itoa(int(od.MaxEjectionPercent)) + } + } if uv := cluster.UpstreamValidation; uv != nil { buf += uv.CACertificate.Object.ObjectMeta.Name buf += uv.SubjectName diff --git a/internal/envoy/v3/cluster.go b/internal/envoy/v3/cluster.go index e39beedadee..ea646439e2d 100644 --- 
a/internal/envoy/v3/cluster.go +++ b/internal/envoy/v3/cluster.go @@ -138,6 +138,10 @@ func Cluster(c *dag.Cluster) *envoy_cluster_v3.Cluster { } } + if c.OutlierDetectionPolicy != nil { + cluster.OutlierDetection = outlierDetection(c.OutlierDetectionPolicy) + } + return cluster } @@ -372,3 +376,44 @@ func slowStartConfig(slowStartConfig *dag.SlowStartConfig) *envoy_cluster_v3.Clu }, } } + +func outlierDetection(policy *dag.OutlierDetectionPolicy) *envoy_cluster_v3.OutlierDetection { + out := &envoy_cluster_v3.OutlierDetection{ + EnforcingConsecutive_5Xx: protobuf.UInt32Zero(), + EnforcingSuccessRate: protobuf.UInt32Zero(), + EnforcingConsecutiveGatewayFailure: protobuf.UInt32Zero(), + } + if policy.ConsecutiveServerErrors > 0 { + out.Consecutive_5Xx = protobuf.UInt32OrNil(policy.ConsecutiveServerErrors) + out.EnforcingConsecutive_5Xx = protobuf.UInt32OrNil(100) + } + + if policy.Interval > 0 { + out.Interval = durationpb.New(policy.Interval) + } + + if policy.BaseEjectionTime > 0 { + out.BaseEjectionTime = durationpb.New(policy.BaseEjectionTime) + } + + if policy.MaxEjectionTime > 0 { + out.MaxEjectionTime = durationpb.New(policy.MaxEjectionTime) + } + + if policy.MaxEjectionPercent > 0 { + out.MaxEjectionPercent = protobuf.UInt32OrNil(policy.MaxEjectionPercent) + } + + if policy.SplitExternalLocalOriginErrors { + out.SplitExternalLocalOriginErrors = true + if policy.ConsecutiveLocalOriginFailure > 0 { + out.ConsecutiveLocalOriginFailure = protobuf.UInt32OrNil(policy.ConsecutiveLocalOriginFailure) + } else { + // Default to 5 if not specified + out.ConsecutiveLocalOriginFailure = protobuf.UInt32OrNil(5) + } + out.EnforcingLocalOriginSuccessRate = protobuf.UInt32Zero() + } + + return out +} diff --git a/internal/envoy/v3/cluster_test.go b/internal/envoy/v3/cluster_test.go index 42ebd5b108e..eb69d3478c2 100644 --- a/internal/envoy/v3/cluster_test.go +++ b/internal/envoy/v3/cluster_test.go @@ -721,6 +721,110 @@ func TestCluster(t *testing.T) { }, }, }, + "outlier 
detection only server error": { + cluster: &dag.Cluster{ + Upstream: service(s1), + OutlierDetectionPolicy: &dag.OutlierDetectionPolicy{ + ConsecutiveServerErrors: 5, + }, + }, + want: &envoy_cluster_v3.Cluster{ + Name: "default/kuard/443/9edb41b67b", + AltStatName: "default_kuard_443", + ClusterDiscoveryType: ClusterDiscoveryType(envoy_cluster_v3.Cluster_EDS), + EdsClusterConfig: &envoy_cluster_v3.Cluster_EdsClusterConfig{ + EdsConfig: ConfigSource("contour"), + ServiceName: "default/kuard/http", + }, + OutlierDetection: &envoy_cluster_v3.OutlierDetection{ + Consecutive_5Xx: wrapperspb.UInt32(5), + EnforcingSuccessRate: wrapperspb.UInt32(0), + EnforcingConsecutiveGatewayFailure: wrapperspb.UInt32(0), + EnforcingConsecutive_5Xx: wrapperspb.UInt32(100), + }, + }, + }, + "outlier detection split local origin error": { + cluster: &dag.Cluster{ + Upstream: service(s1), + OutlierDetectionPolicy: &dag.OutlierDetectionPolicy{ + ConsecutiveServerErrors: 5, + SplitExternalLocalOriginErrors: true, + }, + }, + want: &envoy_cluster_v3.Cluster{ + Name: "default/kuard/443/3bebc12a28", + AltStatName: "default_kuard_443", + ClusterDiscoveryType: ClusterDiscoveryType(envoy_cluster_v3.Cluster_EDS), + EdsClusterConfig: &envoy_cluster_v3.Cluster_EdsClusterConfig{ + EdsConfig: ConfigSource("contour"), + ServiceName: "default/kuard/http", + }, + OutlierDetection: &envoy_cluster_v3.OutlierDetection{ + Consecutive_5Xx: wrapperspb.UInt32(5), + EnforcingSuccessRate: wrapperspb.UInt32(0), + EnforcingConsecutiveGatewayFailure: wrapperspb.UInt32(0), + EnforcingConsecutive_5Xx: wrapperspb.UInt32(100), + SplitExternalLocalOriginErrors: true, + ConsecutiveLocalOriginFailure: wrapperspb.UInt32(5), + EnforcingLocalOriginSuccessRate: wrapperspb.UInt32(0), + }, + }, + }, + "outlier detection split local origin error and consecutive local origin failure": { + cluster: &dag.Cluster{ + Upstream: service(s1), + OutlierDetectionPolicy: &dag.OutlierDetectionPolicy{ + ConsecutiveServerErrors: 5, + 
SplitExternalLocalOriginErrors: true, + ConsecutiveLocalOriginFailure: 10, + }, + }, + want: &envoy_cluster_v3.Cluster{ + Name: "default/kuard/443/880ee463fa", + AltStatName: "default_kuard_443", + ClusterDiscoveryType: ClusterDiscoveryType(envoy_cluster_v3.Cluster_EDS), + EdsClusterConfig: &envoy_cluster_v3.Cluster_EdsClusterConfig{ + EdsConfig: ConfigSource("contour"), + ServiceName: "default/kuard/http", + }, + OutlierDetection: &envoy_cluster_v3.OutlierDetection{ + Consecutive_5Xx: wrapperspb.UInt32(5), + EnforcingSuccessRate: wrapperspb.UInt32(0), + EnforcingConsecutiveGatewayFailure: wrapperspb.UInt32(0), + EnforcingConsecutive_5Xx: wrapperspb.UInt32(100), + SplitExternalLocalOriginErrors: true, + ConsecutiveLocalOriginFailure: wrapperspb.UInt32(10), + EnforcingLocalOriginSuccessRate: wrapperspb.UInt32(0), + }, + }, + }, + "outlier detection only local origin error": { + cluster: &dag.Cluster{ + Upstream: service(s1), + OutlierDetectionPolicy: &dag.OutlierDetectionPolicy{ + SplitExternalLocalOriginErrors: true, + ConsecutiveLocalOriginFailure: 10, + }, + }, + want: &envoy_cluster_v3.Cluster{ + Name: "default/kuard/443/011e0937a7", + AltStatName: "default_kuard_443", + ClusterDiscoveryType: ClusterDiscoveryType(envoy_cluster_v3.Cluster_EDS), + EdsClusterConfig: &envoy_cluster_v3.Cluster_EdsClusterConfig{ + EdsConfig: ConfigSource("contour"), + ServiceName: "default/kuard/http", + }, + OutlierDetection: &envoy_cluster_v3.OutlierDetection{ + EnforcingSuccessRate: wrapperspb.UInt32(0), + EnforcingConsecutiveGatewayFailure: wrapperspb.UInt32(0), + EnforcingConsecutive_5Xx: wrapperspb.UInt32(0), + SplitExternalLocalOriginErrors: true, + ConsecutiveLocalOriginFailure: wrapperspb.UInt32(10), + EnforcingLocalOriginSuccessRate: wrapperspb.UInt32(0), + }, + }, + }, } for name, tc := range tests { diff --git a/internal/protobuf/helpers.go b/internal/protobuf/helpers.go index 837d839ae26..c8e647c3fc1 100644 --- a/internal/protobuf/helpers.go +++ 
b/internal/protobuf/helpers.go @@ -42,6 +42,11 @@ func UInt32OrNil(val uint32) *wrapperspb.UInt32Value { } } +// UInt32Zero returns a wrapped UInt32Value with a value of 0. +func UInt32Zero() *wrapperspb.UInt32Value { + return wrapperspb.UInt32(0) +} + // AsMessages casts the given slice of values (that implement the proto.Message // interface) to a slice of proto.Message. If the length of the slice is 0, it // returns nil. diff --git a/site/content/docs/main/config/api-reference.html b/site/content/docs/main/config/api-reference.html index b988b1ac283..3de91278b9d 100644 --- a/site/content/docs/main/config/api-reference.html +++ b/site/content/docs/main/config/api-reference.html @@ -2676,6 +2676,134 @@

MatchCondition +

OutlierDetection +

+

+(Appears on: +Service) +

+

+

OutlierDetection defines the configuration for outlier detection on a service.

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+consecutiveServerErrors +
+ +uint32 + +
+(Optional) +

ConsecutiveServerErrors defines the number of consecutive server-side error responses before a consecutive 5xx ejection occurs. +When the backend host encounters consecutive +errors greater than or equal to ConsecutiveServerErrors, it will be +ejected from the load balancing pool. +For HTTP services, a 5xx counts as an error and for TCP services +connection failures and connection timeouts count as an error. +It can be disabled by setting the value to 0. +Defaults to 5.

+
+interval +
+ +string + +
+(Optional) +

Interval is the interval at which host status is evaluated. +Defaults to 10s.

+
+baseEjectionTime +
+ +string + +
+(Optional) +

BaseEjectionTime is the base time that a host is ejected for. +A host will remain ejected for a period of time equal to the +product of the ejection base duration and the number of times the host has been ejected. +Defaults to 30s.

+
+maxEjectionTime +
+ +string + +
+(Optional) +

MaxEjectionTime is the maximum time a host will be ejected for. +After this amount of time, a host will be returned to normal operation. +If not specified, the default value (300s) or BaseEjectionTime value is applied, whichever is larger. +Defaults to 300s.

+
+splitExternalLocalOriginErrors +
+ +bool + +
+(Optional) +

SplitExternalLocalOriginErrors defines whether to split the local origin errors from the external origin errors. +Defaults to false.

+
+consecutiveLocalOriginFailure +
+ +uint32 + +
+

ConsecutiveLocalOriginFailure defines the number of consecutive local origin failures before a consecutive local origin ejection occurs. +This parameter takes effect only when SplitExternalLocalOriginErrors is true. +Defaults to 5.

+
+maxEjectionPercent +
+ +uint32 + +
+(Optional) +

MaxEjectionPercent is the max percentage of hosts in the load balancing pool for the upstream service that can be ejected. +However, at least one host can still be ejected regardless of the value here. +Defaults to 10%.

+

PathRewritePolicy

@@ -4036,6 +4164,20 @@

Service

Slow start will gradually increase amount of traffic to a newly added endpoint.

+ + +outlierDetection +
+ + +OutlierDetection + + + + +

The policy for managing outlier detection on a service.

+ +

SlowStartPolicy