From 672c7d65e22ab1a6cb85b1909074189dd1cd8f25 Mon Sep 17 00:00:00 2001 From: yandongxiao Date: Fri, 10 Nov 2023 11:28:00 +0800 Subject: [PATCH] [Enhancement] add failure seconds for liveness and readiness (#309) Signed-off-by: yandongxiao --- .../starrocks.com_starrocksclusters.yaml | 96 +++++++++++++++++-- deploy/starrocks.com_starrocksclusters.yaml | 24 +++++ .../starrocks/templates/starrockscluster.yaml | 33 +++++++ .../charts/starrocks/values.yaml | 52 ++++++++++ pkg/apis/starrocks/v1/load_type.go | 30 +++++- .../starrocks/v1/zz_generated.deepcopy.go | 10 ++ pkg/k8sutils/templates/pod/spec.go | 10 +- pkg/k8sutils/templates/pod/spec_test.go | 44 +++++++-- pkg/sub_controller/be/be_pod.go | 4 +- pkg/sub_controller/cn/cn_pod.go | 4 +- pkg/sub_controller/fe/fe_pod.go | 4 +- .../feproxy/feproxy_controller.go | 4 +- 12 files changed, 284 insertions(+), 31 deletions(-) diff --git a/config/crd/bases/starrocks.com_starrocksclusters.yaml b/config/crd/bases/starrocks.com_starrocksclusters.yaml index 0f56ef9b..eb836165 100644 --- a/config/crd/bases/starrocks.com_starrocksclusters.yaml +++ b/config/crd/bases/starrocks.com_starrocksclusters.yaml @@ -1107,6 +1107,16 @@ spec: description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object + livenessProbeFailureSeconds: + description: LivenessProbeFailureSeconds defines the total failure + seconds of liveness Probe. Default failureThreshold is 3 and + the periodSeconds is 5, this means the liveness will fail if + the pod can't respond in 15 seconds. Your LivenessProbeFailureSeconds + is the total time of seconds before the container restart. If + LivenessProbeFailureSeconds can't be divided by defaultPeriodSeconds, + the failureThreshold will be rounded up. + format: int32 + type: integer nodeSelector: additionalProperties: type: string @@ -1118,6 +1128,16 @@ spec: type: string description: the pod labels for user select or classify pods. type: object + readinessProbeFailureSeconds: + description: ReadinessProbeFailureSeconds defines the total failure + seconds of readiness Probe. Default failureThreshold is 3 and + the periodSeconds is 5, this means the readiness will fail if + the pod can't respond in 15 seconds. Your ReadinessProbeFailureSeconds + is the total time of seconds before pods becomes not ready. + If ReadinessProbeFailureSeconds can't be divided by defaultPeriodSeconds, + the failureThreshold will be rounded up. + format: int32 + type: integer replicas: description: 'Replicas is the number of desired Pod. When HPA policy is enabled with a fixed replica count: every time the @@ -1231,13 +1251,13 @@ spec: type: string startupProbeFailureSeconds: description: StartupProbeFailureSeconds defines the total failure - seconds of startupProbe. Default failureThreshold is 60 and + seconds of startup Probe. Default failureThreshold is 60 and the periodSeconds is 5, this means the startup will fail if the pod can't start in 300 seconds. Your StartupProbeFailureSeconds is the total time of seconds before startupProbe give up and fail the container start. If startupProbeFailureSeconds can't be divided by defaultPeriodSeconds, the failureThreshold will - be rounded up + be rounded up. format: int32 type: integer storageVolumes: @@ -3065,6 +3085,16 @@ spec: description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object + livenessProbeFailureSeconds: + description: LivenessProbeFailureSeconds defines the total failure + seconds of liveness Probe. Default failureThreshold is 3 and + the periodSeconds is 5, this means the liveness will fail if + the pod can't respond in 15 seconds. Your LivenessProbeFailureSeconds + is the total time of seconds before the container restart. If + LivenessProbeFailureSeconds can't be divided by defaultPeriodSeconds, + the failureThreshold will be rounded up. + format: int32 + type: integer nodeSelector: additionalProperties: type: string @@ -3076,6 +3106,16 @@ spec: type: string description: the pod labels for user select or classify pods. type: object + readinessProbeFailureSeconds: + description: ReadinessProbeFailureSeconds defines the total failure + seconds of readiness Probe. Default failureThreshold is 3 and + the periodSeconds is 5, this means the readiness will fail if + the pod can't respond in 15 seconds. Your ReadinessProbeFailureSeconds + is the total time of seconds before pods becomes not ready. + If ReadinessProbeFailureSeconds can't be divided by defaultPeriodSeconds, + the failureThreshold will be rounded up. + format: int32 + type: integer replicas: description: 'Replicas is the number of desired Pod. When HPA policy is enabled with a fixed replica count: every time the @@ -3189,13 +3229,13 @@ spec: type: string startupProbeFailureSeconds: description: StartupProbeFailureSeconds defines the total failure - seconds of startupProbe. Default failureThreshold is 60 and + seconds of startup Probe. Default failureThreshold is 60 and the periodSeconds is 5, this means the startup will fail if the pod can't start in 300 seconds. Your StartupProbeFailureSeconds is the total time of seconds before startupProbe give up and fail the container start. If startupProbeFailureSeconds can't be divided by defaultPeriodSeconds, the failureThreshold will - be rounded up + be rounded up. format: int32 type: integer storageVolumes: @@ -4193,6 +4233,16 @@ spec: description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object + livenessProbeFailureSeconds: + description: LivenessProbeFailureSeconds defines the total failure + seconds of liveness Probe. Default failureThreshold is 3 and + the periodSeconds is 5, this means the liveness will fail if + the pod can't respond in 15 seconds. Your LivenessProbeFailureSeconds + is the total time of seconds before the container restart. If + LivenessProbeFailureSeconds can't be divided by defaultPeriodSeconds, + the failureThreshold will be rounded up. + format: int32 + type: integer nodeSelector: additionalProperties: type: string @@ -4204,6 +4254,16 @@ spec: type: string description: the pod labels for user select or classify pods. type: object + readinessProbeFailureSeconds: + description: ReadinessProbeFailureSeconds defines the total failure + seconds of readiness Probe. Default failureThreshold is 3 and + the periodSeconds is 5, this means the readiness will fail if + the pod can't respond in 15 seconds. Your ReadinessProbeFailureSeconds + is the total time of seconds before pods becomes not ready. + If ReadinessProbeFailureSeconds can't be divided by defaultPeriodSeconds, + the failureThreshold will be rounded up. + format: int32 + type: integer replicas: description: 'Replicas is the number of desired Pod. When HPA policy is enabled with a fixed replica count: every time the @@ -4294,13 +4354,13 @@ spec: type: string startupProbeFailureSeconds: description: StartupProbeFailureSeconds defines the total failure - seconds of startupProbe. Default failureThreshold is 60 and + seconds of startup Probe. Default failureThreshold is 60 and the periodSeconds is 5, this means the startup will fail if the pod can't start in 300 seconds. Your StartupProbeFailureSeconds is the total time of seconds before startupProbe give up and fail the container start. If startupProbeFailureSeconds can't be divided by defaultPeriodSeconds, the failureThreshold will - be rounded up + be rounded up. format: int32 type: integer storageVolumes: @@ -5435,6 +5495,16 @@ spec: description: 'Limits describes the maximum amount of compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/' type: object + livenessProbeFailureSeconds: + description: LivenessProbeFailureSeconds defines the total failure + seconds of liveness Probe. Default failureThreshold is 3 and + the periodSeconds is 5, this means the liveness will fail if + the pod can't respond in 15 seconds. Your LivenessProbeFailureSeconds + is the total time of seconds before the container restart. If + LivenessProbeFailureSeconds can't be divided by defaultPeriodSeconds, + the failureThreshold will be rounded up. + format: int32 + type: integer nodeSelector: additionalProperties: type: string @@ -5446,6 +5516,16 @@ spec: type: string description: the pod labels for user select or classify pods. type: object + readinessProbeFailureSeconds: + description: ReadinessProbeFailureSeconds defines the total failure + seconds of readiness Probe. Default failureThreshold is 3 and + the periodSeconds is 5, this means the readiness will fail if + the pod can't respond in 15 seconds. Your ReadinessProbeFailureSeconds + is the total time of seconds before pods becomes not ready. + If ReadinessProbeFailureSeconds can't be divided by defaultPeriodSeconds, + the failureThreshold will be rounded up. + format: int32 + type: integer replicas: description: 'Replicas is the number of desired Pod. When HPA policy is enabled with a fixed replica count: every time the @@ -5559,13 +5639,13 @@ spec: type: string startupProbeFailureSeconds: description: StartupProbeFailureSeconds defines the total failure - seconds of startupProbe. Default failureThreshold is 60 and + seconds of startup Probe. Default failureThreshold is 60 and the periodSeconds is 5, this means the startup will fail if the pod can't start in 300 seconds. Your StartupProbeFailureSeconds is the total time of seconds before startupProbe give up and fail the container start. If startupProbeFailureSeconds can't be divided by defaultPeriodSeconds, the failureThreshold will - be rounded up + be rounded up. format: int32 type: integer storageVolumes: diff --git a/deploy/starrocks.com_starrocksclusters.yaml b/deploy/starrocks.com_starrocksclusters.yaml index 165fe3eb..8eba56ef 100644 --- a/deploy/starrocks.com_starrocksclusters.yaml +++ b/deploy/starrocks.com_starrocksclusters.yaml @@ -507,6 +507,9 @@ spec: pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true type: object + livenessProbeFailureSeconds: + format: int32 + type: integer nodeSelector: additionalProperties: type: string @@ -515,6 +518,9 @@ spec: additionalProperties: type: string type: object + readinessProbeFailureSeconds: + format: int32 + type: integer replicas: format: int32 minimum: 0 @@ -1410,6 +1416,9 @@ spec: pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true type: object + livenessProbeFailureSeconds: + format: int32 + type: integer nodeSelector: additionalProperties: type: string @@ -1418,6 +1427,9 @@ spec: additionalProperties: type: string type: object + readinessProbeFailureSeconds: + format: int32 + type: integer replicas: format: int32 minimum: 0 @@ -1903,6 +1915,9 @@ spec: pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true type: object + livenessProbeFailureSeconds: + format: int32 + type: integer nodeSelector: additionalProperties: type: string @@ -1911,6 +1926,9 @@ spec: additionalProperties: type: string type: object + readinessProbeFailureSeconds: + format: int32 + type: integer replicas: format: int32 minimum: 0 @@ -2464,6 +2482,9 @@ spec: pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true type: object + livenessProbeFailureSeconds: + format: int32 + type: integer nodeSelector: additionalProperties: type: string @@ -2472,6 +2493,9 @@ spec: additionalProperties: type: string type: object + readinessProbeFailureSeconds: + format: int32 + type: integer replicas: format: int32 minimum: 0 diff --git a/helm-charts/charts/kube-starrocks/charts/starrocks/templates/starrockscluster.yaml b/helm-charts/charts/kube-starrocks/charts/starrocks/templates/starrockscluster.yaml index 8eabcb74..cc670df1 100644 --- a/helm-charts/charts/kube-starrocks/charts/starrocks/templates/starrockscluster.yaml +++ b/helm-charts/charts/kube-starrocks/charts/starrocks/templates/starrockscluster.yaml @@ -106,6 +106,15 @@ spec: {{toYaml .Values.starrocksFESpec.tolerations | indent 4 }} {{- end }} terminationGracePeriodSeconds: {{ .Values.starrocksFESpec.terminationGracePeriodSeconds }} + {{- if .Values.starrocksFESpec.startupProbeFailureSeconds }} + startupProbeFailureSeconds: {{ .Values.starrocksFESpec.startupProbeFailureSeconds }} + {{- end }} + {{- if .Values.starrocksFESpec.livenessProbeFailureSeconds }} + livenessProbeFailureSeconds: {{ .Values.starrocksFESpec.livenessProbeFailureSeconds }} + {{- end }} + {{- if .Values.starrocksFESpec.readinessProbeFailureSeconds }} + readinessProbeFailureSeconds: {{ .Values.starrocksFESpec.readinessProbeFailureSeconds }} + {{- end }} {{- if .Values.starrocksFESpec.secrets }} secrets: {{- range .Values.starrocksFESpec.secrets }} @@ -243,6 +252,15 @@ spec: {{toYaml .Values.starrocksBeSpec.tolerations | indent 4 }} {{- end }} terminationGracePeriodSeconds: {{ .Values.starrocksBeSpec.terminationGracePeriodSeconds }} + {{- if .Values.starrocksBeSpec.startupProbeFailureSeconds }} + startupProbeFailureSeconds: {{ .Values.starrocksBeSpec.startupProbeFailureSeconds }} + {{- end }} + {{- if .Values.starrocksBeSpec.livenessProbeFailureSeconds }} + livenessProbeFailureSeconds: {{ .Values.starrocksBeSpec.livenessProbeFailureSeconds }} + {{- end }} + {{- if .Values.starrocksBeSpec.readinessProbeFailureSeconds }} + readinessProbeFailureSeconds: {{ .Values.starrocksBeSpec.readinessProbeFailureSeconds }} + {{- end }} {{- if .Values.starrocksBeSpec.secrets }} secrets: {{- range .Values.starrocksBeSpec.secrets }} @@ -336,6 +354,15 @@ spec: {{toYaml .Values.starrocksCnSpec.tolerations | indent 4 }} {{- end }} terminationGracePeriodSeconds: {{ .Values.starrocksCnSpec.terminationGracePeriodSeconds }} + {{- if .Values.starrocksCnSpec.startupProbeFailureSeconds }} + startupProbeFailureSeconds: {{ .Values.starrocksCnSpec.startupProbeFailureSeconds }} + {{- end }} + {{- if .Values.starrocksCnSpec.livenessProbeFailureSeconds }} + livenessProbeFailureSeconds: {{ .Values.starrocksCnSpec.livenessProbeFailureSeconds }} + {{- end }} + {{- if .Values.starrocksCnSpec.readinessProbeFailureSeconds }} + readinessProbeFailureSeconds: {{ .Values.starrocksCnSpec.readinessProbeFailureSeconds }} + {{- end }} {{- if and .Values.starrocksCluster.enabledCn .Values.starrocksCnSpec.autoScalingPolicy }} autoScalingPolicy: {{ toYaml .Values.starrocksCnSpec.autoScalingPolicy | indent 6 }} @@ -451,4 +478,10 @@ spec: tolerations: {{- toYaml .Values.starrocksFeProxySpec.tolerations | nindent 6 }} {{- end }} + {{- if .Values.starrocksFeProxySpec.livenessProbeFailureSeconds }} + livenessProbeFailureSeconds: {{ .Values.starrocksFeProxySpec.livenessProbeFailureSeconds }} + {{- end }} + {{- if .Values.starrocksFeProxySpec.readinessProbeFailureSeconds }} + readinessProbeFailureSeconds: {{ .Values.starrocksFeProxySpec.readinessProbeFailureSeconds }} + {{- end }} {{- end }} diff --git a/helm-charts/charts/kube-starrocks/charts/starrocks/values.yaml b/helm-charts/charts/kube-starrocks/charts/starrocks/values.yaml index 567a0500..84d5f81c 100644 --- a/helm-charts/charts/kube-starrocks/charts/starrocks/values.yaml +++ b/helm-charts/charts/kube-starrocks/charts/starrocks/values.yaml @@ -181,8 +181,22 @@ starrocksFESpec: # mountPath: /etc/my-configmap # subPath: "" # terminationGracePeriodSeconds defines duration in seconds the FE pod needs to terminate gracefully. + # default value is 120 seconds terminationGracePeriodSeconds: 120 + # Please upgrade the CRD with v1.8.7 released version, if you want to use the following configuration. + # including: startupProbeFailureSeconds, livenessProbeFailureSeconds, readinessProbeFailureSeconds + + # StartupProbeFailureSeconds defines the total failure seconds of startup Probe. + # default value is 300 seconds + startupProbeFailureSeconds: + # LivenessProbeFailureSeconds defines the total failure seconds of liveness Probe. + # default value is 15 seconds + livenessProbeFailureSeconds: + # ReadinessProbeFailureSeconds defines the total failure seconds of readiness Probe. + # default value is 15 seconds + readinessProbeFailureSeconds: + # spec for compute node, compute node provide compute function. starrocksCnSpec: # number of replicas to deploy for cn component. @@ -342,8 +356,22 @@ starrocksCnSpec: # mountPath: /etc/my-configmap # subPath: "" # terminationGracePeriodSeconds defines duration in seconds the CN pod needs to terminate gracefully. + # default value is 120 seconds terminationGracePeriodSeconds: 120 + # Please upgrade the CRD with v1.8.7 released version, if you want to use the following configuration. + # including: startupProbeFailureSeconds, livenessProbeFailureSeconds, readinessProbeFailureSeconds + + # StartupProbeFailureSeconds defines the total failure seconds of startup Probe. + # default value is 300 seconds + startupProbeFailureSeconds: + # LivenessProbeFailureSeconds defines the total failure seconds of liveness Probe. + # default value is 15 seconds + livenessProbeFailureSeconds: + # ReadinessProbeFailureSeconds defines the total failure seconds of readiness Probe. + # default value is 15 seconds + readinessProbeFailureSeconds: + # spec for component be, provide storage and compute function. starrocksBeSpec: # number of replicas to deploy. @@ -467,8 +495,22 @@ starrocksBeSpec: # mountPath: /etc/my-configmap # subPath: "" # terminationGracePeriodSeconds defines duration in seconds the BE pod needs to terminate gracefully. + # default value is 120 seconds terminationGracePeriodSeconds: 120 + # Please upgrade the CRD with v1.8.7 released version, if you want to use the following configuration. + # including: startupProbeFailureSeconds, livenessProbeFailureSeconds, readinessProbeFailureSeconds + + # StartupProbeFailureSeconds defines the total failure seconds of startup Probe. + # default value is 300 seconds + startupProbeFailureSeconds: + # LivenessProbeFailureSeconds defines the total failure seconds of liveness Probe. + # default value is 15 seconds + livenessProbeFailureSeconds: + # ReadinessProbeFailureSeconds defines the total failure seconds of readiness Probe. + # default value is 15 seconds + readinessProbeFailureSeconds: + # create secrets if necessary. secrets: [] # e.g. create my-secret @@ -542,3 +584,13 @@ starrocksFeProxySpec: # operator: "Equal|Exists" # value: "value" # effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)" + + # Please upgrade the CRD with v1.8.7 released version, if you want to use the following configuration. + # including: livenessProbeFailureSeconds, readinessProbeFailureSeconds + + # LivenessProbeFailureSeconds defines the total failure seconds of liveness Probe. + # default value is 15 seconds + livenessProbeFailureSeconds: + # ReadinessProbeFailureSeconds defines the total failure seconds of readiness Probe. + # default value is 15 seconds + readinessProbeFailureSeconds: diff --git a/pkg/apis/starrocks/v1/load_type.go b/pkg/apis/starrocks/v1/load_type.go index 31e674cc..11ef9098 100644 --- a/pkg/apis/starrocks/v1/load_type.go +++ b/pkg/apis/starrocks/v1/load_type.go @@ -14,6 +14,8 @@ type loadInterface interface { GetAffinity() *corev1.Affinity GetTolerations() []corev1.Toleration GetStartupProbeFailureSeconds() *int32 + GetLivenessProbeFailureSeconds() *int32 + GetReadinessProbeFailureSeconds() *int32 GetStorageVolumes() []StorageVolume GetServiceAccount() string @@ -85,14 +87,30 @@ type StarRocksLoadSpec struct { // +optional ConfigMapInfo ConfigMapInfo `json:"configMapInfo,omitempty"` - // StartupProbeFailureSeconds defines the total failure seconds of startupProbe. + // StartupProbeFailureSeconds defines the total failure seconds of startup Probe. // Default failureThreshold is 60 and the periodSeconds is 5, this means the startup // will fail if the pod can't start in 300 seconds. Your StartupProbeFailureSeconds is // the total time of seconds before startupProbe give up and fail the container start. // If startupProbeFailureSeconds can't be divided by defaultPeriodSeconds, the failureThreshold - // will be rounded up + // will be rounded up. // +optional StartupProbeFailureSeconds *int32 `json:"startupProbeFailureSeconds,omitempty"` + + // LivenessProbeFailureSeconds defines the total failure seconds of liveness Probe. + // Default failureThreshold is 3 and the periodSeconds is 5, this means the liveness + // will fail if the pod can't respond in 15 seconds. Your LivenessProbeFailureSeconds is + // the total time of seconds before the container restart. If LivenessProbeFailureSeconds + // can't be divided by defaultPeriodSeconds, the failureThreshold will be rounded up. + // +optional + LivenessProbeFailureSeconds *int32 `json:"livenessProbeFailureSeconds,omitempty"` + + // ReadinessProbeFailureSeconds defines the total failure seconds of readiness Probe. + // Default failureThreshold is 3 and the periodSeconds is 5, this means the readiness + // will fail if the pod can't respond in 15 seconds. Your ReadinessProbeFailureSeconds is + // the total time of seconds before pods becomes not ready. If ReadinessProbeFailureSeconds + // can't be divided by defaultPeriodSeconds, the failureThreshold will be rounded up. + // +optional + ReadinessProbeFailureSeconds *int32 `json:"readinessProbeFailureSeconds,omitempty"` } // StarRocksService defines external service for starrocks component. @@ -201,3 +219,11 @@ func (spec *StarRocksLoadSpec) GetSchedulerName() string { func (spec *StarRocksLoadSpec) GetStartupProbeFailureSeconds() *int32 { return spec.StartupProbeFailureSeconds } + +func (spec *StarRocksLoadSpec) GetLivenessProbeFailureSeconds() *int32 { + return spec.LivenessProbeFailureSeconds +} + +func (spec *StarRocksLoadSpec) GetReadinessProbeFailureSeconds() *int32 { + return spec.ReadinessProbeFailureSeconds +} diff --git a/pkg/apis/starrocks/v1/zz_generated.deepcopy.go b/pkg/apis/starrocks/v1/zz_generated.deepcopy.go index b31b4fa4..adcef60e 100644 --- a/pkg/apis/starrocks/v1/zz_generated.deepcopy.go +++ b/pkg/apis/starrocks/v1/zz_generated.deepcopy.go @@ -840,6 +840,16 @@ func (in *StarRocksLoadSpec) DeepCopyInto(out *StarRocksLoadSpec) { *out = new(int32) **out = **in } + if in.LivenessProbeFailureSeconds != nil { + in, out := &in.LivenessProbeFailureSeconds, &out.LivenessProbeFailureSeconds + *out = new(int32) + **out = **in + } + if in.ReadinessProbeFailureSeconds != nil { + in, out := &in.ReadinessProbeFailureSeconds, &out.ReadinessProbeFailureSeconds + *out = new(int32) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StarRocksLoadSpec. diff --git a/pkg/k8sutils/templates/pod/spec.go b/pkg/k8sutils/templates/pod/spec.go index 1d33c4f1..40e68304 100644 --- a/pkg/k8sutils/templates/pod/spec.go +++ b/pkg/k8sutils/templates/pod/spec.go @@ -38,18 +38,18 @@ func StartupProbe(startupProbeFailureSeconds *int32, port int32, path string) *c return completeProbe(startupProbeFailureSeconds, defaultFailureThreshold, defaultPeriodSeconds, getProbe(port, path)) } -// LivenessProbe returns a liveness. -func LivenessProbe(port int32, path string) *corev1.Probe { +// LivenessProbe returns a liveness probe. +func LivenessProbe(livenessProbeFailureSeconds *int32, port int32, path string) *corev1.Probe { var defaultFailureThreshold int32 = 3 var defaultPeriodSeconds int32 = 5 - return completeProbe(nil, defaultFailureThreshold, defaultPeriodSeconds, getProbe(port, path)) + return completeProbe(livenessProbeFailureSeconds, defaultFailureThreshold, defaultPeriodSeconds, getProbe(port, path)) } // ReadinessProbe returns a readiness probe. -func ReadinessProbe(port int32, path string) *corev1.Probe { +func ReadinessProbe(readinessProbeFailureSeconds *int32, port int32, path string) *corev1.Probe { var defaultFailureThreshold int32 = 3 var defaultPeriodSeconds int32 = 5 - return completeProbe(nil, defaultFailureThreshold, defaultPeriodSeconds, getProbe(port, path)) + return completeProbe(readinessProbeFailureSeconds, defaultFailureThreshold, defaultPeriodSeconds, getProbe(port, path)) } // LifeCycle returns a lifecycle. diff --git a/pkg/k8sutils/templates/pod/spec_test.go b/pkg/k8sutils/templates/pod/spec_test.go index 85df711d..09c51829 100644 --- a/pkg/k8sutils/templates/pod/spec_test.go +++ b/pkg/k8sutils/templates/pod/spec_test.go @@ -29,8 +29,9 @@ import ( func TestMakeLivenessProbe(t *testing.T) { type args struct { - port int32 - path string + seconds *int32 + port int32 + path string } tests := []struct { name string @@ -38,7 +39,7 @@ func TestMakeLivenessProbe(t *testing.T) { want *corev1.Probe }{ { - name: "test", + name: "liveness probe with default seconds", args: args{ port: 8080, path: "/api/health2", @@ -49,10 +50,23 @@ func TestMakeLivenessProbe(t *testing.T) { ProbeHandler: getProbe(8080, "/api/health2"), }, }, + { + name: "liveness probe with specified seconds", + args: args{ + seconds: func() *int32 { s := int32(50); return &s }(), + port: 8080, + path: "/api/health2", + }, + want: &corev1.Probe{ + PeriodSeconds: 5, + FailureThreshold: 10, + ProbeHandler: getProbe(8080, "/api/health2"), + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := LivenessProbe(tt.args.port, tt.args.path); !reflect.DeepEqual(got, tt.want) { + if got := LivenessProbe(tt.args.seconds, tt.args.port, tt.args.path); !reflect.DeepEqual(got, tt.want) { t.Errorf("LivenessProbe() = %v, want %v", got, tt.want) } }) @@ -61,8 +75,9 @@ func TestMakeLivenessProbe(t *testing.T) { func TestMakeReadinessProbe(t *testing.T) { type args struct { - port int32 - path string + seconds *int32 + port int32 + path string } tests := []struct { name string @@ -70,7 +85,7 @@ func TestMakeReadinessProbe(t *testing.T) { want *corev1.Probe }{ { - name: "test", + name: "readiness probe with default seconds", args: args{ port: 8080, path: "/api/health2", @@ -81,10 +96,23 @@ func TestMakeReadinessProbe(t *testing.T) { ProbeHandler: getProbe(8080, "/api/health2"), }, }, + { + name: "readiness probe with specified seconds", + args: args{ + seconds: func() *int32 { s := int32(50); return &s }(), + port: 8080, + path: "/api/health2", + }, + want: &corev1.Probe{ + PeriodSeconds: 5, + FailureThreshold: 10, + ProbeHandler: getProbe(8080, "/api/health2"), + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := ReadinessProbe(tt.args.port, tt.args.path); !reflect.DeepEqual(got, tt.want) { + if got := ReadinessProbe(tt.args.seconds, tt.args.port, tt.args.path); !reflect.DeepEqual(got, tt.want) { t.Errorf("ReadinessProbe() = %v, want %v", got, tt.want) } }) diff --git a/pkg/sub_controller/be/be_pod.go b/pkg/sub_controller/be/be_pod.go index dfbf38ff..819a5f10 100644 --- a/pkg/sub_controller/be/be_pod.go +++ b/pkg/sub_controller/be/be_pod.go @@ -88,8 +88,8 @@ func (be *BeController) buildPodTemplate(src *srapi.StarRocksCluster, config map ImagePullPolicy: corev1.PullIfNotPresent, VolumeMounts: volumeMounts, StartupProbe: pod.StartupProbe(beSpec.GetStartupProbeFailureSeconds(), webServerPort, pod.HEALTH_API_PATH), - LivenessProbe: pod.LivenessProbe(webServerPort, pod.HEALTH_API_PATH), - ReadinessProbe: pod.ReadinessProbe(webServerPort, pod.HEALTH_API_PATH), + LivenessProbe: pod.LivenessProbe(beSpec.GetLivenessProbeFailureSeconds(), webServerPort, pod.HEALTH_API_PATH), + ReadinessProbe: pod.ReadinessProbe(beSpec.GetReadinessProbeFailureSeconds(), webServerPort, pod.HEALTH_API_PATH), Lifecycle: pod.LifeCycle("/opt/starrocks/be_prestop.sh"), SecurityContext: pod.ContainerSecurityContext(beSpec), } diff --git a/pkg/sub_controller/cn/cn_pod.go b/pkg/sub_controller/cn/cn_pod.go index 9df84831..2f79f481 100644 --- a/pkg/sub_controller/cn/cn_pod.go +++ b/pkg/sub_controller/cn/cn_pod.go @@ -71,8 +71,8 @@ func (cc *CnController) buildPodTemplate(src *srapi.StarRocksCluster, config map ImagePullPolicy: corev1.PullIfNotPresent, VolumeMounts: volumeMounts, StartupProbe: pod.StartupProbe(cnSpec.GetStartupProbeFailureSeconds(), webServerPort, pod.HEALTH_API_PATH), - LivenessProbe: pod.LivenessProbe(webServerPort, pod.HEALTH_API_PATH), - ReadinessProbe: pod.ReadinessProbe(webServerPort, pod.HEALTH_API_PATH), + LivenessProbe: pod.LivenessProbe(cnSpec.GetLivenessProbeFailureSeconds(), webServerPort, pod.HEALTH_API_PATH), + ReadinessProbe: pod.ReadinessProbe(cnSpec.GetReadinessProbeFailureSeconds(), webServerPort, pod.HEALTH_API_PATH), Lifecycle: pod.LifeCycle("/opt/starrocks/cn_prestop.sh"), SecurityContext: pod.ContainerSecurityContext(cnSpec), } diff --git a/pkg/sub_controller/fe/fe_pod.go b/pkg/sub_controller/fe/fe_pod.go index 0c6c0c74..7ff93287 100644 --- a/pkg/sub_controller/fe/fe_pod.go +++ b/pkg/sub_controller/fe/fe_pod.go @@ -87,8 +87,8 @@ func (fc *FeController) buildPodTemplate(src *srapi.StarRocksCluster, config map VolumeMounts: volMounts, ImagePullPolicy: corev1.PullIfNotPresent, StartupProbe: pod.StartupProbe(feSpec.GetStartupProbeFailureSeconds(), httpPort, pod.HEALTH_API_PATH), - LivenessProbe: pod.LivenessProbe(httpPort, pod.HEALTH_API_PATH), - ReadinessProbe: pod.ReadinessProbe(httpPort, pod.HEALTH_API_PATH), + LivenessProbe: pod.LivenessProbe(feSpec.GetLivenessProbeFailureSeconds(), httpPort, pod.HEALTH_API_PATH), + ReadinessProbe: pod.ReadinessProbe(feSpec.GetReadinessProbeFailureSeconds(), httpPort, pod.HEALTH_API_PATH), Lifecycle: pod.LifeCycle("/opt/starrocks/fe_prestop.sh"), SecurityContext: pod.ContainerSecurityContext(feSpec), } diff --git a/pkg/sub_controller/feproxy/feproxy_controller.go b/pkg/sub_controller/feproxy/feproxy_controller.go index 3e252f95..9b0293e6 100644 --- a/pkg/sub_controller/feproxy/feproxy_controller.go +++ b/pkg/sub_controller/feproxy/feproxy_controller.go @@ -200,8 +200,8 @@ func (controller *FeProxyController) buildPodTemplate(src *srapi.StarRocksCluste Resources: feProxySpec.ResourceRequirements, ImagePullPolicy: corev1.PullIfNotPresent, VolumeMounts: volumeMounts, - LivenessProbe: pod.LivenessProbe(port, "/nginx/health"), - ReadinessProbe: pod.ReadinessProbe(port, "/nginx/health"), + LivenessProbe: pod.LivenessProbe(feProxySpec.GetLivenessProbeFailureSeconds(), port, "/nginx/health"), + ReadinessProbe: pod.ReadinessProbe(feProxySpec.GetReadinessProbeFailureSeconds(), port, "/nginx/health"), SecurityContext: pod.ContainerSecurityContext(feProxySpec), }