Skip to content

Commit

Permalink
Replace grpc_health_probe with the built-in gRPC container probe feature
Browse files Browse the repository at this point in the history
Signed-off-by: Yuki Iwai <[email protected]>
  • Loading branch information
tenzen-y committed Aug 2, 2023
1 parent e69235d commit db8f9ed
Show file tree
Hide file tree
Showing 13 changed files with 18 additions and 108 deletions.
6 changes: 0 additions & 6 deletions cmd/db-manager/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
FROM golang:alpine AS build-env

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

WORKDIR /go/src/github.com/kubeflow/katib

Expand All @@ -18,13 +17,8 @@ COPY pkg/ pkg/
# Build the binary.
RUN CGO_ENABLED=0 GOOS=linux GOARCH="${TARGETARCH}" go build -a -o katib-db-manager ./cmd/db-manager/v1beta1

# Add GRPC health probe.
RUN wget -qO /bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

# Copy the db-manager into a thin image.
FROM alpine:3.15
WORKDIR /app
COPY --from=build-env /bin/grpc_health_probe /bin/
COPY --from=build-env /go/src/github.com/kubeflow/katib/katib-db-manager /app/
ENTRYPOINT ["./katib-db-manager"]
6 changes: 0 additions & 6 deletions cmd/suggestion/goptuna/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
FROM golang:alpine AS build-env

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

WORKDIR /go/src/github.com/kubeflow/katib

Expand All @@ -18,18 +17,13 @@ COPY pkg/ pkg/
# Build the binary.
RUN CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH} go build -a -o goptuna-suggestion ./cmd/suggestion/goptuna/v1beta1

# Add GRPC health probe.
RUN wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

# Copy the Goptuna suggestion into a thin image.
FROM alpine:3.15

ENV TARGET_DIR /opt/katib

WORKDIR ${TARGET_DIR}

COPY --from=build-env /bin/grpc_health_probe /bin/
COPY --from=build-env /go/src/github.com/kubeflow/katib/goptuna-suggestion ${TARGET_DIR}/

RUN chgrp -R 0 ${TARGET_DIR} \
Expand Down
9 changes: 0 additions & 9 deletions cmd/suggestion/hyperband/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
FROM alpine:3.15 AS downloader

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

RUN wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

FROM python:3.10-slim

ARG TARGETARCH
Expand All @@ -22,7 +14,6 @@ RUN if [ "${TARGETARCH}" = "ppc64le" ] || [ "${TARGETARCH}" = "arm64" ]; then \

ADD ./pkg/ ${TARGET_DIR}/pkg/
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
COPY --from=downloader /bin/grpc_health_probe /bin/grpc_health_probe

WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}

Expand Down
9 changes: 0 additions & 9 deletions cmd/suggestion/hyperopt/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
FROM alpine:3.15 AS downloader

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

RUN wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

FROM python:3.10-slim

ARG TARGETARCH
Expand All @@ -22,7 +14,6 @@ RUN if [ "${TARGETARCH}" = "ppc64le" ]; then \

ADD ./pkg/ ${TARGET_DIR}/pkg/
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
COPY --from=downloader /bin/grpc_health_probe /bin/grpc_health_probe

WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}

Expand Down
9 changes: 0 additions & 9 deletions cmd/suggestion/nas/darts/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
FROM alpine:3.15 as downloader

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

RUN wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

FROM python:3.10-slim

ARG TARGETARCH
Expand All @@ -22,7 +14,6 @@ RUN if [ "${TARGETARCH}" = "ppc64le" ]; then \

ADD ./pkg/ ${TARGET_DIR}/pkg/
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
COPY --from=downloader /bin/grpc_health_probe /bin/grpc_health_probe

WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}

Expand Down
10 changes: 0 additions & 10 deletions cmd/suggestion/nas/enas/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,17 +1,8 @@
FROM alpine:3.15 AS downloader

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

RUN wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

FROM python:3.10-slim

ARG TARGETARCH
ENV TARGET_DIR /opt/katib
ENV SUGGESTION_DIR cmd/suggestion/nas/enas/v1beta1
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15
ENV PYTHONPATH ${TARGET_DIR}:${TARGET_DIR}/pkg/apis/manager/v1beta1/python:${TARGET_DIR}/pkg/apis/manager/health/python

RUN if [ "${TARGETARCH}" = "ppc64le" ]; then \
Expand All @@ -23,7 +14,6 @@ RUN if [ "${TARGETARCH}" = "ppc64le" ]; then \

ADD ./pkg/ ${TARGET_DIR}/pkg/
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
COPY --from=downloader /bin/grpc_health_probe /bin/grpc_health_probe

WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}

Expand Down
9 changes: 0 additions & 9 deletions cmd/suggestion/optuna/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
FROM alpine:3.15 AS downloader

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

RUN wget -qO /bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

FROM python:3.10-slim

ARG TARGETARCH
Expand All @@ -22,7 +14,6 @@ RUN if [ "${TARGETARCH}" = "ppc64le" ]; then \

ADD ./pkg/ ${TARGET_DIR}/pkg/
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
COPY --from=downloader /bin/grpc_health_probe /bin/grpc_health_probe

WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}

Expand Down
9 changes: 0 additions & 9 deletions cmd/suggestion/pbt/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
FROM alpine:3.15 AS downloader

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

RUN wget -qO /bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

FROM python:3.10-slim

ARG TARGETARCH
Expand All @@ -22,7 +14,6 @@ RUN if [ "${TARGETARCH}" = "ppc64le" ]; then \

ADD ./pkg/ ${TARGET_DIR}/pkg/
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
COPY --from=downloader /bin/grpc_health_probe /bin/grpc_health_probe

WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}

Expand Down
9 changes: 0 additions & 9 deletions cmd/suggestion/skopt/v1beta1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
FROM alpine:3.15 AS downloader

ARG TARGETARCH
ENV GRPC_HEALTH_PROBE_VERSION v0.4.15

RUN wget -qO /bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-${TARGETARCH} \
&& chmod +x /bin/grpc_health_probe

FROM python:3.10-slim

ARG TARGETARCH
Expand All @@ -22,7 +14,6 @@ RUN if [ "${TARGETARCH}" = "ppc64le" ]; then \

ADD ./pkg/ ${TARGET_DIR}/pkg/
ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/
COPY --from=downloader /bin/grpc_health_probe /bin/grpc_health_probe

WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR}

Expand Down
4 changes: 2 additions & 2 deletions manifests/v1beta1/components/db-manager/db-manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ spec:
- name: api
containerPort: 6789
livenessProbe:
exec:
command: ["/bin/grpc_health_probe", "-addr=:6789"]
grpc:
port: 6789
initialDelaySeconds: 10
periodSeconds: 60
failureThreshold: 5
8 changes: 4 additions & 4 deletions pkg/controller.v1beta1/consts/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,6 @@ const (
// DefaultEarlyStoppingPort is the default port of EarlyStopping service.
DefaultEarlyStoppingPort = 6788

// DefaultGRPCService is the default suggestion service name,
// which is used to run healthz check using grpc probe.
DefaultGRPCService = "manager.v1beta1.Suggestion"

// DefaultGRPCRetryAttempts is the maximum number of retries for gRPC calls
DefaultGRPCRetryAttempts = 10
// DefaultGRPCRetryPeriod is a fixed period of time between gRPC call retries
Expand Down Expand Up @@ -171,6 +167,10 @@ var (
// DefaultKatibDBManagerServicePort is the default Port of Katib DB Manager
DefaultKatibDBManagerServicePort = env.GetEnvOrDefault(DefaultKatibDBManagerServicePortEnvName, "6789")

// DefaultGRPCService is the default suggestion service name,
// which is used to run healthz check using grpc probe.
DefaultGRPCService = "manager.v1beta1.Suggestion"

// List of all valid keys of trial metadata for substitution in Trial template
TrialTemplateMetaKeys = []string{
TrialTemplateMetaKeyOfName,
Expand Down
20 changes: 6 additions & 14 deletions pkg/controller.v1beta1/suggestion/composer/composer.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ const (
defaultPeriodForReady = 10
defaultPeriodForLive = 120
defaultFailureThreshold = 12
// Ref https://github.com/grpc-ecosystem/grpc-health-probe/
defaultGRPCHealthCheckProbe = "/bin/grpc_health_probe"
)

var (
Expand Down Expand Up @@ -210,12 +208,9 @@ func (g *General) desiredContainers(s *suggestionsv1beta1.Suggestion,
if viper.GetBool(consts.ConfigEnableGRPCProbeInSuggestion) && suggestionContainer.ReadinessProbe == nil {
suggestionContainer.ReadinessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
Exec: &corev1.ExecAction{
Command: []string{
defaultGRPCHealthCheckProbe,
fmt.Sprintf("-addr=:%d", consts.DefaultSuggestionPort),
fmt.Sprintf("-service=%s", consts.DefaultGRPCService),
},
GRPC: &corev1.GRPCAction{
Port: consts.DefaultSuggestionPort,
Service: &consts.DefaultGRPCService,
},
},
InitialDelaySeconds: defaultInitialDelaySeconds,
Expand All @@ -225,12 +220,9 @@ func (g *General) desiredContainers(s *suggestionsv1beta1.Suggestion,
if viper.GetBool(consts.ConfigEnableGRPCProbeInSuggestion) && suggestionContainer.LivenessProbe == nil {
suggestionContainer.LivenessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
Exec: &corev1.ExecAction{
Command: []string{
defaultGRPCHealthCheckProbe,
fmt.Sprintf("-addr=:%d", consts.DefaultSuggestionPort),
fmt.Sprintf("-service=%s", consts.DefaultGRPCService),
},
GRPC: &corev1.GRPCAction{
Port: consts.DefaultSuggestionPort,
Service: &consts.DefaultGRPCService,
},
},
// Ref https://srcco.de/posts/kubernetes-liveness-probes-are-dangerous.html
Expand Down
18 changes: 6 additions & 12 deletions pkg/controller.v1beta1/suggestion/composer/composer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -817,25 +817,19 @@ func newFakeContainers() []corev1.Container {
},
ReadinessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
Exec: &corev1.ExecAction{
Command: []string{
defaultGRPCHealthCheckProbe,
fmt.Sprintf("-addr=:%d", consts.DefaultSuggestionPort),
fmt.Sprintf("-service=%s", consts.DefaultGRPCService),
},
GRPC: &corev1.GRPCAction{
Port: consts.DefaultSuggestionPort,
Service: &consts.DefaultGRPCService,
},
},
InitialDelaySeconds: defaultInitialDelaySeconds,
PeriodSeconds: defaultPeriodForReady,
},
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
Exec: &corev1.ExecAction{
Command: []string{
defaultGRPCHealthCheckProbe,
fmt.Sprintf("-addr=:%d", consts.DefaultSuggestionPort),
fmt.Sprintf("-service=%s", consts.DefaultGRPCService),
},
GRPC: &corev1.GRPCAction{
Port: consts.DefaultSuggestionPort,
Service: &consts.DefaultGRPCService,
},
},
InitialDelaySeconds: defaultInitialDelaySeconds,
Expand Down

0 comments on commit db8f9ed

Please sign in to comment.