From e017a1133bbdf2bed986a6155dd64aece1d456bb Mon Sep 17 00:00:00 2001
From: Alon Maor <48641682+alonmr@users.noreply.github.com>
Date: Tue, 26 Nov 2024 15:36:25 +0200
Subject: [PATCH] [MLRun] Alerts deployment and service (#1041)

---
Review notes (this section is ignored by git am):
  * Adds an "alerts" Deployment and Service, rendered only when
    api.microservices.enabled is set.
  * The worker/chief guards evaluate the "mlrun.api.worker.minReplicas" helper
    through include and cast the result with int. A bare quoted template name
    is a non-empty string (always true), and so is the rendered string "0".

 stable/mlrun/Chart.yaml                       |   4 +-
 stable/mlrun/templates/_helpers.tpl           |  39 ++++
 stable/mlrun/templates/alerts-deployment.yaml | 205 ++++++++++++++++++
 stable/mlrun/templates/alerts-service.yaml    |  38 ++++
 .../mlrun/templates/api-chief-deployment.yaml |   6 +
 stable/mlrun/templates/api-chief-ingress.yaml |   2 +-
 stable/mlrun/templates/api-chief-service.yaml |   2 +-
 stable/mlrun/templates/api-service.yaml       |   6 +-
 .../templates/api-worker-deployment.yaml      |  10 +-
 stable/mlrun/values.yaml                      |  45 ++++
 10 files changed, 350 insertions(+), 7 deletions(-)
 create mode 100644 stable/mlrun/templates/alerts-deployment.yaml
 create mode 100644 stable/mlrun/templates/alerts-service.yaml

diff --git a/stable/mlrun/Chart.yaml b/stable/mlrun/Chart.yaml
index 3eed6116e..08bf679f5 100644
--- a/stable/mlrun/Chart.yaml
+++ b/stable/mlrun/Chart.yaml
@@ -1,7 +1,7 @@
 apiVersion: v1
 name: mlrun
-version: 0.9.28
-appVersion: 1.6.4
+version: 0.10.0
+appVersion: 1.7.0
 description: Machine Learning automation and tracking
 sources:
   - https://github.com/mlrun/mlrun
diff --git a/stable/mlrun/templates/_helpers.tpl b/stable/mlrun/templates/_helpers.tpl
index c215fe81f..1e1aed59e 100644
--- a/stable/mlrun/templates/_helpers.tpl
+++ b/stable/mlrun/templates/_helpers.tpl
@@ -66,6 +66,18 @@ We truncate at 63 chars because some Kubernetes name fields are limited to this
 {{- end -}}
 {{- end -}}
 
+{{/*
+Create a fully qualified alerts service name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+*/}}
+{{- define "mlrun.api.microservices.alerts.fullname" -}}
+{{- if .Values.api.microservices.alerts.fullnameOverride -}}
+{{- .Values.api.microservices.alerts.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-alerts" (include "mlrun.fullname" .) | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+
 {{/*
 Create a fully qualified api opa name.
 
@@ -277,6 +289,14 @@ API worker labels
 {{ include "mlrun.api.worker.selectorLabels" . }}
 {{- end -}}
 
+{{/*
+Alerts service labels
+*/}}
+{{- define "mlrun.api.microservices.alerts.labels" -}}
+{{ include "mlrun.common.labels" . }}
+{{ include "mlrun.api.microservices.alerts.selectorLabels" . }}
+{{- end -}}
+
 {{/*
 API selector labels
 */}}
@@ -301,6 +321,25 @@ API worker selector labels
 app.kubernetes.io/sub-component: "worker"
 {{- end -}}
 
+{{/*
+Alerts service selector labels
+*/}}
+{{- define "mlrun.api.microservices.alerts.selectorLabels" -}}
+{{ include "mlrun.api.selectorLabels" . }}
+app.kubernetes.io/sub-component: "alerts"
+{{- end -}}
+
+{{/*
+Worker replicas: api.worker.minReplicas, falling back to 1 when microservices are enabled and the value is unset or 0
+*/}}
+{{- define "mlrun.api.worker.minReplicas" -}}
+{{- if .Values.api.microservices.enabled -}}
+{{ coalesce .Values.api.worker.minReplicas 1 }}
+{{- else -}}
+{{- .Values.api.worker.minReplicas -}}
+{{- end -}}
+{{- end -}}
+
 {{/*
 DB labels
 */}}
diff --git a/stable/mlrun/templates/alerts-deployment.yaml b/stable/mlrun/templates/alerts-deployment.yaml
new file mode 100644
index 000000000..1c0fff8bd
--- /dev/null
+++ b/stable/mlrun/templates/alerts-deployment.yaml
@@ -0,0 +1,205 @@
+{{ if .Values.api.microservices.enabled}}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "mlrun.api.microservices.alerts.fullname" . }}
+  labels:
+    {{- include "mlrun.api.microservices.alerts.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.api.microservices.alerts.minReplicas }}
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      {{- include "mlrun.api.microservices.alerts.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      labels:
+        {{- include "mlrun.api.microservices.alerts.selectorLabels" . | nindent 8 }}
+      annotations:
+        kubectl.kubernetes.io/default-container: {{ template "mlrun.name" . }}-{{ .Values.api.name }}
+    spec:
+      {{- with .Values.api.image.pullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.serviceAccounts.serviceAccountName}}
+      serviceAccountName: {{ .Values.serviceAccounts.serviceAccountName }}
+      {{- else }}
+      serviceAccountName: {{ include "mlrun.serviceAccountName.api" . }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.api.podSecurityContext | nindent 8 }}
+      {{- if .Values.api.extraInitContainers }}
+      initContainers:
+        {{- toYaml .Values.api.extraInitContainers | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: {{ template "mlrun.name" . }}-{{ .Values.api.name }}
+          securityContext:
+            {{- toYaml .Values.api.securityContext | nindent 12 }}
+          image: "{{ .Values.api.image.repository }}:{{ .Values.api.image.tag }}"
+          imagePullPolicy: {{ .Values.api.image.pullPolicy }}
+          ports:
+            - name: http
+              containerPort: 8080
+              protocol: TCP
+          env:
+            - name: MLRUN_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+            - name: MLRUN_SERVICES__SERVICE_NAME
+              value: alerts
+            - name: MLRUN_HTTPDB__CLUSTERIZATION__ROLE
+              value: "worker"
+            - name: MLRUN_LOG_LEVEL
+              value: {{ .Values.api.logLevel }}
+            - name: MLRUN_HTTPDB__DB_TYPE
+            {{- if or (eq .Values.httpDB.dbType "mysql") (eq .Values.httpDB.dbType "sqlite") }}
+              value: "sqldb"
+            {{- else }}
+              value: "filedb"
+            {{- end }}
+            - name: MLRUN_HTTPDB__API_URL
+              value: http://{{ include "mlrun.api.fullname" . }}:{{ .Values.api.service.port }}
+            - name: MLRUN_HTTPDB__DIRPATH
+              value: {{ .Values.httpDB.dirPath }}
+            - name: MLRUN_HTTPDB__BUILDER__DOCKER_REGISTRY
+              value: {{ template "mlrun.defaultDockerRegistry.url" . }}
+            {{- if or .Values.defaultDockerRegistrySecretName .Values.global.registry.secretName }}
+            - name: MLRUN_HTTPDB__BUILDER__DOCKER_REGISTRY_SECRET
+              value: {{ template "mlrun.defaultDockerRegistry.builderSecretName" . }}
+            {{- end }}
+            {{- if or .Values.api.function.spec.image_pull_secret.default .Values.global.registry.secretName }}
+            - name: MLRUN_FUNCTION__SPEC__IMAGE_PULL_SECRET__DEFAULT
+              value: {{ template "mlrun.defaultDockerRegistry.imagePullSecretName" . }}
+            {{- end }}
+            - name: MLRUN_HTTPDB__DSN
+              value: {{ .Values.httpDB.dsn }}
+            - name: MLRUN_HTTPDB__OLD_DSN
+              value: {{ .Values.httpDB.oldDsn }}
+            {{- if .Values.v3io.enabled }}
+            - name: V3IO_ACCESS_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: {{ .Release.Name }}-v3io-fuse
+                  key: accessKey
+            - name: V3IO_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  name: {{ .Release.Name }}-v3io-fuse
+                  key: username
+            {{- end }}
+            - name: MLRUN_NUCLIO_MODE
+              value: {{ .Values.nuclio.mode }}
+            {{- if eq .Values.nuclio.mode "enabled" }}
+            - name: MLRUN_NUCLIO_DASHBOARD_URL
+              value: {{ template "mlrun.nuclio.apiURL" . }}
+            {{- end }}
+            {{- if .Values.api.extraEnv }}
+            {{- range .Values.api.extraEnv }}
+            {{- if not (hasKey $.Values.api.extraEnvKeyValue .name) }}
+            - name: {{ .name }}
+              value: {{ .value }}
+            {{- end }}
+            {{- end }}
+            {{- end }}
+            {{- if .Values.api.extraEnvKeyValue }}
+            {{- range $name, $value := .Values.api.extraEnvKeyValue }}
+            - name: {{ $name }}
+              value: {{ $value | quote }}
+            {{- end }}
+            {{- end }}
+          {{- if .Values.api.envFrom }}
+          envFrom:
+          {{ toYaml .Values.api.envFrom | nindent 10 }}
+          {{- end }}
+          {{- if .Values.api.microservices.alerts.livenessProbe }}
+          livenessProbe:
+            {{ toYaml .Values.api.microservices.alerts.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.api.microservices.alerts.readinessProbe }}
+          readinessProbe:
+            {{ toYaml .Values.api.microservices.alerts.readinessProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.api.resources | nindent 12 }}
+          volumeMounts:
+          {{- range .Values.api.extraPersistentVolumeMounts }}
+          - name: {{ .name }}
+            mountPath: {{ .mountPath }}
+            subPath: {{ .subPath | default "" }}
+            readOnly: {{ .readOnly }}
+          {{- end }}
+        {{- if .Values.api.opa.enabled }}
+        - name: {{ template "mlrun.name" . }}-{{ .Values.api.opa.name }}
+          securityContext:
+            {{- toYaml .Values.api.opa.securityContext | nindent 12 }}
+          image: "{{ .Values.api.opa.image.repository }}:{{ .Values.api.opa.image.tag }}"
+          imagePullPolicy: {{ .Values.api.opa.image.pullPolicy }}
+          ports:
+            - name: http
+              containerPort: 8181
+          {{- if .Values.api.opa.livenessProbe }}
+          livenessProbe:
+            {{ toYaml .Values.api.opa.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.api.opa.readinessProbe }}
+          readinessProbe:
+            {{ toYaml .Values.api.opa.readinessProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.api.opa.resources | nindent 12 }}
+          args:
+            - "run"
+            - "--server"
+            - "--config-file=/config/config.yaml"
+            - "--log-level={{ .Values.api.opa.logLevel }}"
+            - "--log-format={{ .Values.api.opa.logFormat }}"
+            - "--ignore=.*"
+          volumeMounts:
+            - readOnly: true
+              mountPath: /config
+              name: config
+        {{- end }}
+      volumes:
+      {{- range .Values.api.extraPersistentVolumeMounts }}
+      - name: {{ .name }}
+        persistentVolumeClaim:
+          claimName: {{ .existingClaim }}
+      {{- end }}
+      {{- if .Values.api.opa.enabled }}
+      - name: config
+        secret:
+          secretName: {{ template "mlrun.api.opa.fullname" . }}
+      {{- end }}
+      {{- with .Values.api.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.api.microservices.alerts.affinity }}
+      affinity:
+        {{- toYaml .Values.api.microservices.alerts.affinity | nindent 8 }}
+      {{- else if .Values.api.affinity }}
+      affinity:
+        {{- toYaml .Values.api.affinity | nindent 8 }}
+      {{- else }}
+      affinity:
+        podAntiAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 1
+              podAffinityTerm:
+                labelSelector:
+                  matchLabels:
+                    {{- include "mlrun.api.selectorLabels" . | nindent 20 }}
+                topologyKey: "kubernetes.io/hostname"
+      {{- end }}
+      {{- with .Values.api.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.api.priorityClassName }}
+      priorityClassName: {{ .Values.api.priorityClassName | quote }}
+      {{- end }}
+{{- end }}
diff --git a/stable/mlrun/templates/alerts-service.yaml b/stable/mlrun/templates/alerts-service.yaml
new file mode 100644
index 000000000..be4e6aeba
--- /dev/null
+++ b/stable/mlrun/templates/alerts-service.yaml
@@ -0,0 +1,38 @@
+{{ if .Values.api.microservices.enabled}}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "mlrun.api.microservices.alerts.fullname" . }}
+spec:
+{{- if (or (eq .Values.api.microservices.alerts.service.type "ClusterIP") (empty .Values.api.microservices.alerts.service.type)) }}
+  type: ClusterIP
+  {{- if .Values.api.microservices.alerts.service.clusterIP }}
+  clusterIP: {{ .Values.api.microservices.alerts.service.clusterIP }}
+  {{end}}
+{{- else if eq .Values.api.microservices.alerts.service.type "LoadBalancer" }}
+  type: {{ .Values.api.microservices.alerts.service.type }}
+  {{- if .Values.api.microservices.alerts.service.loadBalancerIP }}
+  loadBalancerIP: {{ .Values.api.microservices.alerts.service.loadBalancerIP }}
+  {{- end }}
+  {{- if .Values.api.microservices.alerts.service.loadBalancerSourceRanges }}
+  loadBalancerSourceRanges:
+{{ toYaml .Values.api.microservices.alerts.service.loadBalancerSourceRanges | indent 4 }}
+  {{- end -}}
+{{- else }}
+  type: {{ .Values.api.microservices.alerts.service.type }}
+{{- end }}
+{{- if .Values.api.microservices.alerts.service.externalIPs }}
+  externalIPs:
+{{ toYaml .Values.api.microservices.alerts.service.externalIPs | indent 4 }}
+{{- end }}
+  ports:
+    - name: http
+      port: {{ .Values.api.microservices.alerts.service.port }}
+      protocol: TCP
+      targetPort: http
+{{ if (and (eq .Values.api.microservices.alerts.service.type "NodePort") (not (empty .Values.api.microservices.alerts.service.nodePort))) }}
+      nodePort: {{.Values.api.microservices.alerts.service.nodePort}}
+{{ end }}
+  selector:
+    {{- include "mlrun.api.microservices.alerts.selectorLabels" . | nindent 4 }}
+{{- end -}}
diff --git a/stable/mlrun/templates/api-chief-deployment.yaml b/stable/mlrun/templates/api-chief-deployment.yaml
index 45f65af7f..5a0512278 100644
--- a/stable/mlrun/templates/api-chief-deployment.yaml
+++ b/stable/mlrun/templates/api-chief-deployment.yaml
@@ -54,6 +54,12 @@ spec:
               valueFrom:
                 fieldRef:
                   fieldPath: metadata.namespace
+            - name: MLRUN_SERVICES__SERVICE_NAME
+              value: api
+            {{- if .Values.api.microservices.enabled }}
+            - name: MLRUN_SERVICES__HYDRA__SERVICES
+              value: ""
+            {{- end }}
             - name: MLRUN_HTTPDB__CLUSTERIZATION__ROLE
               value: "chief"
             - name: MLRUN_LOG_LEVEL
diff --git a/stable/mlrun/templates/api-chief-ingress.yaml b/stable/mlrun/templates/api-chief-ingress.yaml
index f47ba82d0..1ded67df7 100644
--- a/stable/mlrun/templates/api-chief-ingress.yaml
+++ b/stable/mlrun/templates/api-chief-ingress.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.api.worker.minReplicas -}}
+{{- if int (include "mlrun.api.worker.minReplicas" .) -}}
 {{- if semverCompare ">=1.1.0-X" .Values.api.image.tag -}}
 {{- if .Values.api.chief.ingress.enabled -}}
 {{- $fullName := include "mlrun.api.chief.fullname" . -}}
diff --git a/stable/mlrun/templates/api-chief-service.yaml b/stable/mlrun/templates/api-chief-service.yaml
index 580d6ccb8..0eb090369 100644
--- a/stable/mlrun/templates/api-chief-service.yaml
+++ b/stable/mlrun/templates/api-chief-service.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.api.worker.minReplicas -}}
+{{- if int (include "mlrun.api.worker.minReplicas" .) -}}
 {{- if semverCompare ">=1.1.0-X" .Values.api.image.tag -}}
 apiVersion: v1
 kind: Service
diff --git a/stable/mlrun/templates/api-service.yaml b/stable/mlrun/templates/api-service.yaml
index 2a4150c85..d8e5c99d4 100644
--- a/stable/mlrun/templates/api-service.yaml
+++ b/stable/mlrun/templates/api-service.yaml
@@ -35,7 +35,11 @@ spec:
       nodePort: {{.Values.api.service.nodePort}}
 {{ end }}
   selector:
-    {{- include "mlrun.api.selectorLabels" . | nindent 4 }}
+    {{- if .Values.api.microservices.enabled}}
+    {{- include "mlrun.api.worker.selectorLabels" . | nindent 4 }}
+    {{- else }}
+    {{- include "mlrun.api.selectorLabels" . | nindent 4 }}
+    {{- end }}
     {{- with .Values.api.service.selectorLabels }}
     {{- toYaml . | nindent 4 }}
     {{- end }}
diff --git a/stable/mlrun/templates/api-worker-deployment.yaml b/stable/mlrun/templates/api-worker-deployment.yaml
index abad504c6..9c1a3e218 100644
--- a/stable/mlrun/templates/api-worker-deployment.yaml
+++ b/stable/mlrun/templates/api-worker-deployment.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.api.worker.minReplicas -}}
+{{- if int (include "mlrun.api.worker.minReplicas" .) -}}
 {{- if semverCompare ">=1.1.0-X" .Values.api.image.tag -}}
 apiVersion: apps/v1
 kind: Deployment
@@ -7,7 +7,7 @@ metadata:
   labels:
     {{- include "mlrun.api.worker.labels" . | nindent 4 }}
 spec:
-  replicas: {{ .Values.api.worker.minReplicas }}
+  replicas: {{ include "mlrun.api.worker.minReplicas" . }}
   selector:
     matchLabels:
       {{- include "mlrun.api.worker.selectorLabels" . | nindent 6 }}
@@ -48,6 +48,12 @@ spec:
               valueFrom:
                 fieldRef:
                   fieldPath: metadata.namespace
+            - name: MLRUN_SERVICES__SERVICE_NAME
+              value: api
+            {{- if .Values.api.microservices.enabled }}
+            - name: MLRUN_SERVICES__HYDRA__SERVICES
+              value: ""
+            {{- end }}
             - name: MLRUN_HTTPDB__CLUSTERIZATION__ROLE
               value: "worker"
             - name: MLRUN_LOG_LEVEL
diff --git a/stable/mlrun/values.yaml b/stable/mlrun/values.yaml
index 002051252..2d115a395 100644
--- a/stable/mlrun/values.yaml
+++ b/stable/mlrun/values.yaml
@@ -5,6 +5,51 @@ fullnameOverride:
 api:
   name: api
   fullnameOverride:
+  microservices:
+    enabled: false
+    alerts:
+      fullnameOverride:
+      # auto-scaling is currently not supported and the min value is treated as the fixed value
+      minReplicas: 1
+
+      service:
+        type: ClusterIP
+        port: 8080
+        targetPort: 8080
+
+      ## Affinity for pod assignment
+      ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity
+      ##
+      # Setting the affinity here has precedence over api.affinity
+      # Note that by default we're assigning affinity to make the services pods to run on different nodes, if you're
+      # overriding the affinity it's your responsibility to keep applying this behavior
+      affinity: {}
+
+      startupProbe:
+        httpGet:
+          path: /alerts/v1/healthz
+          port: http
+
+        timeoutSeconds: 10
+        periodSeconds: 15
+        failureThreshold: 40
+
+      readinessProbe:
+        httpGet:
+          path: /alerts/v1/healthz
+          port: http
+        timeoutSeconds: 30
+        periodSeconds: 45
+        failureThreshold: 3
+
+      livenessProbe:
+        httpGet:
+          path: /alerts/v1/healthz
+          port: http
+
+        timeoutSeconds: 60
+        periodSeconds: 30
+        failureThreshold: 30
   persistence:
     enabled: false
 