From 7af652043224f5db6a54e566775dcb68474695cb Mon Sep 17 00:00:00 2001 From: vraiyani Date: Thu, 9 Nov 2023 15:26:45 -0800 Subject: [PATCH 1/2] Add service monitor for flyte admin and propeller service Signed-off-by: vraiyani --- charts/flyte-core/README.md | 8 +++++++ .../templates/admin/service-monitor.yaml | 19 +++++++++++++++ .../templates/propeller/service-monitor.yaml | 19 +++++++++++++++ .../templates/propeller/service.yaml | 15 ++++++++++++ charts/flyte-core/values-eks.yaml | 7 ++++++ charts/flyte-core/values-gcp.yaml | 6 +++++ ...loak-idp-flyteclients-without-browser.yaml | 2 ++ charts/flyte-core/values-sandbox.yaml | 2 ++ charts/flyte-core/values.yaml | 23 ++++++++++++++++++- 9 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 charts/flyte-core/templates/admin/service-monitor.yaml create mode 100644 charts/flyte-core/templates/propeller/service-monitor.yaml create mode 100644 charts/flyte-core/templates/propeller/service.yaml diff --git a/charts/flyte-core/README.md b/charts/flyte-core/README.md index 4610da1009..09f00f9001 100644 --- a/charts/flyte-core/README.md +++ b/charts/flyte-core/README.md @@ -176,6 +176,9 @@ helm install gateway bitnami/contour -n flyte | flyteadmin.serviceAccount.create | bool | `true` | Should a service account be created for flyteadmin | | flyteadmin.serviceAccount.createClusterRole | bool | `true` | Should a ClusterRole be created for Flyteadmin | | flyteadmin.serviceAccount.imagePullSecrets | list | `[]` | ImagePullSecrets to automatically assign to the service account | +| flyteadmin.serviceMonitor | object | `{"enabled":false,"labels":null}` | Settings for flyteadmin service monitor | +| flyteadmin.serviceMonitor.enabled | bool | `false` | If enabled create the flyteadmin service monitor. 
| +| flyteadmin.serviceMonitor.labels | string | `nil` | Sets the labels for the service monitor which are required by the prometheus to auto-detect the service monitor and start scrapping the metrics | | flyteadmin.tolerations | list | `[]` | tolerations for Flyteadmin deployment | | flyteagent.enabled | bool | `false` | | | flyteconsole.affinity | object | `{}` | affinity for Flyteconsole deployment | @@ -211,10 +214,15 @@ helm install gateway bitnami/contour -n flyte | flytepropeller.priorityClassName | string | `""` | Sets priorityClassName for propeller pod(s). | | flytepropeller.replicaCount | int | `1` | Replicas count for Flytepropeller deployment | | flytepropeller.resources | object | `{"limits":{"cpu":"200m","ephemeral-storage":"100Mi","memory":"200Mi"},"requests":{"cpu":"10m","ephemeral-storage":"50Mi","memory":"100Mi"}}` | Default resources requests and limits for Flytepropeller deployment | +| flytepropeller.service | object | `{"enabled":false}` | Settings for flytepropeller service | +| flytepropeller.service.enabled | bool | `false` | If enabled create the flytepropeller service | | flytepropeller.serviceAccount | object | `{"annotations":{},"create":true,"imagePullSecrets":[]}` | Configuration for service accounts for FlytePropeller | | flytepropeller.serviceAccount.annotations | object | `{}` | Annotations for ServiceAccount attached to FlytePropeller pods | | flytepropeller.serviceAccount.create | bool | `true` | Should a service account be created for FlytePropeller | | flytepropeller.serviceAccount.imagePullSecrets | list | `[]` | ImagePullSecrets to automatically assign to the service account | +| flytepropeller.serviceMonitor | object | `{"enabled":false,"labels":null}` | Settings for flytepropeller service monitor | +| flytepropeller.serviceMonitor.enabled | bool | `false` | If enabled create the flyetepropeller service monitor. 
| +| flytepropeller.serviceMonitor.labels | string | `nil` | Sets the labels for the service monitor which are required by the prometheus to auto-detect the service monitor and start scrapping the metrics | | flytepropeller.terminationMessagePolicy | string | `"FallbackToLogsOnError"` | Error reporting | | flytepropeller.tolerations | list | `[]` | tolerations for Flytepropeller deployment | | flytescheduler.additionalContainers | list | `[]` | Appends additional containers to the deployment spec. May include template values. | diff --git a/charts/flyte-core/templates/admin/service-monitor.yaml b/charts/flyte-core/templates/admin/service-monitor.yaml new file mode 100644 index 0000000000..89e71933b6 --- /dev/null +++ b/charts/flyte-core/templates/admin/service-monitor.yaml @@ -0,0 +1,19 @@ +{{- if and .Values.flyteadmin.serviceMonitor.enabled .Values.flyteadmin.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "flyteadmin.name" . }} + namespace: {{ template "flyte.namespace" . }} + labels: + {{- with .Values.flyteadmin.serviceMonitor.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + endpoints: + - interval: 60s + port: http-metrics + path: /metrics + scrapeTimeout: 30s + selector: + matchLabels: {{ include "flyteadmin.selectorLabels" . | nindent 6 }} +{{- end }} \ No newline at end of file diff --git a/charts/flyte-core/templates/propeller/service-monitor.yaml b/charts/flyte-core/templates/propeller/service-monitor.yaml new file mode 100644 index 0000000000..f7cac8bac1 --- /dev/null +++ b/charts/flyte-core/templates/propeller/service-monitor.yaml @@ -0,0 +1,19 @@ +{{- if and .Values.flytepropeller.service.enabled .Values.flytepropeller.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + namespace: {{ template "flyte.namespace" . }} + name: {{ template "flytepropeller.name" . 
}} + labels: + {{- with .Values.flytepropeller.serviceMonitor.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + endpoints: + - interval: 60s + port: http-metrics + path: /metrics + scrapeTimeout: 30s + selector: + matchLabels: {{ include "flytepropeller.selectorLabels" . | nindent 6 }} +{{- end }} \ No newline at end of file diff --git a/charts/flyte-core/templates/propeller/service.yaml b/charts/flyte-core/templates/propeller/service.yaml new file mode 100644 index 0000000000..7569f3e6fc --- /dev/null +++ b/charts/flyte-core/templates/propeller/service.yaml @@ -0,0 +1,15 @@ +{{- if and .Values.flytepropeller.enabled .Values.flytepropeller.service.enabled }} +apiVersion: v1 +kind: Service +metadata: + namespace: {{ template "flyte.namespace" . }} + name: {{ template "flytepropeller.name" . }} + labels: {{ include "flytepropeller.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - name: http-metrics + protocol: TCP + port: 10254 + selector: {{ include "flytepropeller.selectorLabels" . 
| nindent 4 }} +{{- end }} diff --git a/charts/flyte-core/values-eks.yaml b/charts/flyte-core/values-eks.yaml index 7b7ca446f5..03276598bf 100644 --- a/charts/flyte-core/values-eks.yaml +++ b/charts/flyte-core/values-eks.yaml @@ -23,6 +23,9 @@ flyteadmin: annotations: eks.amazonaws.com/role-arn: arn:aws:iam::{{ .Values.userSettings.accountNumber }}:role/iam-role-flyte + serviceMonitor: + enabled: false + resources: limits: ephemeral-storage: 200Mi @@ -78,6 +81,10 @@ datacatalog: flytepropeller: replicaCount: 2 manager: false + serviceMonitor: + enabled: false + service: + enabled: false serviceAccount: # -- If the service account is created by you, make this false create: true diff --git a/charts/flyte-core/values-gcp.yaml b/charts/flyte-core/values-gcp.yaml index 1a53510062..4051fb23b9 100644 --- a/charts/flyte-core/values-gcp.yaml +++ b/charts/flyte-core/values-gcp.yaml @@ -14,6 +14,8 @@ userSettings: flyteadmin: replicaCount: 1 + serviceMonitor: + enabled: false serviceAccount: # -- If the service account is created by you, make this false, else a new service account will be created and the flyteadmin role will be added # you can change the name of this role @@ -82,6 +84,10 @@ datacatalog: flytepropeller: replicaCount: 1 manager: false + serviceMonitor: + enabled: false + service: + enabled: false serviceAccount: # -- If the service account is created by you, make this false, else a new service account will be created and the iam-role-flyte will be added # you can change the name of this role diff --git a/charts/flyte-core/values-keycloak-idp-flyteclients-without-browser.yaml b/charts/flyte-core/values-keycloak-idp-flyteclients-without-browser.yaml index c3ed3f5ddd..380db08be7 100644 --- a/charts/flyte-core/values-keycloak-idp-flyteclients-without-browser.yaml +++ b/charts/flyte-core/values-keycloak-idp-flyteclients-without-browser.yaml @@ -11,6 +11,8 @@ flyteadmin: enabled: true + serviceMonitor: + enabled: false # -- Replicas count for Flyteadmin deployment 
replicaCount: 1 image: diff --git a/charts/flyte-core/values-sandbox.yaml b/charts/flyte-core/values-sandbox.yaml index 31ee4cedc6..c0e6d38404 100644 --- a/charts/flyte-core/values-sandbox.yaml +++ b/charts/flyte-core/values-sandbox.yaml @@ -1,5 +1,7 @@ flyteadmin: + serviceMonitor: + enabled: false service: annotations: projectcontour.io/upstream-protocol.h2c: grpc diff --git a/charts/flyte-core/values.yaml b/charts/flyte-core/values.yaml index 7ad1707e89..1bfd6f3d5a 100755 --- a/charts/flyte-core/values.yaml +++ b/charts/flyte-core/values.yaml @@ -110,6 +110,14 @@ flyteadmin: # -- Sets priorityClassName for flyteadmin pod(s). priorityClassName: "" + # -- Settings for flyteadmin service monitor + serviceMonitor: + # -- If enabled create the flyteadmin service monitor. + enabled: false + # -- Sets the labels for the service monitor which are required by the + # prometheus to auto-detect the service monitor and start scrapping the metrics + labels: + # # FLYTESCHEDULER SETTINGS # @@ -290,6 +298,19 @@ flytepropeller: # -- Sets priorityClassName for propeller pod(s). priorityClassName: "" + # -- Settings for flytepropeller service + service: + # -- If enabled create the flytepropeller service + enabled: false + + # -- Settings for flytepropeller service monitor + serviceMonitor: + # -- If enabled create the flyetepropeller service monitor. 
+ enabled: false + # -- Sets the labels for the service monitor which are required by the + # prometheus to auto-detect the service monitor and start scrapping the metrics + labels: + # # FLYTECONSOLE SETTINGS # @@ -895,4 +916,4 @@ databricks: databricks: entrypointFile: dbfs:///FileStore/tables/entrypoint.py # Databricks account - databricksInstance: dbc-a53b7a3c-614c + databricksInstance: dbc-a53b7a3c-614c \ No newline at end of file From 0e147de76de63adc6cd2d1ea95169b2933746b13 Mon Sep 17 00:00:00 2001 From: vraiyani Date: Wed, 15 Nov 2023 20:00:47 -0800 Subject: [PATCH 2/2] make interval/scrapeTimeout configurable for service monitors Signed-off-by: vraiyani --- charts/flyte-core/README.md | 16 ++++++++++------ .../templates/admin/service-monitor.yaml | 4 ++-- .../templates/propeller/service-monitor.yaml | 4 ++-- charts/flyte-core/values.yaml | 18 ++++++++++++------ 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/charts/flyte-core/README.md b/charts/flyte-core/README.md index 09f00f9001..418172aef0 100644 --- a/charts/flyte-core/README.md +++ b/charts/flyte-core/README.md @@ -176,9 +176,11 @@ helm install gateway bitnami/contour -n flyte | flyteadmin.serviceAccount.create | bool | `true` | Should a service account be created for flyteadmin | | flyteadmin.serviceAccount.createClusterRole | bool | `true` | Should a ClusterRole be created for Flyteadmin | | flyteadmin.serviceAccount.imagePullSecrets | list | `[]` | ImagePullSecrets to automatically assign to the service account | -| flyteadmin.serviceMonitor | object | `{"enabled":false,"labels":null}` | Settings for flyteadmin service monitor | -| flyteadmin.serviceMonitor.enabled | bool | `false` | If enabled create the flyteadmin service monitor. 
| -| flyteadmin.serviceMonitor.labels | string | `nil` | Sets the labels for the service monitor which are required by the prometheus to auto-detect the service monitor and start scrapping the metrics | +| flyteadmin.serviceMonitor | object | `{"enabled":false,"interval":"60s","labels":{},"scrapeTimeout":"30s"}` | Settings for flyteadmin service monitor | +| flyteadmin.serviceMonitor.enabled | bool | `false` | If enabled create the flyteadmin service monitor | +| flyteadmin.serviceMonitor.interval | string | `"60s"` | Sets the interval at which metrics will be scraped by prometheus | +| flyteadmin.serviceMonitor.labels | object | `{}` | Sets the labels for the service monitor which are required by the prometheus to auto-detect the service monitor and start scrapping the metrics | +| flyteadmin.serviceMonitor.scrapeTimeout | string | `"30s"` | Sets the timeout after which request to scrape metrics will time out | | flyteadmin.tolerations | list | `[]` | tolerations for Flyteadmin deployment | | flyteagent.enabled | bool | `false` | | | flyteconsole.affinity | object | `{}` | affinity for Flyteconsole deployment | @@ -220,9 +222,11 @@ helm install gateway bitnami/contour -n flyte | flytepropeller.serviceAccount.annotations | object | `{}` | Annotations for ServiceAccount attached to FlytePropeller pods | | flytepropeller.serviceAccount.create | bool | `true` | Should a service account be created for FlytePropeller | | flytepropeller.serviceAccount.imagePullSecrets | list | `[]` | ImagePullSecrets to automatically assign to the service account | -| flytepropeller.serviceMonitor | object | `{"enabled":false,"labels":null}` | Settings for flytepropeller service monitor | -| flytepropeller.serviceMonitor.enabled | bool | `false` | If enabled create the flyetepropeller service monitor. 
| -| flytepropeller.serviceMonitor.labels | string | `nil` | Sets the labels for the service monitor which are required by the prometheus to auto-detect the service monitor and start scrapping the metrics | +| flytepropeller.serviceMonitor | object | `{"enabled":false,"interval":"60s","labels":{},"scrapeTimeout":"30s"}` | Settings for flytepropeller service monitor | +| flytepropeller.serviceMonitor.enabled | bool | `false` | If enabled create the flytepropeller service monitor | +| flytepropeller.serviceMonitor.interval | string | `"60s"` | Sets the interval at which metrics will be scraped by prometheus | +| flytepropeller.serviceMonitor.labels | object | `{}` | Sets the labels for the service monitor which are required by the prometheus to auto-detect the service monitor and start scrapping the metrics | +| flytepropeller.serviceMonitor.scrapeTimeout | string | `"30s"` | Sets the timeout after which request to scrape metrics will time out | | flytepropeller.terminationMessagePolicy | string | `"FallbackToLogsOnError"` | Error reporting | | flytepropeller.tolerations | list | `[]` | tolerations for Flytepropeller deployment | | flytescheduler.additionalContainers | list | `[]` | Appends additional containers to the deployment spec. May include template values. | diff --git a/charts/flyte-core/templates/admin/service-monitor.yaml b/charts/flyte-core/templates/admin/service-monitor.yaml index 89e71933b6..7d5be0352f 100644 --- a/charts/flyte-core/templates/admin/service-monitor.yaml +++ b/charts/flyte-core/templates/admin/service-monitor.yaml @@ -10,10 +10,10 @@ metadata: {{- end }} spec: endpoints: - - interval: 60s + - interval: {{ .Values.flyteadmin.serviceMonitor.interval }} port: http-metrics path: /metrics - scrapeTimeout: 30s + scrapeTimeout: {{ .Values.flyteadmin.serviceMonitor.scrapeTimeout }} selector: matchLabels: {{ include "flyteadmin.selectorLabels" . 
| nindent 6 }} {{- end }} \ No newline at end of file diff --git a/charts/flyte-core/templates/propeller/service-monitor.yaml b/charts/flyte-core/templates/propeller/service-monitor.yaml index f7cac8bac1..9a1e5c2eca 100644 --- a/charts/flyte-core/templates/propeller/service-monitor.yaml +++ b/charts/flyte-core/templates/propeller/service-monitor.yaml @@ -10,10 +10,10 @@ metadata: {{- end }} spec: endpoints: - - interval: 60s + - interval: {{ .Values.flytepropeller.serviceMonitor.interval }} port: http-metrics path: /metrics - scrapeTimeout: 30s + scrapeTimeout: {{ .Values.flytepropeller.serviceMonitor.scrapeTimeout }} selector: matchLabels: {{ include "flytepropeller.selectorLabels" . | nindent 6 }} {{- end }} \ No newline at end of file diff --git a/charts/flyte-core/values.yaml b/charts/flyte-core/values.yaml index 1bfd6f3d5a..6b5d7ea58e 100755 --- a/charts/flyte-core/values.yaml +++ b/charts/flyte-core/values.yaml @@ -112,12 +112,15 @@ flyteadmin: # -- Settings for flyteadmin service monitor serviceMonitor: - # -- If enabled create the flyteadmin service monitor. + # -- If enabled create the flyteadmin service monitor enabled: false + # -- Sets the interval at which metrics will be scraped by prometheus + interval: 60s + # -- Sets the timeout after which request to scrape metrics will time out + scrapeTimeout: 30s # -- Sets the labels for the service monitor which are required by the # prometheus to auto-detect the service monitor and start scrapping the metrics - labels: - + labels: {} # # FLYTESCHEDULER SETTINGS # @@ -305,12 +308,15 @@ flytepropeller: # -- Settings for flytepropeller service monitor serviceMonitor: - # -- If enabled create the flyetepropeller service monitor. 
+ # -- If enabled create the flytepropeller service monitor enabled: false # -- Sets the labels for the service monitor which are required by the # prometheus to auto-detect the service monitor and start scrapping the metrics - labels: - + labels: {} + # -- Sets the interval at which metrics will be scraped by prometheus + interval: 60s + # -- Sets the timeout after which request to scrape metrics will time out + scrapeTimeout: 30s # # FLYTECONSOLE SETTINGS #