From e7df20414ebcb3fa7ac14f3f547cf22a10fe5532 Mon Sep 17 00:00:00 2001
From: win5923
Date: Thu, 26 Dec 2024 23:56:07 +0800
Subject: [PATCH] [Grafana] Use PodMonitor instead of ServiceMonitor for the Head Node to avoid metric duplication

Signed-off-by: win5923
---
Notes (ignored by `git am`):
  * Adds a second PodMonitor document, `ray-head-monitor`, to podMonitor.yaml.
    It selects pods labelled `ray.io/node-type: head` in the `default`
    namespace and scrapes the `metrics`, `as-metrics` and `dash-metrics`
    ports, copying the pod label `ray.io/cluster` into `ray_io_cluster`.
  * Deletes serviceMonitor.yaml, which defined a ServiceMonitor with the same
    name, selector and ports.
  * Line breaks and indentation were restored from a whitespace-collapsed
    copy of this patch. Context and removed lines are assumed to match the
    target files' layout -- confirm with `git apply --check` before applying.

 config/prometheus/podMonitor.yaml     | 39 +++++++++++++++++++++++++++
 config/prometheus/serviceMonitor.yaml | 25 -----------------
 2 files changed, 39 insertions(+), 25 deletions(-)
 delete mode 100644 config/prometheus/serviceMonitor.yaml

diff --git a/config/prometheus/podMonitor.yaml b/config/prometheus/podMonitor.yaml
index 29aaf353be..5af17a3fe1 100644
--- a/config/prometheus/podMonitor.yaml
+++ b/config/prometheus/podMonitor.yaml
@@ -22,3 +22,42 @@ spec:
     relabelings:
     - sourceLabels: [__meta_kubernetes_pod_label_ray_io_cluster]
       targetLabel: ray_io_cluster
+---
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+  labels:
+    # `release: $HELM_RELEASE`: Prometheus can only detect PodMonitor with this label.
+    release: prometheus
+  name: ray-head-monitor
+  namespace: prometheus-system
+spec:
+  jobLabel: ray-head
+  # Only select Kubernetes Pods in the "default" namespace.
+  namespaceSelector:
+    matchNames:
+      - default
+  # Only select Kubernetes Pods with "matchLabels".
+  selector:
+    matchLabels:
+      ray.io/node-type: head
+  # A list of endpoints allowed as part of this PodMonitor.
+  podMetricsEndpoints:
+    - port: metrics
+      relabelings:
+        - action: replace
+          sourceLabels:
+            - __meta_kubernetes_pod_label_ray_io_cluster
+          targetLabel: ray_io_cluster
+    - port: as-metrics # autoscaler metrics
+      relabelings:
+        - action: replace
+          sourceLabels:
+            - __meta_kubernetes_pod_label_ray_io_cluster
+          targetLabel: ray_io_cluster
+    - port: dash-metrics # dashboard metrics
+      relabelings:
+        - action: replace
+          sourceLabels:
+            - __meta_kubernetes_pod_label_ray_io_cluster
+          targetLabel: ray_io_cluster
diff --git a/config/prometheus/serviceMonitor.yaml b/config/prometheus/serviceMonitor.yaml
deleted file mode 100644
index cc3eb6905d..0000000000
--- a/config/prometheus/serviceMonitor.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
-  name: ray-head-monitor
-  namespace: prometheus-system
-  labels:
-    # `release: $HELM_RELEASE`: Prometheus can only detect ServiceMonitor with this label.
-    release: prometheus
-spec:
-  jobLabel: ray-head
-  # Only select Kubernetes Services in the "default" namespace.
-  namespaceSelector:
-    matchNames:
-      - default
-  # Only select Kubernetes Services with "matchLabels".
-  selector:
-    matchLabels:
-      ray.io/node-type: head
-  # A list of endpoints allowed as part of this ServiceMonitor.
-  endpoints:
-    - port: metrics
-    - port: as-metrics # autoscaler metrics
-    - port: dash-metrics # dashboard metrics
-  targetLabels:
-  - ray.io/cluster