# prometheus-adapter-values.yaml
# Location of the Prometheus server, given as a base URL plus a port.
prometheus:
  # Presumably the Prometheus Service installed by the kube-prometheus-stack
  # chart; the host name places it in the "monitoring" namespace.
  url: http://kube-prometheus-stack-prometheus.monitoring.svc.cluster.local
  port: 9090
# Metric rules: the default rule set plus three custom GPU-utilisation rules.
rules:
  # Leave the default rules switched on alongside the custom ones below.
  default: true
  # The custom rules publish DCGM_FI_DEV_GPU_UTIL under three names
  # (…_AVG, …_MIN, …_MAX). Every rule:
  #   * selects only series whose exported_namespace, exported_container and
  #     exported_pod labels are all non-empty;
  #   * associates those labels with the namespace, service and pod resources;
  #   * applies the matching *_over_time function over a 1-minute window,
  #     rounds the result, then aggregates by
  #     (exported_namespace, exported_container) -- so the aggregated series
  #     no longer carries an exported_pod label.
  # NOTE(review): exported_container is mapped to the "service" resource;
  #   presumably container names equal Service names in this cluster -- confirm.
  # NOTE(review): the queries use <<.Series>> only, with neither
  #   <<.LabelMatchers>> nor <<.GroupBy>>; verify that results are scoped to
  #   the requested object as intended.
  custom:
    # 1-minute average.
    - seriesQuery: 'DCGM_FI_DEV_GPU_UTIL{exported_namespace!="",exported_container!="",exported_pod!=""}'
      name:
        as: "DCGM_FI_DEV_GPU_UTIL_AVG"
      resources:
        overrides:
          exported_namespace: {resource: "namespace"}
          exported_container: {resource: "service"}
          exported_pod: {resource: "pod"}
      metricsQuery: avg by (exported_namespace, exported_container) (round(avg_over_time(<<.Series>>[1m])))
    # 1-minute minimum.
    - seriesQuery: 'DCGM_FI_DEV_GPU_UTIL{exported_namespace!="",exported_container!="",exported_pod!=""}'
      name:
        as: "DCGM_FI_DEV_GPU_UTIL_MIN"
      resources:
        overrides:
          exported_namespace: {resource: "namespace"}
          exported_container: {resource: "service"}
          exported_pod: {resource: "pod"}
      metricsQuery: min by (exported_namespace, exported_container) (round(min_over_time(<<.Series>>[1m])))
    # 1-minute maximum.
    - seriesQuery: 'DCGM_FI_DEV_GPU_UTIL{exported_namespace!="",exported_container!="",exported_pod!=""}'
      name:
        as: "DCGM_FI_DEV_GPU_UTIL_MAX"
      resources:
        overrides:
          exported_namespace: {resource: "namespace"}
          exported_container: {resource: "service"}
          exported_pod: {resource: "pod"}
      metricsQuery: max by (exported_namespace, exported_container) (round(max_over_time(<<.Series>>[1m])))