Commit a35ef2b

[release-4.14] Add 'managedBy' label to ceph queries
Signed-off-by: Arun Kumar Mohan <[email protected]>
1 parent 19bbcca commit a35ef2b
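
A note on why every clause is touched: in PromQL, an aggregation such as sum by (namespace) (...) drops any label not listed in by(), and one-to-one vector matching with on(...) keeps only the labels named in on(). Unless managedBy (presumably already present on the underlying Ceph metrics and naming the StorageCluster that manages the Ceph cluster) is added to each by(), on() and group_*() list, the label never reaches the recorded series or alert labels, and metrics from two Ceph clusters sharing a namespace would be merged into one series. A rough sketch of the effect on one recording rule from this file; the label values below are hypothetical:

# After the change, the rule output keeps the extra label:
job:ceph_pools_iops:total
#   {namespace="openshift-storage", managedBy="ocs-storagecluster"}           70
#   {namespace="openshift-storage", managedBy="ocs-external-storagecluster"}  50
# Before the change, both clusters would have collapsed into a single series:
#   {namespace="openshift-storage"}                                           120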

File tree

1 file changed (+16, -16 lines)


controllers/storagecluster/prometheus/localcephrules.yaml

Lines changed: 16 additions & 16 deletions
@@ -14,24 +14,24 @@ spec:
         kube_node_status_condition{condition="Ready",job="kube-state-metrics",status="true"} * on (node) group_right() max by (node, namespace) (label_replace(ceph_disk_occupation{job="rook-ceph-mgr"},"node","$1","exported_instance","(.*)"))
       record: cluster:ceph_node_down:join_kube
     - expr: |
-        avg by (namespace) (topk by (ceph_daemon, namespace) (1, label_replace(label_replace(ceph_disk_occupation{job="rook-ceph-mgr"}, "instance", "$1", "exported_instance", "(.*)"), "device", "$1", "device", "/dev/(.*)")) * on(instance, device) group_right(ceph_daemon, namespace) topk by (instance, device, namespace) (1,(irate(node_disk_read_time_seconds_total[1m]) + irate(node_disk_write_time_seconds_total[1m]) / (clamp_min(irate(node_disk_reads_completed_total[1m]), 1) + irate(node_disk_writes_completed_total[1m])))))
+        avg by (namespace, managedBy) (topk by (ceph_daemon, namespace, managedBy) (1, label_replace(label_replace(ceph_disk_occupation{job="rook-ceph-mgr"}, "instance", "$1", "exported_instance", "(.*)"), "device", "$1", "device", "/dev/(.*)")) * on(instance, device) group_right(ceph_daemon, namespace, managedBy) topk by (instance, device, namespace, managedBy) (1,(irate(node_disk_read_time_seconds_total[1m]) + irate(node_disk_write_time_seconds_total[1m]) / (clamp_min(irate(node_disk_reads_completed_total[1m]), 1) + irate(node_disk_writes_completed_total[1m])))))
       record: cluster:ceph_disk_latency:join_ceph_node_disk_irate1m
   - name: telemeter.rules
     rules:
     - expr: |
-        count by (namespace) (ceph_osd_metadata{job="rook-ceph-mgr"})
+        count by (namespace, managedBy) (ceph_osd_metadata{job="rook-ceph-mgr"})
       record: job:ceph_osd_metadata:count
     - expr: |
         count by (namespace) (kube_persistentvolume_info * on (storageclass) group_left(provisioner, namespace) kube_storageclass_info {provisioner=~"(.*rbd.csi.ceph.com)|(.*cephfs.csi.ceph.com)|(.*topolvm.cybozu.com)"})
       record: job:odf_system_pvs:count
     - expr: |
-        sum by (namespace) (ceph_pool_rd{job="rook-ceph-mgr"} + on(pool_id, namespace) ceph_pool_wr)
+        sum by (namespace, managedBy) (ceph_pool_rd{job="rook-ceph-mgr"} + on(pool_id, namespace, managedBy) ceph_pool_wr)
       record: job:ceph_pools_iops:total
     - expr: |
-        sum by (namespace) (ceph_pool_rd_bytes{job="rook-ceph-mgr"} + on(pool_id, namespace) ceph_pool_wr_bytes)
+        sum by (namespace, managedBy) (ceph_pool_rd_bytes{job="rook-ceph-mgr"} + on(pool_id, namespace, managedBy) ceph_pool_wr_bytes)
       record: job:ceph_pools_iops_bytes:total
     - expr: |
-        count by (namespace) (count by (ceph_version, namespace) (ceph_mon_metadata{job="rook-ceph-mgr"} or ceph_osd_metadata{job="rook-ceph-mgr"} or ceph_rgw_metadata{job="rook-ceph-mgr"} or ceph_mds_metadata{job="rook-ceph-mgr"} or ceph_mgr_metadata{job="rook-ceph-mgr"}))
+        count by (namespace, managedBy) (count by (ceph_version, namespace, managedBy) (ceph_mon_metadata{job="rook-ceph-mgr"} or ceph_osd_metadata{job="rook-ceph-mgr"} or ceph_rgw_metadata{job="rook-ceph-mgr"} or ceph_mds_metadata{job="rook-ceph-mgr"} or ceph_mgr_metadata{job="rook-ceph-mgr"}))
       record: job:ceph_versions_running:count
   - name: ceph-mgr-status
     rules:
@@ -66,7 +66,7 @@ spec:
         severity_level: warning
         storage_type: ceph
       expr: |
-        sum by (namespace) (ceph_mds_metadata{job="rook-ceph-mgr"} == 1) < 2
+        sum by (namespace, managedBy) (ceph_mds_metadata{job="rook-ceph-mgr"} == 1) < 2
       for: 5m
       labels:
         severity: warning
@@ -79,7 +79,7 @@ spec:
         severity_level: error
         storage_type: ceph
       expr: |
-        count by (namespace) (ceph_mon_quorum_status{job="rook-ceph-mgr"} == 1) <= (floor(count by (namespace) (ceph_mon_metadata{job="rook-ceph-mgr"}) / 2) + 1)
+        count by (namespace, managedBy) (ceph_mon_quorum_status{job="rook-ceph-mgr"} == 1) <= (floor(count by (namespace, managedBy) (ceph_mon_metadata{job="rook-ceph-mgr"}) / 2) + 1)
       for: 15m
       labels:
         severity: critical
@@ -101,7 +101,7 @@ spec:
         severity_level: warning
         storage_type: ceph
       expr: |
-        (ceph_mon_metadata{job="rook-ceph-mgr"} * on (ceph_daemon, namespace) group_left() (rate(ceph_mon_num_elections{job="rook-ceph-exporter"}[5m]) * 60)) > 0.95
+        (ceph_mon_metadata{job="rook-ceph-mgr"} * on (ceph_daemon, namespace, managedBy) group_left() (rate(ceph_mon_num_elections{job="rook-ceph-exporter"}[5m]) * 60)) > 0.95
       for: 5m
       labels:
         severity: warning
@@ -127,7 +127,7 @@ spec:
         severity_level: error
         storage_type: ceph
       expr: |
-        (ceph_osd_metadata * on (ceph_daemon, namespace) group_right(device_class,hostname) (ceph_osd_stat_bytes_used / ceph_osd_stat_bytes)) >= 0.80
+        (ceph_osd_metadata * on (ceph_daemon, namespace, managedBy) group_right(device_class,hostname) (ceph_osd_stat_bytes_used / ceph_osd_stat_bytes)) >= 0.80
       for: 40s
       labels:
         severity: critical
@@ -149,7 +149,7 @@ spec:
         severity_level: warning
         storage_type: ceph
       expr: |
-        (ceph_osd_metadata * on (ceph_daemon, namespace) group_right(device_class,hostname) (ceph_osd_stat_bytes_used / ceph_osd_stat_bytes)) >= 0.75
+        (ceph_osd_metadata * on (ceph_daemon, namespace, managedBy) group_right(device_class,hostname) (ceph_osd_stat_bytes_used / ceph_osd_stat_bytes)) >= 0.75
       for: 40s
       labels:
         severity: warning
@@ -160,7 +160,7 @@ spec:
         severity_level: error
         storage_type: ceph
       expr: |
-        label_replace((ceph_osd_in == 1 and ceph_osd_up == 0),"disk","$1","ceph_daemon","osd.(.*)") + on(ceph_daemon, namespace) group_left(host, device) label_replace(ceph_disk_occupation{job="rook-ceph-mgr"},"host","$1","exported_instance","(.*)")
+        label_replace((ceph_osd_in == 1 and ceph_osd_up == 0),"disk","$1","ceph_daemon","osd.(.*)") + on(ceph_daemon, namespace, managedBy) group_left(host, device) label_replace(ceph_disk_occupation{job="rook-ceph-mgr"},"host","$1","exported_instance","(.*)")
       for: 15m
       labels:
         severity: critical
@@ -171,7 +171,7 @@ spec:
         severity_level: error
         storage_type: ceph
       expr: |
-        label_replace((ceph_osd_in == 0 and ceph_osd_up == 0),"disk","$1","ceph_daemon","osd.(.*)") + on(ceph_daemon, namespace) group_left(host, device) label_replace(ceph_disk_occupation{job="rook-ceph-mgr"},"host","$1","exported_instance","(.*)")
+        label_replace((ceph_osd_in == 0 and ceph_osd_up == 0),"disk","$1","ceph_daemon","osd.(.*)") + on(ceph_daemon, namespace, managedBy) group_left(host, device) label_replace(ceph_disk_occupation{job="rook-ceph-mgr"},"host","$1","exported_instance","(.*)")
       for: 1m
       labels:
         severity: critical
@@ -263,7 +263,7 @@ spec:
         severity_level: warning
         storage_type: ceph
       expr: |
-        count by (namespace) (count by (ceph_version, namespace) (ceph_osd_metadata{job="rook-ceph-mgr", ceph_version != ""})) > 1
+        count by (namespace, managedBy) (count by (ceph_version, namespace, managedBy) (ceph_osd_metadata{job="rook-ceph-mgr", ceph_version != ""})) > 1
       for: 10m
       labels:
         severity: warning
@@ -274,7 +274,7 @@ spec:
         severity_level: warning
         storage_type: ceph
       expr: |
-        count by (namespace) (count by (ceph_version, namespace) (ceph_mon_metadata{job="rook-ceph-mgr", ceph_version != ""})) > 1
+        count by (namespace, managedBy) (count by (ceph_version, namespace, managedBy) (ceph_mon_metadata{job="rook-ceph-mgr", ceph_version != ""})) > 1
       for: 10m
       labels:
         severity: warning
@@ -322,7 +322,7 @@ spec:
         severity_level: warning
         storage_type: ceph
       expr: |
-        (ceph_pool_stored_raw * on (pool_id) group_left(name, namespace)ceph_pool_metadata) / ((ceph_pool_quota_bytes * on (pool_id) group_left(name, namespace)ceph_pool_metadata) > 0) > 0.70
+        (ceph_pool_stored_raw * on (pool_id, managedBy) group_left(name, namespace)ceph_pool_metadata) / ((ceph_pool_quota_bytes * on (pool_id, managedBy) group_left(name, namespace)ceph_pool_metadata) > 0) > 0.70
       for: 1m
       labels:
         severity: warning
@@ -333,7 +333,7 @@ spec:
         severity_level: critical
         storage_type: ceph
      expr: |
-        (ceph_pool_stored_raw * on (pool_id) group_left(name, namespace)ceph_pool_metadata) / ((ceph_pool_quota_bytes * on (pool_id) group_left(name, namespace)ceph_pool_metadata) > 0) > 0.90
+        (ceph_pool_stored_raw * on (pool_id, managedBy) group_left(name, namespace)ceph_pool_metadata) / ((ceph_pool_quota_bytes * on (pool_id, managedBy) group_left(name, namespace)ceph_pool_metadata) > 0) > 0.90
       for: 1m
       labels:
         severity: critical
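
Once the updated rules are loaded, individual clusters can be selected by the new label in dashboards or ad-hoc queries. Two small examples against the recording rules defined above; the namespace and managedBy values are placeholders:

# pool IOPS for a single Ceph cluster, picked by its managing StorageCluster
job:ceph_pools_iops:total{namespace="openshift-storage", managedBy="ocs-storagecluster"}

# OSD count broken out per managing StorageCluster
sum by (managedBy) (job:ceph_osd_metadata:count)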
