From 1e8b413801132fb4ffd2be5c2eb9e2f2e649b1eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Legrand?= Date: Fri, 6 Sep 2024 19:18:10 +0200 Subject: [PATCH] Adding a Grafana monitoring chart with GKE nodes types (reserved, spot, on-demand) (#799) Adding a monitoring chart with GKE nodes types (reserved, spot, on-demand) --- .../monitoring/deploy-dashboard.yaml | 148 +++++++++++++++++- .../monitoring/kueue-dashboard.json | 146 +++++++++++++++++ 2 files changed, 293 insertions(+), 1 deletion(-) diff --git a/best-practices/gke-batch-refarch/02_platform/monitoring/deploy-dashboard.yaml b/best-practices/gke-batch-refarch/02_platform/monitoring/deploy-dashboard.yaml index f503303f8..2e263fcc2 100644 --- a/best-practices/gke-batch-refarch/02_platform/monitoring/deploy-dashboard.yaml +++ b/best-practices/gke-batch-refarch/02_platform/monitoring/deploy-dashboard.yaml @@ -1223,6 +1223,152 @@ data: ], "title": "Nodes", "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 18, + "x": 0, + "y": 30 + }, + "id": 20, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(key) (kube_node_spec_taint{key=\"reserved\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "reserved nodes", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(key) (kube_node_spec_taint{key=\"spot\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "spot nodes", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "sum of all nodes", + "range": true, + "refId": "Total number of nodes", + "useBackend": false + }, + { + "datasource": { + "name": "Expression", + "type": "__expr__", + "uid": "__expr__" + }, + "expression": "${Total number of nodes} - ${reserved nodes} - ${spot nodes}", + "hide": false, + "refId": "On-demand nodes", + "type": "math" + } + ], + "title": "Node per type", + "type": "timeseries" } ], "refresh": "5s", @@ -1255,4 +1401,4 @@ data: } ], "folderUid": "" - } + } \ No newline at end of file diff --git a/best-practices/gke-batch-refarch/02_platform/monitoring/kueue-dashboard.json b/best-practices/gke-batch-refarch/02_platform/monitoring/kueue-dashboard.json index 401bfd558..7da0d6340 100644 --- a/best-practices/gke-batch-refarch/02_platform/monitoring/kueue-dashboard.json +++ b/best-practices/gke-batch-refarch/02_platform/monitoring/kueue-dashboard.json @@ -1191,6 +1191,152 @@ ], "title": "Nodes", "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 18, + "x": 0, + "y": 30 + }, + "id": 20, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(key) (kube_node_spec_taint{key=\"reserved\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "reserved nodes", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(key) (kube_node_spec_taint{key=\"spot\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "spot nodes", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "sum of all nodes", + "range": true, + "refId": "Total number of nodes", + "useBackend": false + }, + { + "datasource": { + "name": "Expression", + "type": "__expr__", + "uid": "__expr__" + }, + "expression": "${Total number of nodes} - ${reserved nodes} - ${spot nodes}", + "hide": false, + "refId": "On-demand nodes", + "type": "math" + } + ], + "title": "Node per type", + "type": "timeseries" } ], "refresh": "5s",