diff --git a/examples/production_monitoring/grafana.json b/examples/production_monitoring/grafana.json index f48b6314eb055..071f134c6e5e0 100644 --- a/examples/production_monitoring/grafana.json +++ b/examples/production_monitoring/grafana.json @@ -1,35 +1,4 @@ { - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__elements": {}, - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "10.2.3" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" - } - ], "annotations": { "list": [ { @@ -42,6 +11,12 @@ "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] @@ -50,14 +25,14 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": null, + "id": 29, "links": [], "liveNow": false, "panels": [ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "End to end request latency measured in seconds.", "fieldConfig": { @@ -66,7 +41,6 @@ "mode": "palette-classic" }, "custom": { - "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -80,7 +54,6 @@ "tooltip": false, "viz": false }, - "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -138,11 +111,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -154,11 +127,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -171,11 +144,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -188,11 +161,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -205,10 +178,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "editorMode": "code", - "expr": "rate(vllm:e2e_request_latency_seconds_sum[$__rate_interval])\n/\nrate(vllm:e2e_request_latency_seconds_count[$__rate_interval])", + "expr": "rate(vllm:e2e_request_latency_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:e2e_request_latency_seconds_count{model_name=\"$model_name\"}[$__rate_interval])", "hide": false, "instant": false, "legendFormat": "Average", @@ -222,7 +195,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "Number of tokens processed per second", "fieldConfig": { @@ -231,7 +204,6 @@ "mode": "palette-classic" }, "custom": { - "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -245,7 +217,6 @@ "tooltip": false, "viz": false }, - "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -302,11 +273,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "rate(vllm:prompt_tokens_total[$__rate_interval])", + "expr": "rate(vllm:prompt_tokens_total{model_name=\"$model_name\"}[$__rate_interval])", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -318,11 +289,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "rate(vllm:generation_tokens_total[$__rate_interval])", + "expr": "rate(vllm:generation_tokens_total{model_name=\"$model_name\"}[$__rate_interval])", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -339,7 +310,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "Inter token latency in seconds.", "fieldConfig": { @@ -348,7 +319,6 @@ "mode": "palette-classic" }, "custom": { - "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -362,7 +332,6 @@ "tooltip": false, "viz": false }, - "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -420,11 +389,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -436,11 +405,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -453,11 +422,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -470,11 +439,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -487,10 +456,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "editorMode": "code", - "expr": "rate(vllm:time_per_output_token_seconds_sum[$__rate_interval])\n/\nrate(vllm:time_per_output_token_seconds_count[$__rate_interval])", + "expr": "rate(vllm:time_per_output_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:time_per_output_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])", "hide": false, "instant": false, "legendFormat": "Mean", @@ -504,7 +473,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "Number of requests in RUNNING, WAITING, and SWAPPED state", "fieldConfig": { @@ -513,7 +482,6 @@ "mode": "palette-classic" }, "custom": { - "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -527,7 +495,6 @@ "tooltip": false, "viz": false }, - "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -585,11 +552,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "vllm:num_requests_running", + "expr": "vllm:num_requests_running{model_name=\"$model_name\"}", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -601,11 +568,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "vllm:num_requests_swapped", + "expr": "vllm:num_requests_swapped{model_name=\"$model_name\"}", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, @@ -618,11 +585,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "vllm:num_requests_waiting", + "expr": "vllm:num_requests_waiting{model_name=\"$model_name\"}", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, @@ -639,7 +606,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "P50, P90, P95, and P99 TTFT latency in seconds.", "fieldConfig": { @@ -648,7 +615,6 @@ "mode": "palette-classic" }, "custom": { - "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -662,7 +628,6 @@ "tooltip": false, "viz": false }, - "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -720,11 +685,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -737,11 +702,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -753,11 +718,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -770,11 +735,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -787,10 +752,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "editorMode": "code", - "expr": "rate(vllm:time_to_first_token_seconds_sum[$__rate_interval])\n/\nrate(vllm:time_to_first_token_seconds_count[$__rate_interval])", + "expr": "rate(vllm:time_to_first_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:time_to_first_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])", "hide": false, "instant": false, "legendFormat": "Average", @@ -804,7 +769,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "Percentage of used cache blocks by vLLM.", "fieldConfig": { @@ -813,7 +778,6 @@ "mode": "palette-classic" }, "custom": { - "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -827,7 +791,6 @@ "tooltip": false, "viz": false }, - "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -885,10 +848,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "editorMode": "code", - "expr": "vllm:gpu_cache_usage_perc", + "expr": "vllm:gpu_cache_usage_perc{model_name=\"$model_name\"}", "instant": false, "legendFormat": "GPU Cache Usage", "range": true, @@ -897,10 +860,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "editorMode": "code", - "expr": "vllm:cpu_cache_usage_perc", + "expr": "vllm:cpu_cache_usage_perc{model_name=\"$model_name\"}", "hide": false, "instant": false, "legendFormat": "CPU Cache Usage", @@ -913,10 +876,39 @@ } ], "refresh": "", - "schemaVersion": 39, + "schemaVersion": 37, + "style": "dark", "tags": [], "templating": { - "list": [] + "list": [ + { + "current": { + "selected": false, + "text": "vllm", + "value": "vllm" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(model_name)", + "hide": 0, + "includeAll": false, + "label": "model_name", + "multi": false, + "name": "model_name", + "options": [], + "query": { + "query": "label_values(model_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] }, "time": { "from": "now-5m",