From 4cbcac4d51fc38843f32cd4a08f25ec49d690c0c Mon Sep 17 00:00:00 2001 From: djshow832 <873581766@qq.com> Date: Mon, 18 Dec 2023 18:22:55 +0800 Subject: [PATCH 1/4] add metrics --- pkg/manager/router/backend_observer.go | 18 +- pkg/manager/router/backend_observer_test.go | 4 + pkg/manager/router/metrics.go | 10 +- pkg/metrics/backend.go | 8 + pkg/metrics/grafana/tiproxy_summary.json | 352 +++++++++++++++++--- pkg/metrics/grafana/tiproxy_summary.jsonnet | 80 +++-- pkg/metrics/metrics.go | 6 +- pkg/metrics/server.go | 8 + pkg/proxy/backend/backend_conn_mgr.go | 2 +- pkg/proxy/client/client_conn.go | 2 + 10 files changed, 409 insertions(+), 81 deletions(-) diff --git a/pkg/manager/router/backend_observer.go b/pkg/manager/router/backend_observer.go index d814dda1..1b9c2527 100644 --- a/pkg/manager/router/backend_observer.go +++ b/pkg/manager/router/backend_observer.go @@ -15,6 +15,7 @@ import ( "github.com/pingcap/tiproxy/lib/config" "github.com/pingcap/tiproxy/lib/util/errors" "github.com/pingcap/tiproxy/lib/util/waitgroup" + "github.com/pingcap/tiproxy/pkg/metrics" pnet "github.com/pingcap/tiproxy/pkg/proxy/net" "go.uber.org/zap" ) @@ -156,6 +157,7 @@ func (bo *BackendObserver) Refresh() { func (bo *BackendObserver) observe(ctx context.Context) { for ctx.Err() == nil { + startTime := time.Now() backendInfo, err := bo.fetcher.GetBackendList(ctx) if err != nil { bo.logger.Error("fetching backends encounters error", zap.Error(err)) @@ -167,11 +169,17 @@ func (bo *BackendObserver) observe(ctx context.Context) { } bo.notifyIfChanged(bhMap) } - select { - case <-time.After(bo.healthCheckConfig.Interval): - case <-bo.refreshChan: - case <-ctx.Done(): - return + + cost := time.Since(startTime) + metrics.HealthCheckCycleGauge.Set(cost.Seconds()) + wait := bo.healthCheckConfig.Interval - cost + if wait > 0 { + select { + case <-time.After(wait): + case <-bo.refreshChan: + case <-ctx.Done(): + return + } } } } diff --git a/pkg/manager/router/backend_observer_test.go b/pkg/manager/router/backend_observer_test.go index e57187e8..64c4b82d 100644 --- a/pkg/manager/router/backend_observer_test.go +++ b/pkg/manager/router/backend_observer_test.go @@ -172,6 +172,10 @@ func (ts *observerTestSuite) checkStatus(backend *backendServer, expectedStatus require.True(ts.t, ok) require.Equal(ts.t, expectedStatus, health.status) require.True(ts.t, checkBackendStatusMetrics(backend.sqlAddr, health.status)) + cycle, err := readHealthCheckCycle() + require.NoError(ts.t, err) + require.Greater(ts.t, cycle.Nanoseconds(), int64(0)) + require.Less(ts.t, cycle.Nanoseconds(), 3*time.Second) } func (ts *observerTestSuite) getBackendsFromCh() map[string]*backendHealth { diff --git a/pkg/manager/router/metrics.go b/pkg/manager/router/metrics.go index 958e3150..909bebd3 100644 --- a/pkg/manager/router/metrics.go +++ b/pkg/manager/router/metrics.go @@ -19,7 +19,7 @@ func checkBackendStatusMetrics(addr string, status BackendStatus) bool { if err != nil { return false } - return val == 1 + return int(val) == 1 } func setBackendConnMetrics(addr string, conns int) { @@ -27,7 +27,8 @@ func setBackendConnMetrics(addr string, conns int) { } func readBackendConnMetrics(addr string) (int, error) { - return metrics.ReadGauge(metrics.BackendConnGauge.WithLabelValues(addr)) + val, err := metrics.ReadGauge(metrics.BackendConnGauge.WithLabelValues(addr)) + return int(val), err } func succeedToLabel(succeed bool) string { @@ -53,3 +54,8 @@ func setPingBackendMetrics(addr string, succeed bool, startTime time.Time) { cost := time.Since(startTime) metrics.PingBackendGauge.WithLabelValues(addr).Set(cost.Seconds()) } + +func readHealthCheckCycle() (time.Duration, error) { + seconds, err := metrics.ReadGauge(metrics.HealthCheckCycleGauge) + return time.Duration(int(seconds * float64(time.Second))), err +} diff --git a/pkg/metrics/backend.go b/pkg/metrics/backend.go index 3389727c..150ee530 100644 --- a/pkg/metrics/backend.go +++ b/pkg/metrics/backend.go @@ -43,4 +43,12 @@ var ( Name: "ping_duration_seconds", Help: "Time (s) of pinging the SQL port of each backend.", }, []string{LblBackend}) + + HealthCheckCycleGauge = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: ModuleProxy, + Subsystem: LabelBackend, + Name: "health_check_seconds", + Help: "Time (s) of each health check cycle.", + }) ) diff --git a/pkg/metrics/grafana/tiproxy_summary.json b/pkg/metrics/grafana/tiproxy_summary.json index cfceb5ac..f66eabda 100644 --- a/pkg/metrics/grafana/tiproxy_summary.json +++ b/pkg/metrics/grafana/tiproxy_summary.json @@ -316,7 +316,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiProxy current goroutine counts.", + "description": "TiProxy disconnection count per minute.", "fill": 1, "fillGradient": 0, "gridPos": { @@ -351,6 +351,92 @@ "spaceLength": 10, "stack": false, "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(tiproxy_server_disconnection_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{type}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Disconnection OPM", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "TiProxy current goroutine counts.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "expr": "go_goroutines{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", job=\"tiproxy\"}", @@ -415,10 +501,10 @@ "gridPos": { "h": 6, "w": 12, - "x": 0, + "x": 12, "y": 0 }, - "id": 7, + "id": 8, "legend": { "alignAsTable": false, "avg": false, @@ -507,7 +593,7 @@ "x": 0, "y": 0 }, - "id": 8, + "id": 9, "panels": [ { "aliasColors": { }, @@ -524,7 +610,7 @@ "x": 0, "y": 0 }, - "id": 9, + "id": 10, "legend": { "alignAsTable": false, "avg": false, @@ -609,6 +695,178 @@ } ] }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "TiProxy P99 query durations by TiProxy instances.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tiproxy_session_query_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, instance))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 Duration By Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "TiProxy P99 query durations by backends.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(histogram_quantile(0.99, sum(rate(tiproxy_session_query_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, backend)), \"backend\", \"$1\", \"backend\", \"(.+-tidb-[0-9]+).*peer.*.svc.*\")", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{backend}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "P99 Duration By Backend", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, { "aliasColors": { }, "bars": false, @@ -624,7 +882,7 @@ "x": 12, "y": 0 }, - "id": 10, + "id": 13, "legend": { "alignAsTable": false, "avg": false, @@ -710,7 +968,7 @@ "x": 0, "y": 0 }, - "id": 11, + "id": 14, "legend": { "alignAsTable": false, "avg": false, @@ -796,7 +1054,7 @@ "x": 12, "y": 0 }, - "id": 12, + "id": 15, "legend": { "alignAsTable": false, "avg": false, @@ -885,7 +1143,7 @@ "x": 0, "y": 0 }, - "id": 13, + "id": 16, "panels": [ { "aliasColors": { }, @@ -902,7 +1160,7 @@ "x": 0, "y": 0 }, - "id": 14, + "id": 17, "legend": { "alignAsTable": false, "avg": false, @@ -979,7 +1237,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Number of session migrations on all backends.", + "description": "OPM of session migrations on all backends.", "fill": 1, "fillGradient": 0, "gridPos": { @@ -988,7 +1246,7 @@ "x": 12, "y": 0 }, - "id": 15, + "id": 18, "legend": { "alignAsTable": false, "avg": false, @@ -1016,7 +1274,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(label_replace(tiproxy_balance_migrate_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}, \"from\", \"$1\", \"from\", \"(.+-tidb-[0-9]+).*peer.*.svc.*\"), \"to\", \"$1\", \"to\", \"(.+-tidb-[0-9]+).*peer.*.svc.*\")", + "expr": "label_replace(label_replace(sum(increase(tiproxy_balance_migrate_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (migrate_res, from, to), \"from\", \"$1\", \"from\", \"(.+-tidb-[0-9]+).*peer.*.svc.*\"), \"to\", \"$1\", \"to\", \"(.+-tidb-[0-9]+).*peer.*.svc.*\")", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{migrate_res}}: {{from}} => {{to}}", @@ -1026,7 +1284,7 @@ "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Session Migrations", + "title": "Session Migration OPM", "tooltip": { "shared": true, "sort": 0, @@ -1074,7 +1332,7 @@ "x": 0, "y": 0 }, - "id": 16, + "id": 19, "legend": { "alignAsTable": false, "avg": false, @@ -1177,7 +1435,7 @@ "x": 0, "y": 0 }, - "id": 17, + "id": 20, "panels": [ { "aliasColors": { }, @@ -1185,7 +1443,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Number of getting an available backend.", + "description": "Duration of getting an available backend.", "fill": 1, "fillGradient": 0, "gridPos": { @@ -1194,7 +1452,7 @@ "x": 0, "y": 0 }, - "id": 18, + "id": 21, "legend": { "alignAsTable": false, "avg": false, @@ -1222,17 +1480,31 @@ "steppedLine": false, "targets": [ { - "expr": "tiproxy_backend_get_backend{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}", + "expr": "histogram_quantile(0.99, sum(rate(tiproxy_backend_get_backend_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} : {{res}}", + "legendFormat": "99", "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(rate(tiproxy_backend_get_backend_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "95", + "refId": "B" + }, + { + "expr": "sum(rate(tiproxy_backend_get_backend_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s])) / sum(rate(tiproxy_backend_get_backend_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "avg", + "refId": "C" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Get Backend Count", + "title": "Get Backend Duration", "tooltip": { "shared": true, "sort": 0, @@ -1248,7 +1520,7 @@ }, "yaxes": [ { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -1256,7 +1528,7 @@ "show": true }, { - "format": "short", + "format": "s", "label": null, "logBase": 1, "max": null, @@ -1271,7 +1543,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Duration of getting an available backend.", + "description": "Duration of Pinging backends.", "fill": 1, "fillGradient": 0, "gridPos": { @@ -1280,7 +1552,7 @@ "x": 12, "y": 0 }, - "id": 19, + "id": 22, "legend": { "alignAsTable": false, "avg": false, @@ -1308,31 +1580,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tiproxy_backend_get_backend_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", + "expr": "label_replace(tiproxy_backend_ping_duration_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}, \"backend\", \"$1\", \"backend\", \"(.+-tidb-[0-9]+).*peer.*.svc.*\")", "format": "time_series", "intervalFactor": 2, - "legendFormat": "99", + "legendFormat": "{{instance}} | {{backend}}", "refId": "A" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(tiproxy_backend_get_backend_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "95", - "refId": "B" - }, - { - "expr": "sum(rate(tiproxy_backend_get_backend_duration_seconds_sum{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[30s])) / sum(rate(tiproxy_backend_get_backend_duration_seconds_count{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\"}[30s]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg", - "refId": "C" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Get Backend Duration", + "title": "Ping Backend Duration", "tooltip": { "shared": true, "sort": 0, @@ -1371,7 +1629,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "Duration of Pinging backends.", + "description": "Duration of each health check cycle.", "fill": 1, "fillGradient": 0, "gridPos": { @@ -1380,7 +1638,7 @@ "x": 0, "y": 0 }, - "id": 20, + "id": 23, "legend": { "alignAsTable": false, "avg": false, @@ -1408,17 +1666,17 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(tiproxy_backend_ping_duration_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}, \"backend\", \"$1\", \"backend\", \"(.+-tidb-[0-9]+).*peer.*.svc.*\")", + "expr": "tiproxy_backend_health_check_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}} | {{backend}}", + "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Ping Backend Duration", + "title": "Health Check Cycle", "tooltip": { "shared": true, "sort": 0, diff --git a/pkg/metrics/grafana/tiproxy_summary.jsonnet b/pkg/metrics/grafana/tiproxy_summary.jsonnet index 22c65863..887b0c90 100644 --- a/pkg/metrics/grafana/tiproxy_summary.jsonnet +++ b/pkg/metrics/grafana/tiproxy_summary.jsonnet @@ -106,6 +106,20 @@ local connectionP = graphPanel.new( ) ); +local disconnP = graphPanel.new( + title='Disconnection OPM', + datasource=myDS, + legend_rightSide=true, + description='TiProxy disconnection count per minute.', + format='short', +) +.addTarget( + prometheus.target( + 'sum(increase(tiproxy_server_disconnection_total{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster", instance=~"$instance"}[1m])) by (type)', + legendFormat='{{type}}', + ) +); + local goroutineP = graphPanel.new( title='Goroutine Count', datasource=myDS, @@ -208,17 +222,31 @@ local durationP = graphPanel.new( ) ); -local durationByBackP = graphPanel.new( - title='Duration By Backend', +local durByInstP = graphPanel.new( + title='P99 Duration By Instance', datasource=myDS, legend_rightSide=true, - description='TiProxy P99 query durations by instances and backends.', + description='TiProxy P99 query durations by TiProxy instances.', format='s', ) .addTarget( prometheus.target( - 'label_replace(histogram_quantile(0.99, sum(rate(tiproxy_session_query_duration_seconds_bucket{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster", instance=~"$instance"}[1m])) by (le, instance, backend)), "backend", "$1", "backend", "(.+-tidb-[0-9]+).*peer.*.svc.*")', - legendFormat='{{instance}} | {{backend}}', + 'histogram_quantile(0.99, sum(rate(tiproxy_session_query_duration_seconds_bucket{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster", instance=~"$instance"}[1m])) by (le, instance))', + legendFormat='{{instance}}', + ) +); + +local durByBackP = graphPanel.new( + title='P99 Duration By Backend', + datasource=myDS, + legend_rightSide=true, + description='TiProxy P99 query durations by backends.', + format='s', +) +.addTarget( + prometheus.target( + 'label_replace(histogram_quantile(0.99, sum(rate(tiproxy_session_query_duration_seconds_bucket{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster", instance=~"$instance"}[1m])) by (le, backend)), "backend", "$1", "backend", "(.+-tidb-[0-9]+).*peer.*.svc.*")', + legendFormat='{{backend}}', ) ); @@ -281,15 +309,15 @@ local bConnP = graphPanel.new( ); local bMigCounterP = graphPanel.new( - title='Session Migrations', + title='Session Migration OPM', datasource=myDS, legend_rightSide=true, - description='Number of session migrations on all backends.', + description='OPM of session migrations on all backends.', format='short', ) .addTarget( prometheus.target( - 'label_replace(label_replace(tiproxy_balance_migrate_total{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster", instance=~"$instance"}, "from", "$1", "from", "(.+-tidb-[0-9]+).*peer.*.svc.*"), "to", "$1", "to", "(.+-tidb-[0-9]+).*peer.*.svc.*")', + 'label_replace(label_replace(sum(increase(tiproxy_balance_migrate_total{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster", instance=~"$instance"}[1m])) by (migrate_res, from, to), "from", "$1", "from", "(.+-tidb-[0-9]+).*peer.*.svc.*"), "to", "$1", "to", "(.+-tidb-[0-9]+).*peer.*.svc.*")', legendFormat='{{migrate_res}}: {{from}} => {{to}}', ) ); @@ -348,31 +376,32 @@ local bGetDurP = graphPanel.new( ) ); -local bGetBeP = graphPanel.new( - title='Get Backend Count', +local bPingBeP = graphPanel.new( + title='Ping Backend Duration', datasource=myDS, legend_rightSide=true, - description='Number of getting an available backend.', - format='short', + description='Duration of Pinging backends.', + format='s', ) .addTarget( prometheus.target( - 'tiproxy_backend_get_backend{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster"}', - legendFormat='{{instance}} : {{res}}', + 'label_replace(tiproxy_backend_ping_duration_seconds{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster", instance=~"$instance"}, "backend", "$1", "backend", "(.+-tidb-[0-9]+).*peer.*.svc.*")', + legendFormat='{{instance}} | {{backend}}', ) ); -local bPingBeP = graphPanel.new( - title='Ping Backend Duration', +local bHealthCycleP = +graphPanel.new( + title='Health Check Cycle', datasource=myDS, legend_rightSide=true, - description='Duration of Pinging backends.', + description='Duration of each health check cycle.', format='s', ) .addTarget( prometheus.target( - 'label_replace(tiproxy_backend_ping_duration_seconds{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster", instance=~"$instance"}, "backend", "$1", "backend", "(.+-tidb-[0-9]+).*peer.*.svc.*")', - legendFormat='{{instance}} | {{backend}}', + 'tiproxy_backend_health_check_seconds{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster", instance=~"$instance"}', + legendFormat='{{instance}}', ) ); @@ -392,14 +421,17 @@ newDash .addPanel(cpuP, gridPos=leftPanelPos) .addPanel(memP, gridPos=rightPanelPos) .addPanel(connectionP, gridPos=leftPanelPos) - .addPanel(goroutineP, gridPos=rightPanelPos) - .addPanel(uptimeP, gridPos=leftPanelPos) + .addPanel(disconnP, gridPos=rightPanelPos) + .addPanel(goroutineP, gridPos=leftPanelPos) + .addPanel(uptimeP, gridPos=rightPanelPos) , gridPos=rowPos ) .addPanel( queryRow .addPanel(durationP, gridPos=leftPanelPos) + .addPanel(durByInstP, gridPos=rightPanelPos) + .addPanel(durByBackP, gridPos=leftPanelPos) .addPanel(cpsByInstP, gridPos=rightPanelPos) .addPanel(cpsByBackP, gridPos=leftPanelPos) .addPanel(cpsByCMDP, gridPos=rightPanelPos) @@ -416,9 +448,9 @@ newDash ) .addPanel( backendRow - .addPanel(bGetBeP, gridPos=leftPanelPos) - .addPanel(bGetDurP, gridPos=rightPanelPos) - .addPanel(bPingBeP, gridPos=leftPanelPos) + .addPanel(bGetDurP, gridPos=leftPanelPos) + .addPanel(bPingBeP, gridPos=rightPanelPos) + .addPanel(bHealthCycleP, gridPos=leftPanelPos) , gridPos=rowPos ) diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index fc283196..e5cbf7b8 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -154,6 +154,7 @@ func registerProxyMetrics() { prometheus.MustRegister(collectors.NewGoCollector(collectors.WithGoCollections(collectors.GoRuntimeMetricsCollection | collectors.GoRuntimeMemStatsCollection))) prometheus.MustRegister(ConnGauge) + prometheus.MustRegister(DisConnCounter) prometheus.MustRegister(MaxProcsGauge) prometheus.MustRegister(ServerEventCounter) prometheus.MustRegister(ServerErrCounter) @@ -166,6 +167,7 @@ func registerProxyMetrics() { prometheus.MustRegister(GetBackendCounter) prometheus.MustRegister(PingBackendGauge) prometheus.MustRegister(BackendConnGauge) + prometheus.MustRegister(HealthCheckCycleGauge) prometheus.MustRegister(MigrateCounter) prometheus.MustRegister(MigrateDurationHistogram) } @@ -192,10 +194,10 @@ func ReadCounter(counter prometheus.Counter) (int, error) { } // ReadGauge reads the value from the gauge. It is only used for testing. -func ReadGauge(gauge prometheus.Gauge) (int, error) { +func ReadGauge(gauge prometheus.Gauge) (float64, error) { var metric dto.Metric if err := gauge.Write(&metric); err != nil { return 0, err } - return int(metric.Gauge.GetValue()), nil + return metric.Gauge.GetValue(), nil } diff --git a/pkg/metrics/server.go b/pkg/metrics/server.go index 683baa95..d93b8c4c 100644 --- a/pkg/metrics/server.go +++ b/pkg/metrics/server.go @@ -26,6 +26,14 @@ var ( Help: "Number of connections.", }) + DisConnCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: ModuleProxy, + Subsystem: LabelServer, + Name: "disconnection_total", + Help: "Number of disconnections.", + }, []string{LblType}) + MaxProcsGauge = prometheus.NewGauge( prometheus.GaugeOpts{ Namespace: ModuleProxy, diff --git a/pkg/proxy/backend/backend_conn_mgr.go b/pkg/proxy/backend/backend_conn_mgr.go index 3faee4ae..8ea4d256 100644 --- a/pkg/proxy/backend/backend_conn_mgr.go +++ b/pkg/proxy/backend/backend_conn_mgr.go @@ -244,7 +244,7 @@ func (mgr *BackendConnManager) getBackendIO(cctx ConnContext, auth *Authenticato addGetBackendMetrics(duration, err == nil) if err != nil { mgr.logger.Error("get backend failed", zap.Duration("duration", duration), zap.NamedError("last_err", origErr)) - } else if duration >= 3*time.Second { + } else if duration >= time.Second { mgr.logger.Warn("get backend slow", zap.Duration("duration", duration), zap.NamedError("last_err", origErr), zap.String("backend_addr", mgr.ServerAddr())) } diff --git a/pkg/proxy/client/client_conn.go b/pkg/proxy/client/client_conn.go index b66afe16..c43e2ef8 100644 --- a/pkg/proxy/client/client_conn.go +++ b/pkg/proxy/client/client_conn.go @@ -9,6 +9,7 @@ import ( "net" "github.com/pingcap/tiproxy/lib/util/errors" + "github.com/pingcap/tiproxy/pkg/metrics" "github.com/pingcap/tiproxy/pkg/proxy/backend" pnet "github.com/pingcap/tiproxy/pkg/proxy/net" "go.uber.org/zap" @@ -62,6 +63,7 @@ clean: fields = append(fields, zap.Stringer("quit_source", src), zap.Error(err)) cc.logger.Warn(msg, fields...) } + metrics.DisConnCounter.WithLabelValues(src.String()).Inc() } func (cc *ClientConnection) processMsg(ctx context.Context) error { From 5187ab3329384ac7acbf091721e7cb7e15565705 Mon Sep 17 00:00:00 2001 From: djshow832 <873581766@qq.com> Date: Tue, 19 Dec 2023 09:58:26 +0800 Subject: [PATCH 2/4] add create conn counter --- pkg/metrics/grafana/tiproxy_summary.json | 170 +++++++++++++++----- pkg/metrics/grafana/tiproxy_summary.jsonnet | 19 ++- pkg/metrics/metrics.go | 1 + pkg/metrics/server.go | 8 + pkg/proxy/proxy.go | 1 + pkg/proxy/proxy_test.go | 41 +++++ 6 files changed, 196 insertions(+), 44 deletions(-) diff --git a/pkg/metrics/grafana/tiproxy_summary.json b/pkg/metrics/grafana/tiproxy_summary.json index f66eabda..6490e515 100644 --- a/pkg/metrics/grafana/tiproxy_summary.json +++ b/pkg/metrics/grafana/tiproxy_summary.json @@ -223,7 +223,7 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiProxy current connection counts.", + "description": "TiProxy uptime since the last restart.", "fill": 1, "fillGradient": 0, "gridPos": { @@ -258,6 +258,92 @@ "spaceLength": 10, "stack": false, "steppedLine": false, + "targets": [ + { + "expr": "time() - process_start_time_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", job=\"tiproxy\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Uptime", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "TiProxy current connection counts.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "expr": "tiproxy_server_connections{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", @@ -316,16 +402,16 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiProxy disconnection count per minute.", + "description": "TiProxy create connection count per minute.", "fill": 1, "fillGradient": 0, "gridPos": { "h": 6, "w": 12, - "x": 12, + "x": 0, "y": 0 }, - "id": 6, + "id": 7, "legend": { "alignAsTable": false, "avg": false, @@ -353,17 +439,17 @@ "steppedLine": false, "targets": [ { - "expr": "sum(increase(tiproxy_server_disconnection_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", + "expr": "sum(increase(tiproxy_server_create_connection_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{type}}", + "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Disconnection OPM", + "title": "Create Connection OPM", "tooltip": { "shared": true, "sort": 0, @@ -402,16 +488,16 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiProxy current goroutine counts.", + "description": "TiProxy disconnection count per minute.", "fill": 1, "fillGradient": 0, "gridPos": { "h": 6, "w": 12, - "x": 0, + "x": 12, "y": 0 }, - "id": 7, + "id": 8, "legend": { "alignAsTable": false, "avg": false, @@ -439,24 +525,17 @@ "steppedLine": false, "targets": [ { - "expr": "go_goroutines{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", job=\"tiproxy\"}", + "expr": "sum(increase(tiproxy_server_disconnection_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (type)", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{instance}}", + "legendFormat": "{{type}}", "refId": "A" - }, - { - "expr": "sum(go_goroutines{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=\"tiproxy\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "total", - "refId": "B" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Goroutine Count", + "title": "Disconnection OPM", "tooltip": { "shared": true, "sort": 0, @@ -495,16 +574,16 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "TiProxy uptime since the last restart.", + "description": "TiProxy current goroutine counts.", "fill": 1, "fillGradient": 0, "gridPos": { "h": 6, "w": 12, - "x": 12, + "x": 0, "y": 0 }, - "id": 8, + "id": 9, "legend": { "alignAsTable": false, "avg": false, @@ -532,17 +611,24 @@ "steppedLine": false, "targets": [ { - "expr": "time() - process_start_time_seconds{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", job=\"tiproxy\"}", + "expr": "go_goroutines{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", job=\"tiproxy\"}", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" + }, + { + "expr": "sum(go_goroutines{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", job=\"tiproxy\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "total", + "refId": "B" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Uptime", + "title": "Goroutine Count", "tooltip": { "shared": true, "sort": 0, @@ -558,7 +644,7 @@ }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -566,7 +652,7 @@ "show": true }, { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -593,7 +679,7 @@ "x": 0, "y": 0 }, - "id": 9, + "id": 10, "panels": [ { "aliasColors": { }, @@ -610,7 +696,7 @@ "x": 0, "y": 0 }, - "id": 10, + "id": 11, "legend": { "alignAsTable": false, "avg": false, @@ -710,7 +796,7 @@ "x": 12, "y": 0 }, - "id": 11, + "id": 12, "legend": { "alignAsTable": false, "avg": false, @@ -796,7 +882,7 @@ "x": 0, "y": 0 }, - "id": 12, + "id": 13, "legend": { "alignAsTable": false, "avg": false, @@ -882,7 +968,7 @@ "x": 12, "y": 0 }, - "id": 13, + "id": 14, "legend": { "alignAsTable": false, "avg": false, @@ -968,7 +1054,7 @@ "x": 0, "y": 0 }, - "id": 14, + "id": 15, "legend": { "alignAsTable": false, "avg": false, @@ -1054,7 +1140,7 @@ "x": 12, "y": 0 }, - "id": 15, + "id": 16, "legend": { "alignAsTable": false, "avg": false, @@ -1143,7 +1229,7 @@ "x": 0, "y": 0 }, - "id": 16, + "id": 17, "panels": [ { "aliasColors": { }, @@ -1160,7 +1246,7 @@ "x": 0, "y": 0 }, - "id": 17, + "id": 18, "legend": { "alignAsTable": false, "avg": false, @@ -1246,7 +1332,7 @@ "x": 12, "y": 0 }, - "id": 18, + "id": 19, "legend": { "alignAsTable": false, "avg": false, @@ -1332,7 +1418,7 @@ "x": 0, "y": 0 }, - "id": 19, + "id": 20, "legend": { "alignAsTable": false, "avg": false, @@ -1435,7 +1521,7 @@ "x": 0, "y": 0 }, - "id": 20, + "id": 21, "panels": [ { "aliasColors": { }, @@ -1452,7 +1538,7 @@ "x": 0, "y": 0 }, - "id": 21, + "id": 22, "legend": { "alignAsTable": false, "avg": false, @@ -1552,7 +1638,7 @@ "x": 12, "y": 0 }, - "id": 22, + "id": 23, "legend": { "alignAsTable": false, "avg": false, @@ -1638,7 +1724,7 @@ "x": 0, "y": 0 }, - "id": 23, + "id": 24, "legend": { "alignAsTable": false, "avg": false, diff --git a/pkg/metrics/grafana/tiproxy_summary.jsonnet b/pkg/metrics/grafana/tiproxy_summary.jsonnet index 887b0c90..b7ed5959 100644 --- a/pkg/metrics/grafana/tiproxy_summary.jsonnet +++ b/pkg/metrics/grafana/tiproxy_summary.jsonnet @@ -106,6 +106,20 @@ local connectionP = graphPanel.new( ) ); +local createConnP = graphPanel.new( + title='Create Connection OPM', + datasource=myDS, + legend_rightSide=true, + description='TiProxy create connection count per minute.', + format='short', +) +.addTarget( + prometheus.target( + 'sum(increase(tiproxy_server_create_connection_total{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster", instance=~"$instance"}[1m]))', + legendFormat='{{instance}}', + ) +); + local disconnP = graphPanel.new( title='Disconnection OPM', datasource=myDS, @@ -420,10 +434,11 @@ newDash serverRow .addPanel(cpuP, gridPos=leftPanelPos) .addPanel(memP, gridPos=rightPanelPos) - .addPanel(connectionP, gridPos=leftPanelPos) + .addPanel(uptimeP, gridPos=leftPanelPos) + .addPanel(connectionP, gridPos=rightPanelPos) + .addPanel(createConnP, gridPos=leftPanelPos) .addPanel(disconnP, gridPos=rightPanelPos) .addPanel(goroutineP, gridPos=leftPanelPos) - .addPanel(uptimeP, gridPos=rightPanelPos) , gridPos=rowPos ) diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index e5cbf7b8..7ae0c4c1 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -154,6 +154,7 @@ func registerProxyMetrics() { prometheus.MustRegister(collectors.NewGoCollector(collectors.WithGoCollections(collectors.GoRuntimeMetricsCollection | collectors.GoRuntimeMemStatsCollection))) prometheus.MustRegister(ConnGauge) + prometheus.MustRegister(CreateConnCounter) prometheus.MustRegister(DisConnCounter) prometheus.MustRegister(MaxProcsGauge) prometheus.MustRegister(ServerEventCounter) diff --git a/pkg/metrics/server.go b/pkg/metrics/server.go index d93b8c4c..379eb851 100644 --- a/pkg/metrics/server.go +++ b/pkg/metrics/server.go @@ -26,6 +26,14 @@ var ( Help: "Number of connections.", }) + CreateConnCounter = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: ModuleProxy, + Subsystem: LabelServer, + Name: "create_connection_total", + Help: "Number of create connections.", + }) + DisConnCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: ModuleProxy, diff --git a/pkg/proxy/proxy.go b/pkg/proxy/proxy.go index 5c6baa8f..3cbc8873 100644 --- a/pkg/proxy/proxy.go +++ b/pkg/proxy/proxy.go @@ -180,6 +180,7 @@ func (s *SQLServer) onConn(ctx context.Context, conn net.Conn, addr string) { s.mu.Unlock() metrics.ConnGauge.Inc() + metrics.CreateConnCounter.Inc() defer func() { s.mu.Lock() diff --git a/pkg/proxy/proxy_test.go b/pkg/proxy/proxy_test.go index bc10a515..29ef7445 100644 --- a/pkg/proxy/proxy_test.go +++ b/pkg/proxy/proxy_test.go @@ -19,12 +19,53 @@ import ( "github.com/pingcap/tiproxy/lib/util/waitgroup" "github.com/pingcap/tiproxy/pkg/manager/cert" "github.com/pingcap/tiproxy/pkg/manager/router" + "github.com/pingcap/tiproxy/pkg/metrics" "github.com/pingcap/tiproxy/pkg/proxy/backend" "github.com/pingcap/tiproxy/pkg/proxy/client" pnet "github.com/pingcap/tiproxy/pkg/proxy/net" "github.com/stretchr/testify/require" ) +func TestCreateConn(t *testing.T) { + lg, _ := logger.CreateLoggerForTest(t) + certManager := cert.NewCertManager() + require.NoError(t, certManager.Init(&config.Config{}, lg, nil)) + server, err := NewSQLServer(lg, config.ProxyServer{}, certManager, &panicHsHandler{}) + require.NoError(t, err) + server.Run(context.Background(), nil) + defer func() { + require.NoError(t, server.Close()) + }() + + createConn := func() net.Conn { + conn, err := net.Dial("tcp", server.listeners[0].Addr().String()) + require.NoError(t, err) + return conn + } + checkMetrics := func(totalConns, createConns int) { + require.Eventually(t, func() bool { + connGauge, err := metrics.ReadGauge(metrics.ConnGauge) + require.NoError(t, err) + if totalConns != int(connGauge) { + return false + } + connCounter, err := metrics.ReadCounter(metrics.CreateConnCounter) + require.NoError(t, err) + return createConns == connCounter + }, time.Second, 10*time.Millisecond) + } + + checkMetrics(0, 0) + conn1 := createConn() + checkMetrics(1, 1) + conn2 := createConn() + checkMetrics(2, 2) + require.NoError(t, conn1.Close()) + checkMetrics(1, 2) + require.NoError(t, conn2.Close()) + checkMetrics(0, 2) +} + func TestGracefulCloseConn(t *testing.T) { // Graceful shutdown finishes immediately if there's no connection. lg, _ := logger.CreateLoggerForTest(t) From 0856a85b0551d331f6ba26d7dadcf6812e017f53 Mon Sep 17 00:00:00 2001 From: djshow832 <873581766@qq.com> Date: Tue, 19 Dec 2023 10:14:53 +0800 Subject: [PATCH 3/4] fix promQL --- pkg/metrics/grafana/tiproxy_summary.json | 2 +- pkg/metrics/grafana/tiproxy_summary.jsonnet | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/metrics/grafana/tiproxy_summary.json b/pkg/metrics/grafana/tiproxy_summary.json index 6490e515..8a61640b 100644 --- a/pkg/metrics/grafana/tiproxy_summary.json +++ b/pkg/metrics/grafana/tiproxy_summary.json @@ -439,7 +439,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(increase(tiproxy_server_create_connection_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "expr": "sum(increase(tiproxy_server_create_connection_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance)", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}}", diff --git a/pkg/metrics/grafana/tiproxy_summary.jsonnet b/pkg/metrics/grafana/tiproxy_summary.jsonnet index b7ed5959..bcc76566 100644 --- a/pkg/metrics/grafana/tiproxy_summary.jsonnet +++ b/pkg/metrics/grafana/tiproxy_summary.jsonnet @@ -115,7 +115,7 @@ local createConnP = graphPanel.new( ) .addTarget( prometheus.target( - 'sum(increase(tiproxy_server_create_connection_total{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster", instance=~"$instance"}[1m]))', + 'sum(increase(tiproxy_server_create_connection_total{k8s_cluster="$k8s_cluster", tidb_cluster="$tidb_cluster", instance=~"$instance"}[1m])) by (instance)', legendFormat='{{instance}}', ) ); From 2d85a05e49b7a2751b9df36e2e7baf2efe25bd26 Mon Sep 17 00:00:00 2001 From: djshow832 <873581766@qq.com> Date: Thu, 21 Dec 2023 19:45:38 +0800 Subject: [PATCH 4/4] remove github workflows --- .github/workflows/common.yml | 77 ---------------------------------- .github/workflows/main.yml | 66 ----------------------------- .github/workflows/release.yaml | 31 -------------- 3 files changed, 174 deletions(-) delete mode 100644 .github/workflows/common.yml delete mode 100644 .github/workflows/main.yml delete mode 100644 .github/workflows/release.yaml diff --git a/.github/workflows/common.yml b/.github/workflows/common.yml deleted file mode 100644 index f5cae548..00000000 --- a/.github/workflows/common.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: make -on: - workflow_call: - inputs: - debug: - type: boolean - description: "set tmate on failure" - required: true - target: - type: string - description: "makefile target" - required: true - ref: - type: string - description: "checkout specific ref" - required: true - all_platform: - type: boolean - description: "test on all platforms or not" - required: true - -defaults: - run: - shell: bash - -jobs: - make: - strategy: - matrix: - platform: ${{ inputs.all_platform && fromJSON('["ubuntu-latest", "macos-latest", "windows-latest"]') || fromJSON('["ubuntu-latest"]') }} - runs-on: ${{ matrix.platform }} - steps: - - if: ${{ runner.os == 'Windows' }} - name: Use GNU tar for faster cache restore - shell: cmd - run: | - echo "Adding GNU tar to PATH" - echo C:\Program Files\Git\usr\bin>>"%GITHUB_PATH%" - - name: "checkout repo" - uses: actions/checkout@v3 - with: - ref: ${{ inputs.ref }} - - name: "setup golang" - uses: actions/setup-go@v3 - with: - go-version-file: go.mod - check-latest: true - - name: "set vars" - id: cache - run: | - echo "::set-output name=go_cache::$(go env GOCACHE)" - echo "::set-output name=go_mod_cache::$(go env GOMODCACHE)" - - name: "try to use build cache" - uses: actions/cache@v3 - with: - path: | - ${{ steps.cache.outputs.go_cache }} - ${{ steps.cache.outputs.go_mod_cache }} - key: ${{ runner.os }}-go-${{ inputs.target == 'cache' && github.run_id || hashFiles('**/go.sum')}} - restore-keys: | - ${{ runner.os }}-go- - - name : "tidy mod if needed" - id: tidy - if: ${{ inputs.target == 'cmd' && runner.os != 'Windows' }} - uses: evantorrie/mott-the-tidier@v1-beta - with: - gomods: | - go.mod - **/go.mod - - name: "fail if there are untidy files" - if: ${{ steps.tidy.outputs.changedfiles != '' }} - run: exit 2 - - name: make ${{ inputs.target }} - run: make ${{ inputs.target }} - - name: "set up tmate session if necessary" - if: ${{ failure() && inputs.debug }} - uses: mxschmitt/action-tmate@v3 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index 6a393375..00000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: main -on: - workflow_dispatch: - inputs: - debug: - type: boolean - description: "Run the build with tmate debugging enabled" - required: false - default: false - ref: - type: string - description: "Checkout specific ref for the workflow" - required: false - pull_request: - branches: - - main - - release-* - # cache only shares from the same branch, we need to trigger an extra flow on pushing - # thus all PRs will share the cache from main/release-* branches - push: - branches: - - main - - release-* - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - cmd: - if: ${{ github.event_name != 'push' }} - uses: ./.github/workflows/common.yml - with: - debug: ${{ github.event_name == 'workflow_dispatch' && inputs.debug }} - ref: ${{ inputs.ref || github.ref }} - target: "cmd" - all_platform: true - - lint: - if: ${{ github.event_name != 'push' }} - needs: cmd - uses: ./.github/workflows/common.yml - with: - debug: ${{ github.event_name == 'workflow_dispatch' && inputs.debug }} - ref: ${{ inputs.ref || github.ref }} - target: "lint" - all_platform: false - - test: - if: ${{ github.event_name != 'push' }} - needs: cmd - uses: ./.github/workflows/common.yml - with: - debug: ${{ github.event_name == 'workflow_dispatch' && inputs.debug }} - ref: ${{ inputs.ref || github.ref }} - target: "test" - all_platform: false - - cache: - if: ${{ github.event_name == 'push' }} - uses: ./.github/workflows/common.yml - with: - ref: ${{ inputs.ref || github.ref }} - debug: false - target: "build" - all_platform: true diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml deleted file mode 100644 index 08df0267..00000000 --- a/.github/workflows/release.yaml +++ /dev/null @@ -1,31 +0,0 @@ -name: goreleaser - -on: - push: - tags: - - v* - -permissions: - contents: write - -jobs: - goreleaser: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - run: git fetch --force --tags - - uses: actions/setup-go@v4 - with: - go-version: stable - # More assembly might be required: Docker logins, GPG, etc. It all depends - # on your needs. - - uses: goreleaser/goreleaser-action@v4 - with: - # either 'goreleaser' (default) or 'goreleaser-pro': - distribution: goreleaser - version: latest - args: release --clean - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}