diff --git a/pkg/mcs/resource_manager/server/manager.go b/pkg/mcs/resource_manager/server/manager.go index 1f0e33f41a3..8e4b23c8c9f 100644 --- a/pkg/mcs/resource_manager/server/manager.go +++ b/pkg/mcs/resource_manager/server/manager.go @@ -42,7 +42,7 @@ const ( defaultConsumptionChanSize = 1024 metricsCleanupInterval = time.Minute metricsCleanupTimeout = 20 * time.Minute - metricsAvailableRUInterval = 1 * time.Second + metricsAvailableRUInterval = 30 * time.Second defaultCollectIntervalSec = 20 tickPerSecond = time.Second @@ -420,7 +420,6 @@ func (m *Manager) backgroundMetricsFlush(ctx context.Context) { // Clean up the metrics that have not been updated for a long time. for name, lastTime := range m.consumptionRecord { if time.Since(lastTime) > metricsCleanupTimeout { -<<<<<<< HEAD:pkg/mcs/resource_manager/server/manager.go readRequestUnitCost.DeleteLabelValues(name) writeRequestUnitCost.DeleteLabelValues(name) sqlLayerRequestUnitCost.DeleteLabelValues(name) @@ -432,22 +431,9 @@ func (m *Manager) backgroundMetricsFlush(ctx context.Context) { requestCount.DeleteLabelValues(name, writeTypeLabel) availableRUCounter.DeleteLabelValues(name) delete(m.consumptionRecord, name) -======= - readRequestUnitCost.DeleteLabelValues(r.name, r.name, r.ruType) - writeRequestUnitCost.DeleteLabelValues(r.name, r.name, r.ruType) - sqlLayerRequestUnitCost.DeleteLabelValues(r.name, r.name, r.ruType) - readByteCost.DeleteLabelValues(r.name, r.name, r.ruType) - writeByteCost.DeleteLabelValues(r.name, r.name, r.ruType) - kvCPUCost.DeleteLabelValues(r.name, r.name, r.ruType) - sqlCPUCost.DeleteLabelValues(r.name, r.name, r.ruType) - requestCount.DeleteLabelValues(r.name, r.name, readTypeLabel) - requestCount.DeleteLabelValues(r.name, r.name, writeTypeLabel) - availableRUCounter.DeleteLabelValues(r.name, r.name, r.ruType) - delete(m.consumptionRecord, r) - delete(maxPerSecTrackers, r.name) - readRequestUnitMaxPerSecCost.DeleteLabelValues(r.name) - writeRequestUnitMaxPerSecCost.DeleteLabelValues(r.name) ->>>>>>> 52e876337 (resource_manager: record the max RU per second (#7936)):pkg/mcs/resourcemanager/server/manager.go + delete(maxPerSecTrackers, name) + readRequestUnitMaxPerSecCost.DeleteLabelValues(name) + writeRequestUnitMaxPerSecCost.DeleteLabelValues(name) } } case <-availableRUTicker.C: @@ -466,10 +452,7 @@ func (m *Manager) backgroundMetricsFlush(ctx context.Context) { if ru < 0 { ru = 0 } -<<<<<<< HEAD:pkg/mcs/resource_manager/server/manager.go - availableRUCounter.WithLabelValues(name).Set(ru) -======= - availableRUCounter.WithLabelValues(group.Name, group.Name).Set(ru) + availableRUCounter.WithLabelValues(group.Name).Set(ru) } case <-recordMaxTicker.C: @@ -478,7 +461,6 @@ func (m *Manager) backgroundMetricsFlush(ctx context.Context) { names := make([]string, 0, len(m.groups)) for name := range m.groups { names = append(names, name) ->>>>>>> 52e876337 (resource_manager: record the max RU per second (#7936)):pkg/mcs/resourcemanager/server/manager.go } m.RUnlock() for _, name := range names { diff --git a/pkg/mcs/resource_manager/server/metrics.go b/pkg/mcs/resource_manager/server/metrics.go index c33cafa3627..dec877d8c86 100644 --- a/pkg/mcs/resource_manager/server/metrics.go +++ b/pkg/mcs/resource_manager/server/metrics.go @@ -19,14 +19,15 @@ import ( ) const ( - namespace = "resource_manager" - serverSubsystem = "server" - ruSubsystem = "resource_unit" - resourceSubsystem = "resource" - resourceGroupNameLabel = "name" - typeLabel = "type" - readTypeLabel = "read" - writeTypeLabel = "write" + namespace = "resource_manager" + serverSubsystem = "server" + ruSubsystem = "resource_unit" + resourceSubsystem = "resource" + resourceGroupNameLabel = "name" + typeLabel = "type" + readTypeLabel = "read" + writeTypeLabel = "write" + newResourceGroupNameLabel = "resource_group" ) var ( @@ -55,6 +56,21 @@ var ( Help: "Bucketed histogram of the write request unit cost for all resource groups.", Buckets: prometheus.ExponentialBuckets(3, 10, 5), // 3 ~ 300000 }, []string{resourceGroupNameLabel}) + readRequestUnitMaxPerSecCost = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: ruSubsystem, + Name: "read_request_unit_max_per_sec", + Help: "Gauge of the max read request unit per second for all resource groups.", + }, []string{newResourceGroupNameLabel}) + writeRequestUnitMaxPerSecCost = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: ruSubsystem, + Name: "write_request_unit_max_per_sec", + Help: "Gauge of the max write request unit per second for all resource groups.", + }, []string{newResourceGroupNameLabel}) + sqlLayerRequestUnitCost = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: namespace, diff --git a/pkg/mcs/resourcemanager/server/metrics.go b/pkg/mcs/resourcemanager/server/metrics.go deleted file mode 100644 index 45c94e5c735..00000000000 --- a/pkg/mcs/resourcemanager/server/metrics.go +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright 2023 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package server - -import "github.com/prometheus/client_golang/prometheus" - -const ( - namespace = "resource_manager" - serverSubsystem = "server" - ruSubsystem = "resource_unit" - resourceSubsystem = "resource" - resourceGroupNameLabel = "name" - typeLabel = "type" - readTypeLabel = "read" - writeTypeLabel = "write" - backgroundTypeLabel = "background" - tiflashTypeLabel = "ap" - defaultTypeLabel = "tp" - newResourceGroupNameLabel = "resource_group" -) - -var ( - // RU cost metrics. - // `sum` is added to the name to maintain compatibility with the previous use of histogram. - readRequestUnitCost = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: ruSubsystem, - Name: "read_request_unit_sum", - Help: "Counter of the read request unit cost for all resource groups.", - }, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel}) - writeRequestUnitCost = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: ruSubsystem, - Name: "write_request_unit_sum", - Help: "Counter of the write request unit cost for all resource groups.", - }, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel}) - - readRequestUnitMaxPerSecCost = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: ruSubsystem, - Name: "read_request_unit_max_per_sec", - Help: "Gauge of the max read request unit per second for all resource groups.", - }, []string{newResourceGroupNameLabel}) - writeRequestUnitMaxPerSecCost = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: ruSubsystem, - Name: "write_request_unit_max_per_sec", - Help: "Gauge of the max write request unit per second for all resource groups.", - }, []string{newResourceGroupNameLabel}) - - sqlLayerRequestUnitCost = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: ruSubsystem, - Name: "sql_layer_request_unit_sum", - Help: "The number of the sql layer request unit cost for all resource groups.", - }, []string{resourceGroupNameLabel, newResourceGroupNameLabel}) - - // Resource cost metrics. - readByteCost = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: resourceSubsystem, - Name: "read_byte_sum", - Help: "Counter of the read byte cost for all resource groups.", - }, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel}) - writeByteCost = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: resourceSubsystem, - Name: "write_byte_sum", - Help: "Counter of the write byte cost for all resource groups.", - }, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel}) - kvCPUCost = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: resourceSubsystem, - Name: "kv_cpu_time_ms_sum", - Help: "Counter of the KV CPU time cost in milliseconds for all resource groups.", - }, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel}) - sqlCPUCost = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: resourceSubsystem, - Name: "sql_cpu_time_ms_sum", - Help: "Counter of the SQL CPU time cost in milliseconds for all resource groups.", - }, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel}) - requestCount = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: resourceSubsystem, - Name: "request_count", - Help: "The number of read/write requests for all resource groups.", - }, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel}) - - availableRUCounter = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: ruSubsystem, - Name: "available_ru", - Help: "Counter of the available RU for all resource groups.", - }, []string{resourceGroupNameLabel, newResourceGroupNameLabel}) -) - -func init() { - prometheus.MustRegister(readRequestUnitCost) - prometheus.MustRegister(writeRequestUnitCost) - prometheus.MustRegister(sqlLayerRequestUnitCost) - prometheus.MustRegister(readByteCost) - prometheus.MustRegister(writeByteCost) - prometheus.MustRegister(kvCPUCost) - prometheus.MustRegister(sqlCPUCost) - prometheus.MustRegister(requestCount) - prometheus.MustRegister(availableRUCounter) - prometheus.MustRegister(readRequestUnitMaxPerSecCost) - prometheus.MustRegister(writeRequestUnitMaxPerSecCost) -}