Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mcs: collect scheduling statistics #7112

Merged
merged 4 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
255 changes: 124 additions & 131 deletions metrics/grafana/pd.json

Large diffs are not rendered by default.

66 changes: 65 additions & 1 deletion pkg/mcs/scheduling/server/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -425,12 +425,76 @@
c.coordinator.RunUntilStop()
}

func (c *Cluster) runMetricsCollectionJob() {
defer logutil.LogPanic()
defer c.wg.Done()

ticker := time.NewTicker(10 * time.Second)
defer ticker.Stop()

for {
select {
case <-c.ctx.Done():
log.Info("metrics are reset")
c.resetMetrics()
log.Info("metrics collection job has been stopped")
return
case <-ticker.C:
c.collectMetrics()
}
}
}

func (c *Cluster) collectMetrics() {
statsMap := statistics.NewStoreStatisticsMap(c.persistConfig)
stores := c.GetStores()
for _, s := range stores {
statsMap.Observe(s)
statsMap.ObserveHotStat(s, c.hotStat.StoresStats)
}
statsMap.Collect()

c.coordinator.GetSchedulersController().CollectSchedulerMetrics()
c.coordinator.CollectHotSpotMetrics()
c.collectClusterMetrics()
}

func (c *Cluster) collectClusterMetrics() {
if c.regionStats == nil {
return

Check warning on line 464 in pkg/mcs/scheduling/server/cluster.go

View check run for this annotation

Codecov / codecov/patch

pkg/mcs/scheduling/server/cluster.go#L464

Added line #L464 was not covered by tests
}
c.regionStats.Collect()
c.labelStats.Collect()
// collect hot cache metrics
c.hotStat.CollectMetrics()
}

func (c *Cluster) resetMetrics() {
statsMap := statistics.NewStoreStatisticsMap(c.persistConfig)
statsMap.Reset()

c.coordinator.GetSchedulersController().ResetSchedulerMetrics()
c.coordinator.ResetHotSpotMetrics()
c.resetClusterMetrics()
}

func (c *Cluster) resetClusterMetrics() {
if c.regionStats == nil {
return

Check warning on line 483 in pkg/mcs/scheduling/server/cluster.go

View check run for this annotation

Codecov / codecov/patch

pkg/mcs/scheduling/server/cluster.go#L483

Added line #L483 was not covered by tests
}
c.regionStats.Reset()
c.labelStats.Reset()
// reset hot cache metrics
c.hotStat.ResetMetrics()
}

// StartBackgroundJobs starts background jobs.
func (c *Cluster) StartBackgroundJobs() {
c.wg.Add(3)
c.wg.Add(4)
go c.updateScheduler()
go c.runUpdateStoreStats()
go c.runCoordinator()
go c.runMetricsCollectionJob()
c.running.Store(true)
}

Expand Down
17 changes: 16 additions & 1 deletion pkg/mcs/scheduling/server/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,11 @@ func (o *PersistConfig) IsSchedulingHalted() bool {
return o.GetScheduleConfig().HaltScheduling
}

// GetStoresLimit gets the stores' limit.
func (o *PersistConfig) GetStoresLimit() map[uint64]sc.StoreLimitConfig {
return o.GetScheduleConfig().StoreLimit
}

// GetStoreLimitByType returns the limit of a store with a given type.
func (o *PersistConfig) GetStoreLimitByType(storeID uint64, typ storelimit.Type) (returned float64) {
limit := o.GetStoreLimit(storeID)
Expand Down Expand Up @@ -620,11 +625,21 @@ func (o *PersistConfig) GetRegionMaxSize() uint64 {
return o.GetStoreConfig().GetRegionMaxSize()
}

// GetRegionMaxKeys returns the region split keys
// GetRegionMaxKeys returns the max region keys
func (o *PersistConfig) GetRegionMaxKeys() uint64 {
return o.GetStoreConfig().GetRegionMaxKeys()
}

// GetRegionSplitSize returns the region split size in MB
func (o *PersistConfig) GetRegionSplitSize() uint64 {
return o.GetStoreConfig().GetRegionSplitSize()
}

// GetRegionSplitKeys returns the region split keys
func (o *PersistConfig) GetRegionSplitKeys() uint64 {
return o.GetStoreConfig().GetRegionSplitKeys()
}

// IsEnableRegionBucket return true if the region bucket is enabled.
func (o *PersistConfig) IsEnableRegionBucket() bool {
return o.GetStoreConfig().IsEnableRegionBucket()
Expand Down
3 changes: 3 additions & 0 deletions pkg/schedule/config/config_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ type SchedulerConfigProvider interface {
SharedConfigProvider

IsSchedulingHalted() bool
GetStoresLimit() map[uint64]StoreLimitConfig

IsSchedulerDisabled(string) bool
AddSchedulerCfg(string, []string)
Expand Down Expand Up @@ -137,6 +138,8 @@ type ConfProvider interface {
type StoreConfigProvider interface {
GetRegionMaxSize() uint64
GetRegionMaxKeys() uint64
GetRegionSplitSize() uint64
GetRegionSplitKeys() uint64
CheckRegionSize(uint64, uint64) error
CheckRegionKeys(uint64, uint64) error
IsEnableRegionBucket() bool
Expand Down
1 change: 1 addition & 0 deletions pkg/schedule/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ func (c *Coordinator) PatrolRegions() {
// Note: we reset the ticker here to support updating configuration dynamically.
ticker.Reset(c.cluster.GetCheckerConfig().GetPatrolRegionInterval())
case <-c.ctx.Done():
patrolCheckRegionsGauge.Set(0)
log.Info("patrol regions has been stopped")
return
}
Expand Down
19 changes: 9 additions & 10 deletions pkg/statistics/store_collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ import (
"github.com/tikv/pd/pkg/core"
"github.com/tikv/pd/pkg/core/constant"
"github.com/tikv/pd/pkg/core/storelimit"
"github.com/tikv/pd/pkg/schedule/config"
"github.com/tikv/pd/pkg/statistics/utils"
"github.com/tikv/pd/server/config"
)

const (
Expand All @@ -32,7 +32,7 @@ const (
)

type storeStatistics struct {
opt *config.PersistOptions
opt config.ConfProvider
Up int
Disconnect int
Unhealthy int
Expand All @@ -54,7 +54,7 @@ type storeStatistics struct {
Removed int
}

func newStoreStatistics(opt *config.PersistOptions) *storeStatistics {
func newStoreStatistics(opt config.ConfProvider) *storeStatistics {
return &storeStatistics{
opt: opt,
LabelCounter: make(map[string]int),
Expand Down Expand Up @@ -222,11 +222,10 @@ func (s *storeStatistics) Collect() {
configs["max-snapshot-count"] = float64(s.opt.GetMaxSnapshotCount())
configs["max-merge-region-size"] = float64(s.opt.GetMaxMergeRegionSize())
configs["max-merge-region-keys"] = float64(s.opt.GetMaxMergeRegionKeys())
storeConfig := s.opt.GetStoreConfig()
configs["region-max-size"] = float64(storeConfig.GetRegionMaxSize())
configs["region-split-size"] = float64(storeConfig.GetRegionSplitSize())
configs["region-split-keys"] = float64(storeConfig.GetRegionSplitKeys())
configs["region-max-keys"] = float64(storeConfig.GetRegionMaxKeys())
configs["region-max-size"] = float64(s.opt.GetRegionMaxSize())
configs["region-split-size"] = float64(s.opt.GetRegionSplitSize())
configs["region-split-keys"] = float64(s.opt.GetRegionSplitKeys())
configs["region-max-keys"] = float64(s.opt.GetRegionMaxKeys())

var enableMakeUpReplica, enableRemoveDownReplica, enableRemoveExtraReplica, enableReplaceOfflineReplica float64
if s.opt.IsMakeUpReplicaEnabled() {
Expand Down Expand Up @@ -290,12 +289,12 @@ func (s *storeStatistics) resetStoreStatistics(storeAddress string, id string) {
}

type storeStatisticsMap struct {
opt *config.PersistOptions
opt config.ConfProvider
stats *storeStatistics
}

// NewStoreStatisticsMap creates a new storeStatisticsMap.
func NewStoreStatisticsMap(opt *config.PersistOptions) *storeStatisticsMap {
func NewStoreStatisticsMap(opt config.ConfProvider) *storeStatisticsMap {
return &storeStatisticsMap{
opt: opt,
stats: newStoreStatistics(opt),
Expand Down
15 changes: 6 additions & 9 deletions server/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -2153,17 +2153,14 @@ func (c *RaftCluster) deleteStore(store *core.StoreInfo) error {
}

func (c *RaftCluster) collectMetrics() {
statsMap := statistics.NewStoreStatisticsMap(c.opt)
stores := c.GetStores()
for _, s := range stores {
statsMap.Observe(s)
if !c.isAPIServiceMode {
if !c.isAPIServiceMode {
statsMap := statistics.NewStoreStatisticsMap(c.opt)
stores := c.GetStores()
for _, s := range stores {
statsMap.Observe(s)
statsMap.ObserveHotStat(s, c.hotStat.StoresStats)
}
}
statsMap.Collect()

if !c.isAPIServiceMode {
statsMap.Collect()
c.coordinator.GetSchedulersController().CollectSchedulerMetrics()
c.coordinator.CollectHotSpotMetrics()
c.collectClusterMetrics()
Expand Down
2 changes: 1 addition & 1 deletion server/config/persist_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -1000,7 +1000,7 @@ func (o *PersistOptions) GetRegionMaxSize() uint64 {
return o.GetStoreConfig().GetRegionMaxSize()
}

// GetRegionMaxKeys returns the region split keys
// GetRegionMaxKeys returns the max region keys
func (o *PersistOptions) GetRegionMaxKeys() uint64 {
return o.GetStoreConfig().GetRegionMaxKeys()
}
Expand Down
13 changes: 2 additions & 11 deletions tools/pd-api-bench/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -37,28 +37,27 @@ require (
github.com/elliotchance/pie/v2 v2.1.0 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/gin-gonic/gin v1.8.1 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/go-playground/locales v0.14.0 // indirect
github.com/go-playground/universal-translator v0.18.0 // indirect
github.com/go-playground/validator/v10 v10.10.0 // indirect
github.com/goccy/go-json v0.9.7 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang-jwt/jwt v3.2.1+incompatible // indirect
github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/btree v1.1.2 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/gorilla/mux v1.7.4 // indirect
github.com/gorilla/websocket v1.4.2 // indirect
github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 // indirect
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/jonboulle/clockwork v0.2.2 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/konsorten/go-windows-terminal-sequences v1.0.3 // indirect
github.com/leodido/go-urn v1.2.1 // indirect
github.com/lufia/plan9stats v0.0.0-20230326075908-cb1d2100619a // indirect
github.com/mattn/go-isatty v0.0.14 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
Expand All @@ -72,30 +71,22 @@ require (
github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 // indirect
github.com/pingcap/kvproto v0.0.0-20230920042517-db656f45023b // indirect
github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 // indirect
github.com/pingcap/sysutil v1.0.1-0.20230407040306-fb007c5aff21 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/power-devops/perfstat v0.0.0-20221212215047-62379fc7944b // indirect
github.com/prometheus/client_golang v1.11.1 // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common v0.26.0 // indirect
github.com/prometheus/procfs v0.6.0 // indirect
github.com/sasha-s/go-deadlock v0.2.0 // indirect
github.com/shirou/gopsutil/v3 v3.23.3 // indirect
github.com/shoenig/go-m1cpu v0.1.5 // indirect
github.com/sirupsen/logrus v1.6.0 // indirect
github.com/smallnest/chanx v0.0.0-20221229104322-eb4c998d2072 // indirect
github.com/soheilhy/cmux v0.1.4 // indirect
github.com/spf13/cobra v1.0.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.8.2 // indirect
github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965 // indirect
github.com/tklauser/go-sysconf v0.3.11 // indirect
github.com/tklauser/numcpus v0.6.0 // indirect
github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 // indirect
github.com/ugorji/go/codec v1.2.7 // indirect
github.com/unrolled/render v1.0.1 // indirect
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
github.com/yusufpapurcu/wmi v1.2.2 // indirect
go.etcd.io/bbolt v1.3.6 // indirect
go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793 // indirect
go.uber.org/atomic v1.10.0 // indirect
Expand Down
Loading
Loading