diff --git a/OWNERS b/OWNERS index 5911dfd3b66..0c8b972be1e 100644 --- a/OWNERS +++ b/OWNERS @@ -22,5 +22,6 @@ reviewers: - BusyJay - howardlau1999 - Luffbee + - okJiang - shafreeck - xhebox diff --git a/client/http/interface.go b/client/http/interface.go index f90ab19624f..cd9fc22702e 100644 --- a/client/http/interface.go +++ b/client/http/interface.go @@ -715,7 +715,7 @@ func (c *client) GetRegionLabelRulesByIDs(ctx context.Context, ruleIDs []string) var labelRules []*LabelRule err = c.request(ctx, newRequestInfo(). WithName(getRegionLabelRulesByIDsName). - WithURI(RegionLabelRules). + WithURI(RegionLabelRulesByIDs). WithMethod(http.MethodGet). WithBody(idsJSON). WithResp(&labelRules)) diff --git a/client/resource_group/controller/controller.go b/client/resource_group/controller/controller.go index cc18817d9c5..8e32b156a61 100644 --- a/client/resource_group/controller/controller.go +++ b/client/resource_group/controller/controller.go @@ -46,6 +46,8 @@ const ( slowNotifyFilterDuration = 10 * time.Millisecond watchRetryInterval = 30 * time.Second + + bigRequestThreshold = 4 * 1024 * 1024 // 4MB -> 16 RRU ) type selectType int @@ -69,6 +71,9 @@ type ResourceGroupKVInterceptor interface { OnRequestWait(ctx context.Context, resourceGroupName string, info RequestInfo) (*rmpb.Consumption, *rmpb.Consumption, time.Duration, uint32, error) // OnResponse is used to consume tokens after receiving response. OnResponse(resourceGroupName string, req RequestInfo, resp ResponseInfo) (*rmpb.Consumption, error) + // OnResponseWait is used to consume tokens after receiving a response. If the response requires many tokens, we need to wait for the tokens. + // This is an optimized version of OnResponse for cases where the response requires many tokens, making the debt smaller and smoother. + OnResponseWait(ctx context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo) (*rmpb.Consumption, time.Duration, error) // IsBackgroundRequest If the resource group has background jobs, we should not record consumption and wait for it. IsBackgroundRequest(ctx context.Context, resourceGroupName, requestResource string) bool } @@ -642,7 +647,7 @@ func (c *ResourceGroupsController) OnRequestWait( if err != nil { return nil, nil, time.Duration(0), 0, err } - return gc.onRequestWait(ctx, info) + return gc.onRequestWaitImpl(ctx, info) } // OnResponse is used to consume tokens after receiving response @@ -654,7 +659,19 @@ func (c *ResourceGroupsController) OnResponse( log.Warn("[resource group controller] resource group name does not exist", zap.String("name", resourceGroupName)) return &rmpb.Consumption{}, nil } - return gc.onResponse(req, resp) + return gc.onResponseImpl(req, resp) +} + +// OnResponseWait is used to consume tokens after receiving response +func (c *ResourceGroupsController) OnResponseWait( + ctx context.Context, resourceGroupName string, req RequestInfo, resp ResponseInfo, +) (*rmpb.Consumption, time.Duration, error) { + gc, ok := c.loadGroupController(resourceGroupName) + if !ok { + log.Warn("[resource group controller] resource group name does not exist", zap.String("name", resourceGroupName)) + return &rmpb.Consumption{}, time.Duration(0), nil + } + return gc.onResponseWaitImpl(ctx, req, resp) } // IsBackgroundRequest If the resource group has background jobs, we should not record consumption and wait for it. @@ -722,6 +739,8 @@ type groupCostController struct { // fast path to make once token limit with un-limit burst. 
burstable *atomic.Bool + // is throttled + isThrottled *atomic.Bool lowRUNotifyChan chan<- notifyMsg tokenBucketUpdateChan chan<- *groupCostController @@ -770,6 +789,8 @@ type groupMetricsCollection struct { failedRequestCounterWithOthers prometheus.Counter failedRequestCounterWithThrottled prometheus.Counter tokenRequestCounter prometheus.Counter + runningKVRequestCounter prometheus.Gauge + consumeTokenHistogram prometheus.Observer } func initMetrics(oldName, name string) *groupMetricsCollection { @@ -784,6 +805,8 @@ func initMetrics(oldName, name string) *groupMetricsCollection { failedRequestCounterWithThrottled: failedRequestCounter.WithLabelValues(oldName, name, throttledType), requestRetryCounter: requestRetryCounter.WithLabelValues(oldName, name), tokenRequestCounter: resourceGroupTokenRequestCounter.WithLabelValues(oldName, name), + runningKVRequestCounter: groupRunningKVRequestCounter.WithLabelValues(name), + consumeTokenHistogram: tokenConsumedHistogram.WithLabelValues(name), } } @@ -841,6 +864,7 @@ func newGroupCostController( tokenBucketUpdateChan: tokenBucketUpdateChan, lowRUNotifyChan: lowRUNotifyChan, burstable: &atomic.Bool{}, + isThrottled: &atomic.Bool{}, } switch gc.mode { @@ -937,7 +961,7 @@ func (gc *groupCostController) updateRunState() { } *gc.run.consumption = *gc.mu.consumption gc.mu.Unlock() - logControllerTrace("[resource group controller] update run state", zap.String("name", gc.name), zap.Any("request-unit-consumption", gc.run.consumption)) + logControllerTrace("[resource group controller] update run state", zap.String("name", gc.name), zap.Any("request-unit-consumption", gc.run.consumption), zap.Bool("is-throttled", gc.isThrottled.Load())) gc.run.now = newTime } @@ -1018,7 +1042,7 @@ func (gc *groupCostController) updateAvgRaWResourcePerSec() { if !gc.calcAvg(counter, getRawResourceValueFromConsumption(gc.run.consumption, typ)) { continue } - logControllerTrace("[resource group controller] update avg raw resource per sec", zap.String("name", gc.name), zap.String("type", rmpb.RawResourceType_name[int32(typ)]), zap.Float64("avg-ru-per-sec", counter.avgRUPerSec)) + logControllerTrace("[resource group controller] update avg raw resource per sec", zap.String("name", gc.name), zap.String("type", rmpb.RawResourceType_name[int32(typ)]), zap.Float64("avg-ru-per-sec", counter.avgRUPerSec), zap.Bool("is-throttled", gc.isThrottled.Load())) } gc.burstable.Store(isBurstable) } @@ -1032,7 +1056,7 @@ func (gc *groupCostController) updateAvgRUPerSec() { if !gc.calcAvg(counter, getRUValueFromConsumption(gc.run.consumption, typ)) { continue } - logControllerTrace("[resource group controller] update avg ru per sec", zap.String("name", gc.name), zap.String("type", rmpb.RequestUnitType_name[int32(typ)]), zap.Float64("avg-ru-per-sec", counter.avgRUPerSec)) + logControllerTrace("[resource group controller] update avg ru per sec", zap.String("name", gc.name), zap.String("type", rmpb.RequestUnitType_name[int32(typ)]), zap.Float64("avg-ru-per-sec", counter.avgRUPerSec), zap.Bool("is-throttled", gc.isThrottled.Load())) } gc.burstable.Store(isBurstable) } @@ -1179,6 +1203,7 @@ func (gc *groupCostController) modifyTokenCounter(counter *tokenCounter, bucket if cfg.NewBurst < 0 { cfg.NewTokens = float64(counter.getTokenBucketFunc().Settings.FillRate) } + gc.isThrottled.Store(false) } else { // Otherwise the granted token is delivered to the client by fill rate. 
cfg.NewTokens = 0 @@ -1199,6 +1224,7 @@ func (gc *groupCostController) modifyTokenCounter(counter *tokenCounter, bucket counter.notify.setupNotificationThreshold = 1 counter.notify.mu.Unlock() counter.lastDeadline = deadline + gc.isThrottled.Store(true) select { case gc.tokenBucketUpdateChan <- gc: default: @@ -1317,7 +1343,55 @@ func (gc *groupCostController) calcRequest(counter *tokenCounter) float64 { return value } -func (gc *groupCostController) onRequestWait( +func (gc *groupCostController) acquireTokens(ctx context.Context, delta *rmpb.Consumption, waitDuration *time.Duration, allowDebt bool) (time.Duration, error) { + gc.metrics.runningKVRequestCounter.Inc() + defer gc.metrics.runningKVRequestCounter.Dec() + var ( + err error + d time.Duration + ) +retryLoop: + for i := 0; i < gc.mainCfg.WaitRetryTimes; i++ { + now := time.Now() + switch gc.mode { + case rmpb.GroupMode_RawMode: + res := make([]*Reservation, 0, len(requestResourceLimitTypeList)) + for typ, counter := range gc.run.resourceTokens { + if v := getRawResourceValueFromConsumption(delta, typ); v > 0 { + res = append(res, counter.limiter.Reserve(ctx, gc.mainCfg.LTBMaxWaitDuration, now, v)) + } + } + if d, err = WaitReservations(ctx, now, res); err == nil || errs.ErrClientResourceGroupThrottled.NotEqual(err) { + break retryLoop + } + case rmpb.GroupMode_RUMode: + res := make([]*Reservation, 0, len(requestUnitLimitTypeList)) + for typ, counter := range gc.run.requestUnitTokens { + if v := getRUValueFromConsumption(delta, typ); v > 0 { + // record the consume token histogram if enable controller debug mode. + if enableControllerTraceLog.Load() { + gc.metrics.consumeTokenHistogram.Observe(v) + } + // allow debt for small request or not in throttled. remove tokens directly. + if allowDebt { + counter.limiter.RemoveTokens(now, v) + break retryLoop + } + res = append(res, counter.limiter.Reserve(ctx, gc.mainCfg.LTBMaxWaitDuration, now, v)) + } + } + if d, err = WaitReservations(ctx, now, res); err == nil || errs.ErrClientResourceGroupThrottled.NotEqual(err) { + break retryLoop + } + } + gc.metrics.requestRetryCounter.Inc() + time.Sleep(gc.mainCfg.WaitRetryInterval) + *waitDuration += gc.mainCfg.WaitRetryInterval + } + return d, err +} + +func (gc *groupCostController) onRequestWaitImpl( ctx context.Context, info RequestInfo, ) (*rmpb.Consumption, *rmpb.Consumption, time.Duration, uint32, error) { delta := &rmpb.Consumption{} @@ -1331,38 +1405,7 @@ func (gc *groupCostController) onRequestWait( var waitDuration time.Duration if !gc.burstable.Load() { - var err error - now := time.Now() - var i int - var d time.Duration - retryLoop: - for i = 0; i < gc.mainCfg.WaitRetryTimes; i++ { - switch gc.mode { - case rmpb.GroupMode_RawMode: - res := make([]*Reservation, 0, len(requestResourceLimitTypeList)) - for typ, counter := range gc.run.resourceTokens { - if v := getRawResourceValueFromConsumption(delta, typ); v > 0 { - res = append(res, counter.limiter.Reserve(ctx, gc.mainCfg.LTBMaxWaitDuration, now, v)) - } - } - if d, err = WaitReservations(ctx, now, res); err == nil || errs.ErrClientResourceGroupThrottled.NotEqual(err) { - break retryLoop - } - case rmpb.GroupMode_RUMode: - res := make([]*Reservation, 0, len(requestUnitLimitTypeList)) - for typ, counter := range gc.run.requestUnitTokens { - if v := getRUValueFromConsumption(delta, typ); v > 0 { - res = append(res, counter.limiter.Reserve(ctx, gc.mainCfg.LTBMaxWaitDuration, now, v)) - } - } - if d, err = WaitReservations(ctx, now, res); err == nil || 
errs.ErrClientResourceGroupThrottled.NotEqual(err) { - break retryLoop - } - } - gc.metrics.requestRetryCounter.Inc() - time.Sleep(gc.mainCfg.WaitRetryInterval) - waitDuration += gc.mainCfg.WaitRetryInterval - } + d, err := gc.acquireTokens(ctx, delta, &waitDuration, false) if err != nil { if errs.ErrClientResourceGroupThrottled.Equal(err) { gc.metrics.failedRequestCounterWithThrottled.Inc() @@ -1399,7 +1442,7 @@ func (gc *groupCostController) onRequestWait( return delta, penalty, waitDuration, gc.getMeta().GetPriority(), nil } -func (gc *groupCostController) onResponse( +func (gc *groupCostController) onResponseImpl( req RequestInfo, resp ResponseInfo, ) (*rmpb.Consumption, error) { delta := &rmpb.Consumption{} @@ -1440,6 +1483,47 @@ func (gc *groupCostController) onResponse( return delta, nil } +func (gc *groupCostController) onResponseWaitImpl( + ctx context.Context, req RequestInfo, resp ResponseInfo, +) (*rmpb.Consumption, time.Duration, error) { + delta := &rmpb.Consumption{} + for _, calc := range gc.calculators { + calc.AfterKVRequest(delta, req, resp) + } + var waitDuration time.Duration + if !gc.burstable.Load() { + allowDebt := delta.ReadBytes+delta.WriteBytes < bigRequestThreshold || !gc.isThrottled.Load() + d, err := gc.acquireTokens(ctx, delta, &waitDuration, allowDebt) + if err != nil { + if errs.ErrClientResourceGroupThrottled.Equal(err) { + gc.metrics.failedRequestCounterWithThrottled.Inc() + gc.metrics.failedLimitReserveDuration.Observe(d.Seconds()) + } else { + gc.metrics.failedRequestCounterWithOthers.Inc() + } + return nil, waitDuration, err + } + gc.metrics.successfulRequestDuration.Observe(d.Seconds()) + waitDuration += d + } + + gc.mu.Lock() + // Record the consumption of the request + add(gc.mu.consumption, delta) + // Record the consumption of the request by store + count := &rmpb.Consumption{} + *count = *delta + // As the penalty is only counted when the request is completed, so here needs to calculate the write cost which is added in `BeforeKVRequest` + for _, calc := range gc.calculators { + calc.BeforeKVRequest(count, req) + } + add(gc.mu.storeCounter[req.StoreID()], count) + add(gc.mu.globalCounter, count) + gc.mu.Unlock() + + return delta, waitDuration, nil +} + // GetActiveResourceGroup is used to get active resource group. // This is used for test only. 
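To make the new response-side accounting concrete: OnRequestWait still charges the write side up front, while onResponseWaitImpl charges the read/CPU side and only blocks when the group is not burstable, is currently throttled, and the response exceeds bigRequestThreshold. A minimal sketch of a hypothetical caller follows; only the interceptor methods come from this diff, and the package name, doKVRequest, and send are illustrative assumptions.

package kvclient

import (
	"context"

	controller "github.com/tikv/pd/client/resource_group/controller"
)

// doKVRequest is a hypothetical wrapper showing the intended call order.
func doKVRequest(
	ctx context.Context,
	interceptor controller.ResourceGroupKVInterceptor, // e.g. *controller.ResourceGroupsController
	group string,
	req controller.RequestInfo,
	send func() (controller.ResponseInfo, error), // stand-in for the real transport call
) error {
	// Charge the write side (plus any penalty) and wait before sending the request.
	if _, _, _, _, err := interceptor.OnRequestWait(ctx, group, req); err != nil {
		return err
	}
	resp, err := send()
	if err != nil {
		return err
	}
	// Charge the read/CPU side afterwards. A response over bigRequestThreshold in a
	// currently throttled group waits for tokens here instead of pushing the bucket
	// further into debt, which is what OnResponseWait adds over OnResponse.
	_, _, err = interceptor.OnResponseWait(ctx, group, req, resp)
	return err
}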
func (c *ResourceGroupsController) GetActiveResourceGroup(resourceGroupName string) *rmpb.ResourceGroup { diff --git a/client/resource_group/controller/controller_test.go b/client/resource_group/controller/controller_test.go index 821364c292f..a59be4d5a2d 100644 --- a/client/resource_group/controller/controller_test.go +++ b/client/resource_group/controller/controller_test.go @@ -104,7 +104,7 @@ func TestRequestAndResponseConsumption(t *testing.T) { kvCalculator := gc.getKVCalculator() for idx, testCase := range testCases { caseNum := fmt.Sprintf("case %d", idx) - consumption, _, _, priority, err := gc.onRequestWait(context.TODO(), testCase.req) + consumption, _, _, priority, err := gc.onRequestWaitImpl(context.TODO(), testCase.req) re.NoError(err, caseNum) re.Equal(priority, gc.meta.Priority) expectedConsumption := &rmpb.Consumption{} @@ -112,7 +112,7 @@ func TestRequestAndResponseConsumption(t *testing.T) { kvCalculator.calculateWriteCost(expectedConsumption, testCase.req) re.Equal(expectedConsumption.WRU, consumption.WRU) } - consumption, err = gc.onResponse(testCase.req, testCase.resp) + consumption, err = gc.onResponseImpl(testCase.req, testCase.resp) re.NoError(err, caseNum) kvCalculator.calculateReadCost(expectedConsumption, testCase.resp) kvCalculator.calculateCPUCost(expectedConsumption, testCase.resp) @@ -121,6 +121,46 @@ func TestRequestAndResponseConsumption(t *testing.T) { } } +func TestOnResponseWaitConsumption(t *testing.T) { + re := require.New(t) + gc := createTestGroupCostController(re) + + req := &TestRequestInfo{ + isWrite: false, + } + resp := &TestResponseInfo{ + readBytes: 2000 * 64 * 1024, // 2000RU + succeed: true, + } + + consumption, waitTIme, err := gc.onResponseWaitImpl(context.TODO(), req, resp) + re.NoError(err) + re.Zero(waitTIme) + verify := func() { + expectedConsumption := &rmpb.Consumption{} + kvCalculator := gc.getKVCalculator() + kvCalculator.calculateReadCost(expectedConsumption, resp) + re.Equal(expectedConsumption.RRU, consumption.RRU) + } + verify() + + // modify the counter, then on response should has wait time. 
+ counter := gc.run.requestUnitTokens[rmpb.RequestUnitType_RU] + gc.modifyTokenCounter(counter, &rmpb.TokenBucket{ + Settings: &rmpb.TokenLimitSettings{ + FillRate: 1000, + BurstLimit: 1000, + }, + }, + int64(5*time.Second/time.Millisecond), + ) + + consumption, waitTIme, err = gc.onResponseWaitImpl(context.TODO(), req, resp) + re.NoError(err) + re.NotZero(waitTIme) + verify() +} + func TestResourceGroupThrottledError(t *testing.T) { re := require.New(t) gc := createTestGroupCostController(re) @@ -129,7 +169,7 @@ func TestResourceGroupThrottledError(t *testing.T) { writeBytes: 10000000, } // The group is throttled - _, _, _, _, err := gc.onRequestWait(context.TODO(), req) + _, _, _, _, err := gc.onRequestWaitImpl(context.TODO(), req) re.Error(err) re.True(errs.ErrClientResourceGroupThrottled.Equal(err)) } diff --git a/client/resource_group/controller/limiter.go b/client/resource_group/controller/limiter.go index faa2bad927e..5d9823312ca 100644 --- a/client/resource_group/controller/limiter.go +++ b/client/resource_group/controller/limiter.go @@ -335,7 +335,7 @@ func (lim *Limiter) Reconfigure(now time.Time, ) { lim.mu.Lock() defer lim.mu.Unlock() - logControllerTrace("[resource group controller] before reconfigure", zap.String("name", lim.name), zap.Float64("old-tokens", lim.tokens), zap.Float64("old-rate", float64(lim.limit)), zap.Float64("old-notify-threshold", args.NotifyThreshold), zap.Int64("old-burst", lim.burst)) + logControllerTrace("[resource group controller] before reconfigure", zap.String("name", lim.name), zap.Float64("old-tokens", lim.tokens), zap.Float64("old-rate", float64(lim.limit)), zap.Float64("old-notify-threshold", lim.notifyThreshold), zap.Int64("old-burst", lim.burst)) if args.NewBurst < 0 { lim.last = now lim.tokens = args.NewTokens diff --git a/client/resource_group/controller/metrics.go b/client/resource_group/controller/metrics.go index 30a0b850c7d..0706210207f 100644 --- a/client/resource_group/controller/metrics.go +++ b/client/resource_group/controller/metrics.go @@ -63,6 +63,14 @@ var ( Help: "Counter of failed request.", }, []string{resourceGroupNameLabel, newResourceGroupNameLabel, errType}) + groupRunningKVRequestCounter = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: requestSubsystem, + Name: "running_kv_request", + Help: "Counter of running kv request.", + }, []string{newResourceGroupNameLabel}) + requestRetryCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: namespace, @@ -95,6 +103,14 @@ var ( Name: "low_token_notified", Help: "Counter of low token request.", }, []string{newResourceGroupNameLabel}) + tokenConsumedHistogram = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: tokenRequestSubsystem, + Name: "consume", + Buckets: []float64{.5, 1, 2, 5, 10, 15, 20, 40, 64, 128, 256, 512, 1024, 2048}, // 0 ~ 2048 + Help: "Bucketed histogram of token consume.", + }, []string{newResourceGroupNameLabel}) ) var ( @@ -108,8 +124,10 @@ func init() { prometheus.MustRegister(successfulRequestDuration) prometheus.MustRegister(failedRequestCounter) prometheus.MustRegister(failedLimitReserveDuration) + prometheus.MustRegister(groupRunningKVRequestCounter) prometheus.MustRegister(requestRetryCounter) prometheus.MustRegister(tokenRequestDuration) prometheus.MustRegister(resourceGroupTokenRequestCounter) prometheus.MustRegister(lowTokenRequestNotifyCounter) + prometheus.MustRegister(tokenConsumedHistogram) } diff --git a/cmd/pd-server/main.go b/cmd/pd-server/main.go 
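The pd-server command changes that follow register a --name flag for the tso, scheduling, and resource manager services, and each service's Config.Parse picks it up via configutil.AdjustCommandLineString before the name is published in its ServiceRegistryEntry. A minimal pflag-only sketch of that flow, with miniConfig standing in for the real per-service Config and the adjustment logic paraphrased rather than taken verbatim from configutil:

package main

import (
	"fmt"

	"github.com/spf13/pflag"
)

// miniConfig stands in for the per-service Config structs that gain a Name field.
type miniConfig struct {
	Name string
}

func main() {
	flagSet := pflag.NewFlagSet("tso", pflag.ContinueOnError)
	flagSet.StringP("name", "", "", "human-readable name for this tso member")
	_ = flagSet.Parse([]string{"--name=tso-1"})

	cfg := &miniConfig{}
	// configutil.AdjustCommandLineString does roughly this: prefer the value set on
	// the command line, otherwise keep whatever is already in the config.
	if v, err := flagSet.GetString("name"); err == nil && v != "" {
		cfg.Name = v
	}
	fmt.Println("member name:", cfg.Name) // member name: tso-1
}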
index 553b93ed0ef..459b18605e9 100644 --- a/cmd/pd-server/main.go +++ b/cmd/pd-server/main.go @@ -94,6 +94,7 @@ func NewTSOServiceCommand() *cobra.Command { Short: "Run the TSO service", Run: tso.CreateServerWrapper, } + cmd.Flags().StringP("name", "", "", "human-readable name for this tso member") cmd.Flags().BoolP("version", "V", false, "print version information and exit") cmd.Flags().StringP("config", "", "", "config file") cmd.Flags().StringP("backend-endpoints", "", "", "url for etcd client") @@ -114,6 +115,7 @@ func NewSchedulingServiceCommand() *cobra.Command { Short: "Run the scheduling service", Run: scheduling.CreateServerWrapper, } + cmd.Flags().StringP("name", "", "", "human-readable name for this scheduling member") cmd.Flags().BoolP("version", "V", false, "print version information and exit") cmd.Flags().StringP("config", "", "", "config file") cmd.Flags().StringP("backend-endpoints", "", "", "url for etcd client") @@ -134,6 +136,7 @@ func NewResourceManagerServiceCommand() *cobra.Command { Short: "Run the resource manager service", Run: resource_manager.CreateServerWrapper, } + cmd.Flags().StringP("name", "", "", "human-readable name for this resource manager member") cmd.Flags().BoolP("version", "V", false, "print version information and exit") cmd.Flags().StringP("config", "", "", "config file") cmd.Flags().StringP("backend-endpoints", "", "", "url for etcd client") diff --git a/conf/config.toml b/conf/config.toml index 438d2c857a5..0c4acf5fd8c 100644 --- a/conf/config.toml +++ b/conf/config.toml @@ -111,9 +111,9 @@ [schedule] ## Controls the size limit of Region Merge. -# max-merge-region-size = 20 +# max-merge-region-size = 54 ## Specifies the upper limit of the Region Merge key. -# max-merge-region-keys = 200000 +# max-merge-region-keys = 540000 ## Controls the time interval between the split and merge operations on the same Region. # split-merge-interval = "1h" ## When PD fails to receive the heartbeat from a store after the specified period of time, diff --git a/pkg/core/region.go b/pkg/core/region.go index 244fef836f8..53268589c8a 100644 --- a/pkg/core/region.go +++ b/pkg/core/region.go @@ -45,7 +45,8 @@ import ( const ( randomRegionMaxRetry = 10 scanRegionLimit = 1000 - CollectFactor = 0.9 + // CollectFactor is the factor to collect the count of region. + CollectFactor = 0.9 ) // errRegionIsStale is error info for region is stale. @@ -721,7 +722,7 @@ func (r *RegionInfo) isRegionRecreated() bool { return r.GetRegionEpoch().GetVersion() == 1 && r.GetRegionEpoch().GetConfVer() == 1 && (len(r.GetStartKey()) != 0 || len(r.GetEndKey()) != 0) } -func (r *RegionInfo) Contains(key []byte) bool { +func (r *RegionInfo) contain(key []byte) bool { start, end := r.GetStartKey(), r.GetEndKey() return bytes.Compare(key, start) >= 0 && (len(end) == 0 || bytes.Compare(key, end) < 0) } @@ -2142,7 +2143,7 @@ func HexRegionKey(key []byte) []byte { // HexRegionKeyStr converts region key to hex format. Used for formatting region in // logs. func HexRegionKeyStr(key []byte) string { - return typeutil.BytesToString(HexRegionKey(key)) + return string(HexRegionKey(key)) } // RegionToHexMeta converts a region meta's keys to hex format. 
Used for formatting diff --git a/pkg/core/region_tree.go b/pkg/core/region_tree.go index 0be207d515d..12e2c5c8878 100644 --- a/pkg/core/region_tree.go +++ b/pkg/core/region_tree.go @@ -261,7 +261,7 @@ func (t *regionTree) find(item *regionItem) *regionItem { return false }) - if result == nil || !result.Contains(item.GetStartKey()) { + if result == nil || !result.contain(item.GetStartKey()) { return nil } @@ -370,7 +370,7 @@ func (t *regionTree) RandomRegions(n int, ranges []KeyRange) []*RegionInfo { // we need to check if the previous item contains the key. if startIndex != 0 && startItem == nil { region = t.tree.GetAt(startIndex - 1).RegionInfo - if region.Contains(startKey) { + if region.contain(startKey) { startIndex-- } } diff --git a/pkg/core/region_tree_test.go b/pkg/core/region_tree_test.go index 2726b4fdab5..a2b1bfab7a7 100644 --- a/pkg/core/region_tree_test.go +++ b/pkg/core/region_tree_test.go @@ -102,15 +102,15 @@ func TestRegionItem(t *testing.T) { re.False(item.Less(newRegionItem([]byte("b"), []byte{}))) re.True(item.Less(newRegionItem([]byte("c"), []byte{}))) - re.False(item.Contains([]byte("a"))) - re.True(item.Contains([]byte("b"))) - re.True(item.Contains([]byte("c"))) + re.False(item.contain([]byte("a"))) + re.True(item.contain([]byte("b"))) + re.True(item.contain([]byte("c"))) item = newRegionItem([]byte("b"), []byte("d")) - re.False(item.Contains([]byte("a"))) - re.True(item.Contains([]byte("b"))) - re.True(item.Contains([]byte("c"))) - re.False(item.Contains([]byte("d"))) + re.False(item.contain([]byte("a"))) + re.True(item.contain([]byte("b"))) + re.True(item.contain([]byte("c"))) + re.False(item.contain([]byte("d"))) } func newRegionWithStat(start, end string, size, keys int64) *RegionInfo { diff --git a/pkg/dashboard/distroutil/distro.go b/pkg/dashboard/distroutil/distro.go index 9eb1a2eb31b..a19db806d70 100644 --- a/pkg/dashboard/distroutil/distro.go +++ b/pkg/dashboard/distroutil/distro.go @@ -16,7 +16,7 @@ package distroutil import ( "os" - "path" + "path/filepath" "github.com/pingcap/log" "github.com/pingcap/tidb-dashboard/util/distro" @@ -36,7 +36,7 @@ func MustGetResPath() string { log.Fatal("failed to read the execution path", zap.Error(err)) return "" } - return path.Join(path.Dir(exePath), resFolderName) + return filepath.Join(filepath.Dir(exePath), resFolderName) } // MustLoadAndReplaceStrings loads the distro strings from ${BinaryPath}/distro-res/strings.json @@ -44,7 +44,7 @@ func MustGetResPath() string { // distro string will be used. func MustLoadAndReplaceStrings() { resPath := MustGetResPath() - strings, err := distro.ReadResourceStringsFromFile(path.Join(resPath, stringsFileName)) + strings, err := distro.ReadResourceStringsFromFile(filepath.Join(resPath, stringsFileName)) if err != nil { log.Fatal("failed to load distro strings", zap.Error(err)) } diff --git a/pkg/encryption/crypter.go b/pkg/encryption/crypter.go index b1f8631ae26..7e69854c5a8 100644 --- a/pkg/encryption/crypter.go +++ b/pkg/encryption/crypter.go @@ -20,15 +20,15 @@ import ( "crypto/rand" "encoding/binary" "io" - "unsafe" "github.com/pingcap/kvproto/pkg/encryptionpb" "github.com/tikv/pd/pkg/errs" ) const ( - ivLengthCTR = 16 - ivLengthGCM = 12 + ivLengthCTR = 16 + ivLengthGCM = 12 + keyIDBufSize = 8 ) // CheckEncryptionMethodSupported check whether the encryption method is currently supported. 
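In the crypter.go hunks here, an explicit keyIDBufSize constant replaces unsafe.Sizeof(uint64(0)) for the 8 random bytes that form a data-key ID. A standalone sketch of the same pattern; the BigEndian decode and the newKeyID helper are illustrative assumptions, not code from this file:

package main

import (
	"crypto/rand"
	"encoding/binary"
	"fmt"
	"io"
)

const keyIDBufSize = 8 // size of a uint64 key ID, spelled out instead of unsafe.Sizeof

// newKeyID draws keyIDBufSize random bytes and packs them into a uint64,
// mirroring the buffer handling in the NewDataKey hunk below.
func newKeyID() (uint64, error) {
	buf := make([]byte, keyIDBufSize)
	n, err := io.ReadFull(rand.Reader, buf)
	if err != nil {
		return 0, err
	}
	if n != keyIDBufSize {
		return 0, fmt.Errorf("not enough random bytes for key id, got %d", n)
	}
	return binary.BigEndian.Uint64(buf), nil
}

func main() {
	id, err := newKeyID()
	if err != nil {
		panic(err)
	}
	fmt.Printf("data key id: %d\n", id)
}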
@@ -106,7 +106,7 @@ func NewDataKey( if err != nil { return } - keyIDBufSize := unsafe.Sizeof(uint64(0)) + keyIDBuf := make([]byte, keyIDBufSize) n, err := io.ReadFull(rand.Reader, keyIDBuf) if err != nil { @@ -114,7 +114,7 @@ func NewDataKey( "fail to generate data key id") return } - if n != int(keyIDBufSize) { + if n != keyIDBufSize { err = errs.ErrEncryptionNewDataKey.GenWithStack( "no enough random bytes to generate data key id, bytes %d", n) return diff --git a/pkg/encryption/master_key_test.go b/pkg/encryption/master_key_test.go index 31962e9e99d..d6d7845284a 100644 --- a/pkg/encryption/master_key_test.go +++ b/pkg/encryption/master_key_test.go @@ -17,6 +17,7 @@ package encryption import ( "encoding/hex" "os" + "path/filepath" "testing" "github.com/pingcap/kvproto/pkg/encryptionpb" @@ -94,8 +95,7 @@ func TestNewFileMasterKeyMissingPath(t *testing.T) { func TestNewFileMasterKeyMissingFile(t *testing.T) { re := require.New(t) - dir := t.TempDir() - path := dir + "/key" + path := filepath.Join(t.TempDir(), "key") config := &encryptionpb.MasterKey{ Backend: &encryptionpb.MasterKey_File{ File: &encryptionpb.MasterKeyFile{ @@ -109,8 +109,7 @@ func TestNewFileMasterKeyMissingFile(t *testing.T) { func TestNewFileMasterKeyNotHexString(t *testing.T) { re := require.New(t) - dir := t.TempDir() - path := dir + "/key" + path := filepath.Join(t.TempDir(), "key") os.WriteFile(path, []byte("not-a-hex-string"), 0600) config := &encryptionpb.MasterKey{ Backend: &encryptionpb.MasterKey_File{ @@ -125,8 +124,7 @@ func TestNewFileMasterKeyNotHexString(t *testing.T) { func TestNewFileMasterKeyLengthMismatch(t *testing.T) { re := require.New(t) - dir := t.TempDir() - path := dir + "/key" + path := filepath.Join(t.TempDir(), "key") os.WriteFile(path, []byte("2f07ec61e5a50284f47f2b402a962ec6"), 0600) config := &encryptionpb.MasterKey{ Backend: &encryptionpb.MasterKey_File{ @@ -142,8 +140,7 @@ func TestNewFileMasterKeyLengthMismatch(t *testing.T) { func TestNewFileMasterKey(t *testing.T) { re := require.New(t) key := "2f07ec61e5a50284f47f2b402a962ec672e500b26cb3aa568bb1531300c74806" // #nosec G101 - dir := t.TempDir() - path := dir + "/key" + path := filepath.Join(t.TempDir(), "key") os.WriteFile(path, []byte(key), 0600) config := &encryptionpb.MasterKey{ Backend: &encryptionpb.MasterKey_File{ diff --git a/pkg/gctuner/finalizer_test.go b/pkg/gctuner/finalizer_test.go index 64cb308d931..9231ca633e5 100644 --- a/pkg/gctuner/finalizer_test.go +++ b/pkg/gctuner/finalizer_test.go @@ -12,14 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build enable_flaky_tests - package gctuner import ( "runtime" "sync/atomic" "testing" + "time" "github.com/stretchr/testify/require" ) @@ -29,7 +28,7 @@ type testState struct { } func TestFinalizer(t *testing.T) { - maxCount := int32(16) + maxCount := int32(8) state := &testState{} f := newFinalizer(func() { n := atomic.AddInt32(&state.count, 1) @@ -39,6 +38,7 @@ func TestFinalizer(t *testing.T) { }) for i := int32(1); i <= maxCount; i++ { runtime.GC() + time.Sleep(10 * time.Millisecond) require.Equal(t, i, atomic.LoadInt32(&state.count)) } require.Nil(t, f.ref) diff --git a/pkg/gctuner/memory_limit_tuner_test.go b/pkg/gctuner/memory_limit_tuner_test.go index f56a64a7326..5e5f84ccbac 100644 --- a/pkg/gctuner/memory_limit_tuner_test.go +++ b/pkg/gctuner/memory_limit_tuner_test.go @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-//go:build enable_flaky_tests - package gctuner import ( @@ -47,9 +45,9 @@ func (a *mockAllocator) freeAll() { } func TestGlobalMemoryTuner(t *testing.T) { - require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/util/gctuner/testMemoryLimitTuner", "return(true)")) + require.NoError(t, failpoint.Enable("github.com/tikv/pd/pkg/gctuner/testMemoryLimitTuner", "return(true)")) defer func() { - require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/util/gctuner/testMemoryLimitTuner")) + require.NoError(t, failpoint.Disable("github.com/tikv/pd/pkg/gctuner/testMemoryLimitTuner")) }() // Close GOGCTuner gogcTuner := EnableGOGCTuner.Load() @@ -61,11 +59,19 @@ func TestGlobalMemoryTuner(t *testing.T) { GlobalMemoryLimitTuner.UpdateMemoryLimit() require.True(t, GlobalMemoryLimitTuner.isTuning.Load()) defer func() { - time.Sleep(1 * time.Second) // If test.count > 1, wait tuning finished. - require.True(t, GlobalMemoryLimitTuner.isTuning.Load()) - // skip unstable test - // require.False(t, GlobalMemoryLimitTuner.waitingReset.Load()) - require.Equal(t, GlobalMemoryLimitTuner.nextGCTriggeredByMemoryLimit.Load(), false) + // If test.count > 1, wait tuning finished. + require.Eventually(t, func() bool { + //nolint: all_revive + return GlobalMemoryLimitTuner.isTuning.Load() + }, 5*time.Second, 100*time.Millisecond) + require.Eventually(t, func() bool { + //nolint: all_revive + return !GlobalMemoryLimitTuner.waitingReset.Load() + }, 5*time.Second, 100*time.Millisecond) + require.Eventually(t, func() bool { + //nolint: all_revive + return !GlobalMemoryLimitTuner.nextGCTriggeredByMemoryLimit.Load() + }, 5*time.Second, 100*time.Millisecond) }() allocator := &mockAllocator{} @@ -77,44 +83,43 @@ func TestGlobalMemoryTuner(t *testing.T) { } checkNextGCEqualMemoryLimit := func() { runtime.ReadMemStats(r) - // skip unstable test - // nextGC := r.NextGC - // memoryLimit := GlobalMemoryLimitTuner.calcMemoryLimit(GlobalMemoryLimitTuner.GetPercentage()) + nextGC := r.NextGC + memoryLimit := GlobalMemoryLimitTuner.calcMemoryLimit(GlobalMemoryLimitTuner.GetPercentage()) // In golang source, nextGC = memoryLimit - three parts memory. 
- // require.True(t, nextGC < uint64(memoryLimit)) + require.Less(t, nextGC, uint64(memoryLimit)) } memory600mb := allocator.alloc(600 << 20) gcNum := getNowGCNum() memory210mb := allocator.alloc(210 << 20) - time.Sleep(100 * time.Millisecond) - // skip unstable test - // require.True(t, GlobalMemoryLimitTuner.waitingReset.Load()) - require.True(t, gcNum < getNowGCNum()) + require.Eventually(t, func() bool { + return GlobalMemoryLimitTuner.waitingReset.Load() && gcNum < getNowGCNum() + }, 5*time.Second, 100*time.Millisecond) // Test waiting for reset - time.Sleep(500 * time.Millisecond) - require.Equal(t, GlobalMemoryLimitTuner.calcMemoryLimit(fallbackPercentage), debug.SetMemoryLimit(-1)) + require.Eventually(t, func() bool { + return GlobalMemoryLimitTuner.calcMemoryLimit(fallbackPercentage) == debug.SetMemoryLimit(-1) + }, 5*time.Second, 100*time.Millisecond) gcNum = getNowGCNum() memory100mb := allocator.alloc(100 << 20) - time.Sleep(100 * time.Millisecond) - require.Equal(t, gcNum, getNowGCNum()) // No GC + require.Eventually(t, func() bool { + return gcNum == getNowGCNum() + }, 5*time.Second, 100*time.Millisecond) // No GC allocator.free(memory210mb) allocator.free(memory100mb) runtime.GC() // Trigger GC in 80% again - time.Sleep(500 * time.Millisecond) - // skip unstable test - // require.Equal(t, GlobalMemoryLimitTuner.calcMemoryLimit(GlobalMemoryLimitTuner.GetPercentage()), debug.SetMemoryLimit(-1)) + require.Eventually(t, func() bool { + return GlobalMemoryLimitTuner.calcMemoryLimit(GlobalMemoryLimitTuner.GetPercentage()) == debug.SetMemoryLimit(-1) + }, 5*time.Second, 100*time.Millisecond) time.Sleep(100 * time.Millisecond) - // skip unstable test - // gcNum = getNowGCNum() + gcNum = getNowGCNum() checkNextGCEqualMemoryLimit() memory210mb = allocator.alloc(210 << 20) - time.Sleep(100 * time.Millisecond) - // skip unstable test - // require.True(t, gcNum < getNowGCNum()) + require.Eventually(t, func() bool { + return gcNum < getNowGCNum() + }, 5*time.Second, 100*time.Millisecond) allocator.free(memory210mb) allocator.free(memory600mb) } diff --git a/pkg/gctuner/tuner.go b/pkg/gctuner/tuner.go index 172c6adf326..74932fe174b 100644 --- a/pkg/gctuner/tuner.go +++ b/pkg/gctuner/tuner.go @@ -148,6 +148,10 @@ func (t *tuner) getGCPercent() uint32 { // tuning check the memory inuse and tune GC percent dynamically. // Go runtime ensure that it will be called serially. func (t *tuner) tuning() { + if !EnableGOGCTuner.Load() { + return + } + inuse := readMemoryInuse() threshold := t.getThreshold() log.Debug("tuning", zap.Uint64("inuse", inuse), zap.Uint64("threshold", threshold), diff --git a/pkg/gctuner/tuner_calc_test.go b/pkg/gctuner/tuner_calc_test.go deleted file mode 100644 index 473f5bda67d..00000000000 --- a/pkg/gctuner/tuner_calc_test.go +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2023 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package gctuner - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func TestCalcGCPercent(t *testing.T) { - const gb = 1024 * 1024 * 1024 - // use default value when invalid params - require.Equal(t, defaultGCPercent, calcGCPercent(0, 0)) - require.Equal(t, defaultGCPercent, calcGCPercent(0, 1)) - require.Equal(t, defaultGCPercent, calcGCPercent(1, 0)) - - require.Equal(t, maxGCPercent.Load(), calcGCPercent(1, 3*gb)) - require.Equal(t, maxGCPercent.Load(), calcGCPercent(gb/10, 4*gb)) - require.Equal(t, maxGCPercent.Load(), calcGCPercent(gb/2, 4*gb)) - require.Equal(t, uint32(300), calcGCPercent(1*gb, 4*gb)) - require.Equal(t, uint32(166), calcGCPercent(1.5*gb, 4*gb)) - require.Equal(t, uint32(100), calcGCPercent(2*gb, 4*gb)) - require.Equal(t, uint32(100), calcGCPercent(3*gb, 4*gb)) - require.Equal(t, minGCPercent.Load(), calcGCPercent(4*gb, 4*gb)) - require.Equal(t, minGCPercent.Load(), calcGCPercent(5*gb, 4*gb)) -} diff --git a/pkg/gctuner/tuner_test.go b/pkg/gctuner/tuner_test.go index 604cd449b35..7018634c5d1 100644 --- a/pkg/gctuner/tuner_test.go +++ b/pkg/gctuner/tuner_test.go @@ -4,7 +4,7 @@ // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build enable_flaky_tests - package gctuner import ( "runtime" "testing" + "time" + "github.com/docker/go-units" "github.com/stretchr/testify/require" ) @@ -27,7 +27,7 @@ var testHeap []byte func TestTuner(t *testing.T) { EnableGOGCTuner.Store(true) - memLimit := uint64(1000 * 1024 * 1024) // 1000 MB + memLimit := uint64(1000 * units.MiB) // 1000 MB threshold := memLimit / 2 tn := newTuner(threshold) require.Equal(t, threshold, tn.threshold.Load()) @@ -39,7 +39,8 @@ func TestTuner(t *testing.T) { runtime.GC() for i := 0; i < 100; i++ { runtime.GC() - require.Equal(t, maxGCPercent.Load(), tn.getGCPercent()) + require.Eventually(t, func() bool { return maxGCPercent.Load() == tn.getGCPercent() }, + 1*time.Second, 50*time.Microsecond) } // 1/4 threshold @@ -55,8 +56,10 @@ func TestTuner(t *testing.T) { runtime.GC() for i := 0; i < 100; i++ { runtime.GC() - require.GreaterOrEqual(t, tn.getGCPercent(), minGCPercent.Load()) - require.LessOrEqual(t, tn.getGCPercent(), maxGCPercent.Load()/2) + require.Eventually(t, func() bool { return tn.getGCPercent() >= minGCPercent.Load() }, + 1*time.Second, 50*time.Microsecond) + require.Eventually(t, func() bool { return tn.getGCPercent() <= maxGCPercent.Load()/2 }, + 1*time.Second, 50*time.Microsecond) } // 3/4 threshold @@ -64,7 +67,8 @@ func TestTuner(t *testing.T) { runtime.GC() for i := 0; i < 100; i++ { runtime.GC() - require.Equal(t, minGCPercent.Load(), tn.getGCPercent()) + require.Eventually(t, func() bool { return minGCPercent.Load() == tn.getGCPercent() }, + 1*time.Second, 50*time.Microsecond) } // out of threshold @@ -72,6 +76,25 @@ func TestTuner(t *testing.T) { runtime.GC() for i := 0; i < 100; i++ { runtime.GC() - require.Equal(t, minGCPercent.Load(), tn.getGCPercent()) + require.Eventually(t, func() bool { return minGCPercent.Load() == tn.getGCPercent() }, + 1*time.Second, 50*time.Microsecond) } } + +func TestCalcGCPercent(t *testing.T) { + const gb = 
units.GiB + // use default value when invalid params + require.Equal(t, defaultGCPercent, calcGCPercent(0, 0)) + require.Equal(t, defaultGCPercent, calcGCPercent(0, 1)) + require.Equal(t, defaultGCPercent, calcGCPercent(1, 0)) + + require.Equal(t, maxGCPercent.Load(), calcGCPercent(1, 3*gb)) + require.Equal(t, maxGCPercent.Load(), calcGCPercent(gb/10, 4*gb)) + require.Equal(t, maxGCPercent.Load(), calcGCPercent(gb/2, 4*gb)) + require.Equal(t, uint32(300), calcGCPercent(1*gb, 4*gb)) + require.Equal(t, uint32(166), calcGCPercent(1.5*gb, 4*gb)) + require.Equal(t, uint32(100), calcGCPercent(2*gb, 4*gb)) + require.Equal(t, uint32(100), calcGCPercent(3*gb, 4*gb)) + require.Equal(t, minGCPercent.Load(), calcGCPercent(4*gb, 4*gb)) + require.Equal(t, minGCPercent.Load(), calcGCPercent(5*gb, 4*gb)) +} diff --git a/pkg/gogc/gogc.go b/pkg/gogc/gogc.go index 110b596fb89..2d3039ca244 100644 --- a/pkg/gogc/gogc.go +++ b/pkg/gogc/gogc.go @@ -19,9 +19,6 @@ import ( "runtime/debug" "strconv" "sync/atomic" - - "github.com/pingcap/log" - "go.uber.org/zap" ) var gogcValue int64 @@ -39,9 +36,6 @@ func SetGOGC(val int) int { val = 100 } result := debug.SetGCPercent(val) - if result != val { - log.Info("debug.SetGCPercent", zap.Int("val", val), zap.Int("result", result)) - } atomic.StoreInt64(&gogcValue, int64(val)) return result } diff --git a/pkg/mcs/discovery/discover.go b/pkg/mcs/discovery/discover.go index 1ce5ecda51d..3e1d678cffb 100644 --- a/pkg/mcs/discovery/discover.go +++ b/pkg/mcs/discovery/discover.go @@ -45,14 +45,14 @@ func Discover(cli *clientv3.Client, clusterID, serviceName string) ([]string, er } // GetMSMembers returns all the members of the specified service name. -func GetMSMembers(name string, client *clientv3.Client) ([]ServiceRegistryEntry, error) { - switch name { +func GetMSMembers(serviceName string, client *clientv3.Client) ([]ServiceRegistryEntry, error) { + switch serviceName { case utils.TSOServiceName, utils.SchedulingServiceName, utils.ResourceManagerServiceName: clusterID, err := etcdutil.GetClusterID(client, utils.ClusterIDPath) if err != nil { return nil, err } - servicePath := ServicePath(strconv.FormatUint(clusterID, 10), name) + servicePath := ServicePath(strconv.FormatUint(clusterID, 10), serviceName) resps, err := kv.NewSlowLogTxn(client).Then(clientv3.OpGet(servicePath, clientv3.WithPrefix())).Commit() if err != nil { return nil, errs.ErrEtcdKVGet.Wrap(err).GenWithStackByCause() @@ -75,5 +75,5 @@ func GetMSMembers(name string, client *clientv3.Client) ([]ServiceRegistryEntry, return entries, nil } - return nil, errors.Errorf("unknown service name %s", name) + return nil, errors.Errorf("unknown service name %s", serviceName) } diff --git a/pkg/mcs/discovery/registry_entry.go b/pkg/mcs/discovery/registry_entry.go index bf11ae5c8a4..db4ac44a2cc 100644 --- a/pkg/mcs/discovery/registry_entry.go +++ b/pkg/mcs/discovery/registry_entry.go @@ -23,6 +23,9 @@ import ( // ServiceRegistryEntry is the registry entry of a service type ServiceRegistryEntry struct { + // The specific value will be assigned only if the startup parameter is added. + // If not assigned, the default value(service-hostname) will be used. 
+ Name string `json:"name"` ServiceAddr string `json:"service-addr"` Version string `json:"version"` GitHash string `json:"git-hash"` diff --git a/pkg/mcs/resourcemanager/server/config.go b/pkg/mcs/resourcemanager/server/config.go index 2ccdfb05cc4..03fc6718926 100644 --- a/pkg/mcs/resourcemanager/server/config.go +++ b/pkg/mcs/resourcemanager/server/config.go @@ -202,6 +202,7 @@ func (c *Config) Parse(flagSet *pflag.FlagSet) error { } // Ignore the error check here + configutil.AdjustCommandLineString(flagSet, &c.Name, "name") configutil.AdjustCommandLineString(flagSet, &c.Log.Level, "log-level") configutil.AdjustCommandLineString(flagSet, &c.Log.File.Filename, "log-file") configutil.AdjustCommandLineString(flagSet, &c.Metric.PushAddress, "metrics-addr") diff --git a/pkg/mcs/resourcemanager/server/server.go b/pkg/mcs/resourcemanager/server/server.go index 708a11344d4..19317d8202a 100644 --- a/pkg/mcs/resourcemanager/server/server.go +++ b/pkg/mcs/resourcemanager/server/server.go @@ -339,7 +339,7 @@ func (s *Server) startServer() (err error) { s.startServerLoop() // Server has started. - entry := &discovery.ServiceRegistryEntry{ServiceAddr: s.cfg.AdvertiseListenAddr} + entry := &discovery.ServiceRegistryEntry{ServiceAddr: s.cfg.AdvertiseListenAddr, Name: s.Name()} serializedEntry, err := entry.Serialize() if err != nil { return err diff --git a/pkg/mcs/resourcemanager/server/testutil.go b/pkg/mcs/resourcemanager/server/testutil.go index 0277e5e8a8f..3577301258c 100644 --- a/pkg/mcs/resourcemanager/server/testutil.go +++ b/pkg/mcs/resourcemanager/server/testutil.go @@ -49,16 +49,18 @@ func NewTestServer(ctx context.Context, re *require.Assertions, cfg *Config) (*S // GenerateConfig generates a new config with the given options. func GenerateConfig(c *Config) (*Config, error) { arguments := []string{ + "--name=" + c.Name, "--listen-addr=" + c.ListenAddr, "--advertise-listen-addr=" + c.AdvertiseListenAddr, "--backend-endpoints=" + c.BackendEndpoints, } flagSet := pflag.NewFlagSet("test", pflag.ContinueOnError) + flagSet.StringP("name", "", "", "human-readable name for this resource manager member") flagSet.BoolP("version", "V", false, "print version information and exit") flagSet.StringP("config", "", "", "config file") flagSet.StringP("backend-endpoints", "", "", "url for etcd client") - flagSet.StringP("listen-addr", "", "", "listen address for tso service") + flagSet.StringP("listen-addr", "", "", "listen address for resource manager service") flagSet.StringP("advertise-listen-addr", "", "", "advertise urls for listen address (default '${listen-addr}')") flagSet.StringP("cacert", "", "", "path of file that contains list of trusted TLS CAs") flagSet.StringP("cert", "", "", "path of file that contains X509 certificate in PEM format") diff --git a/pkg/mcs/scheduling/server/cluster.go b/pkg/mcs/scheduling/server/cluster.go index c86c739f724..955af4b9b4a 100644 --- a/pkg/mcs/scheduling/server/cluster.go +++ b/pkg/mcs/scheduling/server/cluster.go @@ -27,6 +27,7 @@ import ( "github.com/tikv/pd/pkg/schedule/scatter" "github.com/tikv/pd/pkg/schedule/schedulers" "github.com/tikv/pd/pkg/schedule/splitter" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/statistics/buckets" @@ -343,8 +344,9 @@ func (c *Cluster) updateScheduler() { // Remove the deleted schedulers. 
for _, name := range schedulersController.GetSchedulerNames() { scheduler := schedulersController.GetScheduler(name) + oldType := types.SchedulerTypeCompatibleMap[scheduler.GetType()] if slice.AnyOf(latestSchedulersConfig, func(i int) bool { - return latestSchedulersConfig[i].Type == scheduler.GetType() + return latestSchedulersConfig[i].Type == oldType }) { continue } diff --git a/pkg/mcs/scheduling/server/config/config.go b/pkg/mcs/scheduling/server/config/config.go index 2111aa3ddcc..c1fcad33ace 100644 --- a/pkg/mcs/scheduling/server/config/config.go +++ b/pkg/mcs/scheduling/server/config/config.go @@ -36,6 +36,7 @@ import ( "github.com/tikv/pd/pkg/core/storelimit" "github.com/tikv/pd/pkg/mcs/utils" sc "github.com/tikv/pd/pkg/schedule/config" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/configutil" @@ -104,6 +105,7 @@ func (c *Config) Parse(flagSet *pflag.FlagSet) error { } // Ignore the error check here + configutil.AdjustCommandLineString(flagSet, &c.Name, "name") configutil.AdjustCommandLineString(flagSet, &c.Log.Level, "log-level") configutil.AdjustCommandLineString(flagSet, &c.Log.File.Filename, "log-file") configutil.AdjustCommandLineString(flagSet, &c.Metric.PushAddress, "metrics-addr") @@ -646,10 +648,11 @@ func (o *PersistConfig) SetMaxReplicas(replicas int) { } // IsSchedulerDisabled returns if the scheduler is disabled. -func (o *PersistConfig) IsSchedulerDisabled(t string) bool { +func (o *PersistConfig) IsSchedulerDisabled(tp types.CheckerSchedulerType) bool { + oldType := types.SchedulerTypeCompatibleMap[tp] schedulers := o.GetScheduleConfig().Schedulers for _, s := range schedulers { - if t == s.Type { + if oldType == s.Type { return s.Disable } } @@ -739,11 +742,11 @@ func (o *PersistConfig) IsRaftKV2() bool { // AddSchedulerCfg adds the scheduler configurations. // This method is a no-op since we only use configurations derived from one-way synchronization from API server now. -func (*PersistConfig) AddSchedulerCfg(string, []string) {} +func (*PersistConfig) AddSchedulerCfg(types.CheckerSchedulerType, []string) {} // RemoveSchedulerCfg removes the scheduler configurations. // This method is a no-op since we only use configurations derived from one-way synchronization from API server now. -func (*PersistConfig) RemoveSchedulerCfg(string) {} +func (*PersistConfig) RemoveSchedulerCfg(types.CheckerSchedulerType) {} // CheckLabelProperty checks if the label property is satisfied. func (*PersistConfig) CheckLabelProperty(string, []*metapb.StoreLabel) bool { diff --git a/pkg/mcs/scheduling/server/grpc_service.go b/pkg/mcs/scheduling/server/grpc_service.go index 1459ccd3bac..7eb2554f7f2 100644 --- a/pkg/mcs/scheduling/server/grpc_service.go +++ b/pkg/mcs/scheduling/server/grpc_service.go @@ -51,6 +51,7 @@ var SetUpRestHandler = func(*Service) (http.Handler, apiutil.APIServiceGroup) { type dummyRestService struct{} +// ServeHTTP implements the http.Handler interface. func (dummyRestService) ServeHTTP(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusNotImplemented) w.Write([]byte("not implemented")) @@ -83,6 +84,7 @@ type heartbeatServer struct { closed int32 } +// Send implements the HeartbeatStream interface. 
func (s *heartbeatServer) Send(m core.RegionHeartbeatResponse) error { if atomic.LoadInt32(&s.closed) == 1 { return io.EOF @@ -106,7 +108,7 @@ func (s *heartbeatServer) Send(m core.RegionHeartbeatResponse) error { } } -func (s *heartbeatServer) Recv() (*schedulingpb.RegionHeartbeatRequest, error) { +func (s *heartbeatServer) recv() (*schedulingpb.RegionHeartbeatRequest, error) { if atomic.LoadInt32(&s.closed) == 1 { return nil, io.EOF } @@ -133,7 +135,7 @@ func (s *Service) RegionHeartbeat(stream schedulingpb.Scheduling_RegionHeartbeat }() for { - request, err := server.Recv() + request, err := server.recv() if err == io.EOF { return nil } @@ -322,7 +324,7 @@ func (s *Service) AskBatchSplit(_ context.Context, request *schedulingpb.AskBatc // If region splits during the scheduling process, regions with abnormal // status may be left, and these regions need to be checked with higher // priority. - c.GetCoordinator().GetCheckerController().AddPendingProcessedRegions(recordRegions...) + c.GetCoordinator().GetCheckerController().AddPendingProcessedRegions(false, recordRegions...) return &schedulingpb.AskBatchSplitResponse{ Header: s.header(), diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 8eb9e49d964..50936325f45 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -427,6 +427,7 @@ func (s *Server) startServer() (err error) { GitHash: versioninfo.PDGitHash, DeployPath: deployPath, StartTimestamp: s.StartTimestamp(), + Name: s.Name(), } uniqueName := s.cfg.GetAdvertiseListenAddr() uniqueID := memberutil.GenerateUniqueID(uniqueName) diff --git a/pkg/mcs/server/server.go b/pkg/mcs/server/server.go index 6aec799278c..d30d3337a46 100644 --- a/pkg/mcs/server/server.go +++ b/pkg/mcs/server/server.go @@ -95,8 +95,8 @@ func (bs *BaseServer) GetHTTPClient() *http.Client { return bs.httpClient } -// SetETCDClient sets the etcd client. -func (bs *BaseServer) SetETCDClient(etcdClient *clientv3.Client) { +// SetEtcdClient sets the etcd client. 
+func (bs *BaseServer) SetEtcdClient(etcdClient *clientv3.Client) { bs.etcdClient = etcdClient } diff --git a/pkg/mcs/tso/server/config.go b/pkg/mcs/tso/server/config.go index 8cfef98ebaf..82ac777ad06 100644 --- a/pkg/mcs/tso/server/config.go +++ b/pkg/mcs/tso/server/config.go @@ -167,6 +167,7 @@ func (c *Config) Parse(flagSet *pflag.FlagSet) error { } // Ignore the error check here + configutil.AdjustCommandLineString(flagSet, &c.Name, "name") configutil.AdjustCommandLineString(flagSet, &c.Log.Level, "log-level") configutil.AdjustCommandLineString(flagSet, &c.Log.File.Filename, "log-file") configutil.AdjustCommandLineString(flagSet, &c.Metric.PushAddress, "metrics-addr") diff --git a/pkg/mcs/tso/server/server.go b/pkg/mcs/tso/server/server.go index 60ce2917ed5..a120cbc9868 100644 --- a/pkg/mcs/tso/server/server.go +++ b/pkg/mcs/tso/server/server.go @@ -382,6 +382,7 @@ func (s *Server) startServer() (err error) { GitHash: versioninfo.PDGitHash, DeployPath: deployPath, StartTimestamp: s.StartTimestamp(), + Name: s.Name(), } s.keyspaceGroupManager = tso.NewKeyspaceGroupManager( s.serverLoopCtx, s.serviceID, s.GetClient(), s.GetHTTPClient(), s.cfg.AdvertiseListenAddr, diff --git a/pkg/mcs/tso/server/testutil.go b/pkg/mcs/tso/server/testutil.go index cf5d45e7754..5dcfd4759b9 100644 --- a/pkg/mcs/tso/server/testutil.go +++ b/pkg/mcs/tso/server/testutil.go @@ -34,12 +34,14 @@ func MustNewGrpcClient(re *require.Assertions, addr string) (*grpc.ClientConn, t // GenerateConfig generates a new config with the given options. func GenerateConfig(c *Config) (*Config, error) { arguments := []string{ + "--name=" + c.Name, "--listen-addr=" + c.ListenAddr, "--advertise-listen-addr=" + c.AdvertiseListenAddr, "--backend-endpoints=" + c.BackendEndpoints, } flagSet := pflag.NewFlagSet("test", pflag.ContinueOnError) + flagSet.StringP("name", "", "", "human-readable name for this tso member") flagSet.BoolP("version", "V", false, "print version information and exit") flagSet.StringP("config", "", "", "config file") flagSet.StringP("backend-endpoints", "", "", "url for etcd client") diff --git a/pkg/mcs/utils/util.go b/pkg/mcs/utils/util.go index 844cf17fde4..55e6c54e9b9 100644 --- a/pkg/mcs/utils/util.go +++ b/pkg/mcs/utils/util.go @@ -107,7 +107,7 @@ type server interface { GetGRPCServer() *grpc.Server SetGRPCServer(*grpc.Server) SetHTTPServer(*http.Server) - SetETCDClient(*clientv3.Client) + SetEtcdClient(*clientv3.Client) SetHTTPClient(*http.Client) IsSecure() bool RegisterGRPCService(*grpc.Server) @@ -183,7 +183,7 @@ func InitClient(s server) error { if err != nil { return err } - s.SetETCDClient(etcdClient) + s.SetEtcdClient(etcdClient) s.SetHTTPClient(etcdutil.CreateHTTPClient(tlsConfig)) return nil } diff --git a/pkg/mock/mockcluster/mockcluster.go b/pkg/mock/mockcluster/mockcluster.go index cb1efeb544b..bbd4fbb6811 100644 --- a/pkg/mock/mockcluster/mockcluster.go +++ b/pkg/mock/mockcluster/mockcluster.go @@ -845,7 +845,7 @@ func (mc *Cluster) SetStoreLabel(storeID uint64, labels map[string]string) { } // AddPendingProcessedRegions mock method -func (mc *Cluster) AddPendingProcessedRegions(ids ...uint64) { +func (mc *Cluster) AddPendingProcessedRegions(_ bool, ids ...uint64) { for _, id := range ids { mc.pendingProcessedRegions[id] = struct{}{} } diff --git a/pkg/ratelimit/runner.go b/pkg/ratelimit/runner.go index 4b1b51f1768..1d65ff6a568 100644 --- a/pkg/ratelimit/runner.go +++ b/pkg/ratelimit/runner.go @@ -65,6 +65,7 @@ type taskID struct { name string } +// ConcurrentRunner is a task runner that limits 
the number of concurrent tasks. type ConcurrentRunner struct { ctx context.Context cancel context.CancelFunc diff --git a/pkg/schedule/checker/checker_controller.go b/pkg/schedule/checker/checker_controller.go index f772219558b..200ab388e30 100644 --- a/pkg/schedule/checker/checker_controller.go +++ b/pkg/schedule/checker/checker_controller.go @@ -17,6 +17,7 @@ package checker import ( "bytes" "context" + "strconv" "time" "github.com/pingcap/failpoint" @@ -35,12 +36,16 @@ import ( ) const ( + suspectRegionLimit = 1024 checkSuspectRangesInterval = 100 * time.Millisecond // DefaultPendingRegionCacheSize is the default length of waiting list. DefaultPendingRegionCacheSize = 100000 - // It takes about 1.3 minutes(1000000/128*10/60/1000) to iterate 1 million regions(with DefaultPatrolRegionInterval=10ms). - patrolScanRegionLimit = 128 - suspectRegionLimit = 1024 + // For 1,024,000 regions, patrolRegionScanLimit is 1000, which is max(MinPatrolRegionScanLimit, 1,024,000/patrolRegionPartition) + // In order to avoid the patrolRegionScanLimit to be too big or too small, it will be limited to [128,8192]. + // It takes about 10s to iterate 1,024,000 regions(with DefaultPatrolRegionInterval=10ms) where other steps are not considered. + MinPatrolRegionScanLimit = 128 + MaxPatrolScanRegionLimit = 8192 + patrolRegionPartition = 1024 ) var ( @@ -63,7 +68,7 @@ type Controller struct { mergeChecker *MergeChecker jointStateChecker *JointStateChecker priorityInspector *PriorityInspector - pendingProcessedRegions cache.Cache + pendingProcessedRegions *cache.TTLUint64 suspectKeyRanges *cache.TTLString // suspect key-range regions that may need fix // duration is the duration of the last patrol round. @@ -76,11 +81,14 @@ type Controller struct { // It's used to update the ticker, so we need to // record it to avoid updating the ticker frequently. interval time.Duration + // patrolRegionScanLimit is the limit of regions to scan. + // It is calculated by the number of regions. + patrolRegionScanLimit int } // NewController create a new Controller. func NewController(ctx context.Context, cluster sche.CheckerCluster, conf config.CheckerConfigProvider, ruleManager *placement.RuleManager, labeler *labeler.RegionLabeler, opController *operator.Controller) *Controller { - pendingProcessedRegions := cache.NewDefaultCache(DefaultPendingRegionCacheSize) + pendingProcessedRegions := cache.NewIDTTL(ctx, time.Minute, 3*time.Minute) return &Controller{ ctx: ctx, cluster: cluster, @@ -96,6 +104,7 @@ func NewController(ctx context.Context, cluster sche.CheckerCluster, conf config pendingProcessedRegions: pendingProcessedRegions, suspectKeyRanges: cache.NewStringTTL(ctx, time.Minute, 3*time.Minute), interval: cluster.GetCheckerConfig().GetPatrolRegionInterval(), + patrolRegionScanLimit: calculateScanLimit(cluster), } } @@ -113,36 +122,40 @@ func (c *Controller) PatrolRegions() { select { case <-ticker.C: c.updateTickerIfNeeded(ticker) + if c.cluster.IsSchedulingHalted() { + log.Debug("skip patrol regions due to scheduling is halted") + continue + } + + // Check priority regions first. + c.checkPriorityRegions() + // Check pending processed regions first. + c.checkPendingProcessedRegions() + + key, regions = c.checkRegions(key) + if len(regions) == 0 { + continue + } + // Updates the label level isolation statistics. + c.cluster.UpdateRegionsLabelLevelStats(regions) + // When the key is nil, it means that the scan is finished. + if len(key) == 0 { + // update the scan limit. 
+ c.patrolRegionScanLimit = calculateScanLimit(c.cluster) + // update the metrics. + dur := time.Since(start) + patrolCheckRegionsGauge.Set(dur.Seconds()) + c.setPatrolRegionsDuration(dur) + start = time.Now() + } + failpoint.Inject("breakPatrol", func() { + failpoint.Return() + }) case <-c.ctx.Done(): patrolCheckRegionsGauge.Set(0) c.setPatrolRegionsDuration(0) return } - if c.cluster.IsSchedulingHalted() { - continue - } - - // Check priority regions first. - c.checkPriorityRegions() - // Check pending processed regions first. - c.checkPendingProcessedRegions() - - key, regions = c.checkRegions(key) - if len(regions) == 0 { - continue - } - // Updates the label level isolation statistics. - c.cluster.UpdateRegionsLabelLevelStats(regions) - // When the key is nil, it means that the scan is finished. - if len(key) == 0 { - dur := time.Since(start) - patrolCheckRegionsGauge.Set(dur.Seconds()) - c.setPatrolRegionsDuration(dur) - start = time.Now() - } - failpoint.Inject("breakPatrol", func() { - failpoint.Break() - }) } } @@ -160,7 +173,7 @@ func (c *Controller) setPatrolRegionsDuration(dur time.Duration) { } func (c *Controller) checkRegions(startKey []byte) (key []byte, regions []*core.RegionInfo) { - regions = c.cluster.ScanRegions(startKey, nil, patrolScanRegionLimit) + regions = c.cluster.ScanRegions(startKey, nil, c.patrolRegionScanLimit) if len(regions) == 0 { // Resets the scan key. key = nil @@ -241,7 +254,7 @@ func (c *Controller) CheckRegion(region *core.RegionInfo) []*operator.Operator { if opController.OperatorCount(operator.OpReplica) < c.conf.GetReplicaScheduleLimit() { return []*operator.Operator{op} } - operator.OperatorLimitCounter.WithLabelValues(c.ruleChecker.Name(), operator.OpReplica.String()).Inc() + operator.IncOperatorLimitCounter(c.ruleChecker.GetType(), operator.OpReplica) c.pendingProcessedRegions.Put(region.GetID(), nil) } } @@ -253,7 +266,7 @@ func (c *Controller) CheckRegion(region *core.RegionInfo) []*operator.Operator { if opController.OperatorCount(operator.OpReplica) < c.conf.GetReplicaScheduleLimit() { return []*operator.Operator{op} } - operator.OperatorLimitCounter.WithLabelValues(c.replicaChecker.Name(), operator.OpReplica.String()).Inc() + operator.IncOperatorLimitCounter(c.replicaChecker.GetType(), operator.OpReplica) c.pendingProcessedRegions.Put(region.GetID(), nil) } } @@ -270,7 +283,7 @@ func (c *Controller) CheckRegion(region *core.RegionInfo) []*operator.Operator { if c.mergeChecker != nil { allowed := opController.OperatorCount(operator.OpMerge) < c.conf.GetMergeScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(c.mergeChecker.GetType(), operator.OpMerge.String()).Inc() + operator.IncOperatorLimitCounter(c.mergeChecker.GetType(), operator.OpMerge) } else if ops := c.mergeChecker.Check(region); ops != nil { // It makes sure that two operators can be added successfully altogether. return ops @@ -298,7 +311,7 @@ func (c *Controller) tryAddOperators(region *core.RegionInfo) { c.opController.AddWaitingOperator(ops...) c.RemovePendingProcessedRegion(id) } else { - c.AddPendingProcessedRegions(id) + c.AddPendingProcessedRegions(true, id) } } @@ -314,16 +327,15 @@ func (c *Controller) GetRuleChecker() *RuleChecker { // GetPendingProcessedRegions returns the pending processed regions in the cache. 
func (c *Controller) GetPendingProcessedRegions() []uint64 { - pendingRegions := make([]uint64, 0) - for _, item := range c.pendingProcessedRegions.Elems() { - pendingRegions = append(pendingRegions, item.Key) - } - return pendingRegions + return c.pendingProcessedRegions.GetAllID() } // AddPendingProcessedRegions adds the pending processed region into the cache. -func (c *Controller) AddPendingProcessedRegions(ids ...uint64) { +func (c *Controller) AddPendingProcessedRegions(needCheckLen bool, ids ...uint64) { for _, id := range ids { + if needCheckLen && c.pendingProcessedRegions.Len() > DefaultPendingRegionCacheSize { + return + } c.pendingProcessedRegions.Put(id, nil) } } @@ -372,7 +384,7 @@ func (c *Controller) CheckSuspectRanges() { if lastRegion.GetEndKey() != nil && bytes.Compare(lastRegion.GetEndKey(), keyRange[1]) < 0 { c.AddSuspectKeyRange(lastRegion.GetEndKey(), keyRange[1]) } - c.AddPendingProcessedRegions(regionIDList...) + c.AddPendingProcessedRegions(false, regionIDList...) } } } @@ -439,3 +451,19 @@ func (c *Controller) updateTickerIfNeeded(ticker *time.Ticker) { log.Info("checkers starts patrol regions with new interval", zap.Duration("interval", newInterval)) } } + +// GetPatrolRegionScanLimit returns the limit of regions to scan. +// It only used for test. +func (c *Controller) GetPatrolRegionScanLimit() int { + return c.patrolRegionScanLimit +} + +func calculateScanLimit(cluster sche.CheckerCluster) int { + regionCount := cluster.GetTotalRegionCount() + failpoint.Inject("regionCount", func(val failpoint.Value) { + c, _ := strconv.ParseInt(val.(string), 10, 64) + regionCount = int(c) + }) + scanlimit := max(MinPatrolRegionScanLimit, regionCount/patrolRegionPartition) + return min(scanlimit, MaxPatrolScanRegionLimit) +} diff --git a/pkg/schedule/checker/merge_checker.go b/pkg/schedule/checker/merge_checker.go index 1a7548a1084..65189d35c1d 100644 --- a/pkg/schedule/checker/merge_checker.go +++ b/pkg/schedule/checker/merge_checker.go @@ -31,6 +31,7 @@ import ( "github.com/tikv/pd/pkg/schedule/labeler" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/placement" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/utils/logutil" ) @@ -69,8 +70,8 @@ func NewMergeChecker(ctx context.Context, cluster sche.CheckerCluster, conf conf } // GetType return MergeChecker's type -func (*MergeChecker) GetType() string { - return "merge-checker" +func (*MergeChecker) GetType() types.CheckerSchedulerType { + return types.MergeChecker } // RecordRegionSplit put the recently split region into cache. MergeChecker diff --git a/pkg/schedule/checker/replica_checker.go b/pkg/schedule/checker/replica_checker.go index f75ffe7e882..b0c42e88258 100644 --- a/pkg/schedule/checker/replica_checker.go +++ b/pkg/schedule/checker/replica_checker.go @@ -44,11 +44,11 @@ type ReplicaChecker struct { PauseController cluster sche.CheckerCluster conf config.CheckerConfigProvider - pendingProcessedRegions cache.Cache + pendingProcessedRegions *cache.TTLUint64 } // NewReplicaChecker creates a replica checker. 
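
A standalone restatement of the clamping rule that calculateScanLimit applies above (the constants mirror the patch; the helper and function names here are illustrative, not the PD code):

package main

import "fmt"

// patrolScanLimit mirrors the rule limit = clamp(regionCount/patrolRegionPartition,
// MinPatrolRegionScanLimit, MaxPatrolScanRegionLimit).
func patrolScanLimit(regionCount int) int {
	const (
		minLimit  = 128  // MinPatrolRegionScanLimit
		maxLimit  = 8192 // MaxPatrolScanRegionLimit
		partition = 1024 // patrolRegionPartition
	)
	limit := regionCount / partition
	if limit < minLimit {
		limit = minLimit
	}
	if limit > maxLimit {
		limit = maxLimit
	}
	return limit
}

func main() {
	fmt.Println(patrolScanLimit(10_000))     // 128: small clusters stay at the lower bound
	fmt.Println(patrolScanLimit(1_024_000))  // 1000: the example from the patch comment
	fmt.Println(patrolScanLimit(50_000_000)) // 8192: very large clusters are capped
}

The point of the clamp is that one patrol round over roughly a million regions still finishes in about 10s at the default 10ms tick, without letting a single scan batch grow unbounded.
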
-func NewReplicaChecker(cluster sche.CheckerCluster, conf config.CheckerConfigProvider, pendingProcessedRegions cache.Cache) *ReplicaChecker { +func NewReplicaChecker(cluster sche.CheckerCluster, conf config.CheckerConfigProvider, pendingProcessedRegions *cache.TTLUint64) *ReplicaChecker { return &ReplicaChecker{ cluster: cluster, conf: conf, @@ -61,6 +61,11 @@ func (*ReplicaChecker) Name() string { return types.ReplicaChecker.String() } +// GetType return ReplicaChecker's type. +func (*ReplicaChecker) GetType() types.CheckerSchedulerType { + return types.ReplicaChecker +} + // Check verifies a region's replicas, creating an operator.Operator if need. func (r *ReplicaChecker) Check(region *core.RegionInfo) *operator.Operator { replicaCheckerCounter.Inc() diff --git a/pkg/schedule/checker/replica_checker_test.go b/pkg/schedule/checker/replica_checker_test.go index a9139ee9804..da04fb6d768 100644 --- a/pkg/schedule/checker/replica_checker_test.go +++ b/pkg/schedule/checker/replica_checker_test.go @@ -51,7 +51,7 @@ func (suite *replicaCheckerTestSuite) SetupTest() { suite.ctx, suite.cancel = context.WithCancel(context.Background()) suite.cluster = mockcluster.NewCluster(suite.ctx, cfg) suite.cluster.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) - suite.rc = NewReplicaChecker(suite.cluster, suite.cluster.GetCheckerConfig(), cache.NewDefaultCache(10)) + suite.rc = NewReplicaChecker(suite.cluster, suite.cluster.GetCheckerConfig(), cache.NewIDTTL(suite.ctx, time.Minute, 3*time.Minute)) stats := &pdpb.StoreStats{ Capacity: 100, Available: 100, @@ -213,7 +213,7 @@ func (suite *replicaCheckerTestSuite) TestBasic() { tc := mockcluster.NewCluster(suite.ctx, opt) tc.SetMaxSnapshotCount(2) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) - rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewDefaultCache(10)) + rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewIDTTL(suite.ctx, time.Minute, 3*time.Minute)) // Add stores 1,2,3,4. 
tc.AddRegionStore(1, 4) @@ -290,7 +290,7 @@ func (suite *replicaCheckerTestSuite) TestLostStore() { tc.AddRegionStore(1, 1) tc.AddRegionStore(2, 1) - rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewDefaultCache(10)) + rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewIDTTL(suite.ctx, time.Minute, 3*time.Minute)) // now region peer in store 1,2,3.but we just have store 1,2 // This happens only in recovering the PD tc @@ -309,7 +309,7 @@ func (suite *replicaCheckerTestSuite) TestOffline() { tc.SetMaxReplicas(3) tc.SetLocationLabels([]string{"zone", "rack", "host"}) - rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewDefaultCache(10)) + rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewIDTTL(suite.ctx, time.Minute, 3*time.Minute)) tc.AddLabelsStore(1, 1, map[string]string{"zone": "z1", "rack": "r1", "host": "h1"}) tc.AddLabelsStore(2, 2, map[string]string{"zone": "z2", "rack": "r1", "host": "h1"}) tc.AddLabelsStore(3, 3, map[string]string{"zone": "z3", "rack": "r1", "host": "h1"}) @@ -361,7 +361,7 @@ func (suite *replicaCheckerTestSuite) TestDistinctScore() { tc.SetMaxReplicas(3) tc.SetLocationLabels([]string{"zone", "rack", "host"}) - rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewDefaultCache(10)) + rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewIDTTL(suite.ctx, time.Minute, 3*time.Minute)) tc.AddLabelsStore(1, 9, map[string]string{"zone": "z1", "rack": "r1", "host": "h1"}) tc.AddLabelsStore(2, 8, map[string]string{"zone": "z1", "rack": "r1", "host": "h1"}) @@ -441,7 +441,7 @@ func (suite *replicaCheckerTestSuite) TestDistinctScore2() { tc.SetMaxReplicas(5) tc.SetLocationLabels([]string{"zone", "host"}) - rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewDefaultCache(10)) + rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewIDTTL(suite.ctx, time.Minute, 3*time.Minute)) tc.AddLabelsStore(1, 1, map[string]string{"zone": "z1", "host": "h1"}) tc.AddLabelsStore(2, 1, map[string]string{"zone": "z1", "host": "h2"}) @@ -470,7 +470,7 @@ func (suite *replicaCheckerTestSuite) TestStorageThreshold() { tc := mockcluster.NewCluster(suite.ctx, opt) tc.SetLocationLabels([]string{"zone"}) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) - rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewDefaultCache(10)) + rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewIDTTL(suite.ctx, time.Minute, 3*time.Minute)) tc.AddLabelsStore(1, 1, map[string]string{"zone": "z1"}) tc.UpdateStorageRatio(1, 0.5, 0.5) @@ -506,7 +506,7 @@ func (suite *replicaCheckerTestSuite) TestOpts() { opt := mockconfig.NewTestOptions() tc := mockcluster.NewCluster(suite.ctx, opt) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) - rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewDefaultCache(10)) + rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewIDTTL(suite.ctx, time.Minute, 3*time.Minute)) tc.AddRegionStore(1, 100) tc.AddRegionStore(2, 100) @@ -539,7 +539,7 @@ func (suite *replicaCheckerTestSuite) TestFixDownPeer() { tc := mockcluster.NewCluster(suite.ctx, opt) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.SetLocationLabels([]string{"zone"}) - rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewDefaultCache(10)) + rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewIDTTL(suite.ctx, time.Minute, 3*time.Minute)) tc.AddLabelsStore(1, 1, map[string]string{"zone": "z1"}) tc.AddLabelsStore(2, 1, 
map[string]string{"zone": "z1"}) @@ -571,7 +571,7 @@ func (suite *replicaCheckerTestSuite) TestFixOfflinePeer() { tc := mockcluster.NewCluster(suite.ctx, opt) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.SetLocationLabels([]string{"zone"}) - rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewDefaultCache(10)) + rc := NewReplicaChecker(tc, tc.GetCheckerConfig(), cache.NewIDTTL(suite.ctx, time.Minute, 3*time.Minute)) tc.AddLabelsStore(1, 1, map[string]string{"zone": "z1"}) tc.AddLabelsStore(2, 1, map[string]string{"zone": "z1"}) diff --git a/pkg/schedule/checker/rule_checker.go b/pkg/schedule/checker/rule_checker.go index a90de0a58d4..e29cd2bc05b 100644 --- a/pkg/schedule/checker/rule_checker.go +++ b/pkg/schedule/checker/rule_checker.go @@ -52,14 +52,14 @@ type RuleChecker struct { PauseController cluster sche.CheckerCluster ruleManager *placement.RuleManager - pendingProcessedRegions cache.Cache + pendingProcessedRegions *cache.TTLUint64 pendingList cache.Cache switchWitnessCache *cache.TTLUint64 record *recorder } // NewRuleChecker creates a checker instance. -func NewRuleChecker(ctx context.Context, cluster sche.CheckerCluster, ruleManager *placement.RuleManager, pendingProcessedRegions cache.Cache) *RuleChecker { +func NewRuleChecker(ctx context.Context, cluster sche.CheckerCluster, ruleManager *placement.RuleManager, pendingProcessedRegions *cache.TTLUint64) *RuleChecker { return &RuleChecker{ cluster: cluster, ruleManager: ruleManager, @@ -75,6 +75,11 @@ func (*RuleChecker) Name() string { return types.RuleChecker.String() } +// GetType returns RuleChecker's type. +func (*RuleChecker) GetType() types.CheckerSchedulerType { + return types.RuleChecker +} + // Check checks if the region matches placement rules and returns Operator to // fix it. func (c *RuleChecker) Check(region *core.RegionInfo) *operator.Operator { diff --git a/pkg/schedule/checker/rule_checker_test.go b/pkg/schedule/checker/rule_checker_test.go index f99208a988b..b24a95e2ade 100644 --- a/pkg/schedule/checker/rule_checker_test.go +++ b/pkg/schedule/checker/rule_checker_test.go @@ -62,7 +62,7 @@ func (suite *ruleCheckerTestSuite) SetupTest() { suite.cluster.SetEnableWitness(true) suite.cluster.SetEnableUseJointConsensus(false) suite.ruleManager = suite.cluster.RuleManager - suite.rc = NewRuleChecker(suite.ctx, suite.cluster, suite.ruleManager, cache.NewDefaultCache(10)) + suite.rc = NewRuleChecker(suite.ctx, suite.cluster, suite.ruleManager, cache.NewIDTTL(suite.ctx, time.Minute, 3*time.Minute)) } func (suite *ruleCheckerTestSuite) TearDownTest() { @@ -1955,7 +1955,7 @@ func (suite *ruleCheckerTestAdvancedSuite) SetupTest() { suite.cluster.SetEnableWitness(true) suite.cluster.SetEnableUseJointConsensus(true) suite.ruleManager = suite.cluster.RuleManager - suite.rc = NewRuleChecker(suite.ctx, suite.cluster, suite.ruleManager, cache.NewDefaultCache(10)) + suite.rc = NewRuleChecker(suite.ctx, suite.cluster, suite.ruleManager, cache.NewIDTTL(suite.ctx, time.Minute, 3*time.Minute)) } func (suite *ruleCheckerTestAdvancedSuite) TearDownTest() { diff --git a/pkg/schedule/config/config.go b/pkg/schedule/config/config.go index 5a67a547483..d35f7ac6383 100644 --- a/pkg/schedule/config/config.go +++ b/pkg/schedule/config/config.go @@ -27,10 +27,13 @@ import ( const ( // DefaultMaxReplicas is the default number of replicas for each region. 
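
The tests above all switch the pending-region container from a size-bounded cache to cache.NewIDTTL(ctx, time.Minute, 3*time.Minute). As a reading aid, here is a simplified stand-in for that behavior (this is not the pkg/cache implementation; it uses lazy expiration instead of a background GC goroutine):

package main

import (
	"fmt"
	"time"
)

// ttlIDSet keeps region IDs only for a fixed TTL, so stale pending regions
// age out instead of being evicted purely by capacity.
type ttlIDSet struct {
	ttl     time.Duration
	expires map[uint64]time.Time
}

func newTTLIDSet(ttl time.Duration) *ttlIDSet {
	return &ttlIDSet{ttl: ttl, expires: map[uint64]time.Time{}}
}

func (s *ttlIDSet) put(id uint64) { s.expires[id] = time.Now().Add(s.ttl) }

func (s *ttlIDSet) ids() []uint64 {
	now := time.Now()
	var out []uint64
	for id, exp := range s.expires {
		if now.Before(exp) {
			out = append(out, id)
		} else {
			delete(s.expires, id) // lazily drop expired entries
		}
	}
	return out
}

func main() {
	s := newTTLIDSet(3 * time.Minute)
	s.put(1)
	s.put(2)
	fmt.Println(s.ids())
}
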
- DefaultMaxReplicas = 3 - defaultMaxSnapshotCount = 64 - defaultMaxPendingPeerCount = 64 - defaultMaxMergeRegionSize = 20 + DefaultMaxReplicas = 3 + defaultMaxSnapshotCount = 64 + defaultMaxPendingPeerCount = 64 + // defaultMaxMergeRegionSize is the default maximum size of region when regions can be merged. + // After https://github.com/tikv/tikv/issues/17309, the default value is enlarged from 20 to 54, + // to make it compatible with the default value of region size of tikv. + defaultMaxMergeRegionSize = 54 defaultLeaderScheduleLimit = 4 defaultRegionScheduleLimit = 2048 defaultWitnessScheduleLimit = 4 diff --git a/pkg/schedule/config/config_provider.go b/pkg/schedule/config/config_provider.go index 90e489f86f3..51ade0edb77 100644 --- a/pkg/schedule/config/config_provider.go +++ b/pkg/schedule/config/config_provider.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/tikv/pd/pkg/core/constant" "github.com/tikv/pd/pkg/core/storelimit" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/storage/endpoint" ) @@ -49,9 +50,9 @@ type SchedulerConfigProvider interface { SetSchedulingAllowanceStatus(bool, string) GetStoresLimit() map[uint64]StoreLimitConfig - IsSchedulerDisabled(string) bool - AddSchedulerCfg(string, []string) - RemoveSchedulerCfg(string) + IsSchedulerDisabled(types.CheckerSchedulerType) bool + AddSchedulerCfg(types.CheckerSchedulerType, []string) + RemoveSchedulerCfg(types.CheckerSchedulerType) Persist(endpoint.ConfigStorage) error GetRegionScheduleLimit() uint64 diff --git a/pkg/schedule/filter/filters.go b/pkg/schedule/filter/filters.go index 6c5dd748d17..e2846e6c9a6 100644 --- a/pkg/schedule/filter/filters.go +++ b/pkg/schedule/filter/filters.go @@ -65,7 +65,7 @@ func SelectUnavailableTargetStores(stores []*core.StoreInfo, filters []Filter, c cfilter, ok := filters[i].(comparingFilter) sourceID := uint64(0) if ok { - sourceID = cfilter.GetSourceStoreID() + sourceID = cfilter.getSourceStoreID() } if counter != nil { counter.inc(target, filters[i].Type(), sourceID, s.GetID()) @@ -99,7 +99,7 @@ func SelectTargetStores(stores []*core.StoreInfo, filters []Filter, conf config. cfilter, ok := filter.(comparingFilter) sourceID := uint64(0) if ok { - sourceID = cfilter.GetSourceStoreID() + sourceID = cfilter.getSourceStoreID() } if counter != nil { counter.inc(target, filter.Type(), sourceID, s.GetID()) @@ -141,8 +141,8 @@ type Filter interface { // comparingFilter is an interface to filter target store by comparing source and target stores type comparingFilter interface { Filter - // GetSourceStoreID returns the source store when comparing. - GetSourceStoreID() uint64 + // getSourceStoreID returns the source store when comparing. + getSourceStoreID() uint64 } // Target checks if store can pass all Filters as target store. @@ -156,7 +156,7 @@ func Target(conf config.SharedConfigProvider, store *core.StoreInfo, filters []F targetID := storeID sourceID := "" if ok { - sourceID = strconv.FormatUint(cfilter.GetSourceStoreID(), 10) + sourceID = strconv.FormatUint(cfilter.getSourceStoreID(), 10) } filterCounter.WithLabelValues(target.String(), filter.Scope(), filter.Type().String(), sourceID, targetID).Inc() } @@ -319,8 +319,8 @@ func (f *distinctScoreFilter) Target(_ config.SharedConfigProvider, store *core. 
return statusStoreNotMatchIsolation } -// GetSourceStoreID implements the ComparingFilter -func (f *distinctScoreFilter) GetSourceStoreID() uint64 { +// getSourceStoreID implements the ComparingFilter +func (f *distinctScoreFilter) getSourceStoreID() uint64 { return f.srcStore } @@ -669,8 +669,8 @@ func (f *ruleFitFilter) Target(_ config.SharedConfigProvider, store *core.StoreI return statusStoreNotMatchRule } -// GetSourceStoreID implements the ComparingFilter -func (f *ruleFitFilter) GetSourceStoreID() uint64 { +// getSourceStoreID implements the ComparingFilter +func (f *ruleFitFilter) getSourceStoreID() uint64 { return f.srcStore } @@ -730,7 +730,7 @@ func (f *ruleLeaderFitFilter) Target(_ config.SharedConfigProvider, store *core. return statusStoreNotMatchRule } -func (f *ruleLeaderFitFilter) GetSourceStoreID() uint64 { +func (f *ruleLeaderFitFilter) getSourceStoreID() uint64 { return f.srcLeaderStoreID } diff --git a/pkg/schedule/filter/region_filters.go b/pkg/schedule/filter/region_filters.go index e233ec75973..dca15dbf8ed 100644 --- a/pkg/schedule/filter/region_filters.go +++ b/pkg/schedule/filter/region_filters.go @@ -142,6 +142,7 @@ func NewRegionEmptyFilter(cluster sche.SharedCluster) RegionFilter { return ®ionEmptyFilter{cluster: cluster} } +// Select implements the RegionFilter interface. func (f *regionEmptyFilter) Select(region *core.RegionInfo) *plan.Status { if !isEmptyRegionAllowBalance(f.cluster, region) { return statusRegionEmpty @@ -163,6 +164,7 @@ func NewRegionWitnessFilter(storeID uint64) RegionFilter { return ®ionWitnessFilter{storeID: storeID} } +// Select implements the RegionFilter interface. func (f *regionWitnessFilter) Select(region *core.RegionInfo) *plan.Status { if region.GetStoreWitness(f.storeID) != nil { return statusRegionWitnessPeer diff --git a/pkg/schedule/handler/handler.go b/pkg/schedule/handler/handler.go index 9f9de274278..748a17b87ef 100644 --- a/pkg/schedule/handler/handler.go +++ b/pkg/schedule/handler/handler.go @@ -1124,7 +1124,7 @@ func (h *Handler) AccelerateRegionsScheduleInRange(rawStartKey, rawEndKey string for _, region := range regions { regionsIDList = append(regionsIDList, region.GetID()) } - co.GetCheckerController().AddPendingProcessedRegions(regionsIDList...) + co.GetCheckerController().AddPendingProcessedRegions(false, regionsIDList...) } return nil } @@ -1151,7 +1151,7 @@ func (h *Handler) AccelerateRegionsScheduleInRanges(startKeys [][]byte, endKeys for _, region := range regions { regionsIDList = append(regionsIDList, region.GetID()) } - co.GetCheckerController().AddPendingProcessedRegions(regionsIDList...) + co.GetCheckerController().AddPendingProcessedRegions(false, regionsIDList...) 
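
The call sites above pass the new needCheckLen flag as false, while the patrol path passes true. A minimal sketch of that guard, assuming a plain map in place of the TTL cache (the cap value is the DefaultPendingRegionCacheSize from the patch; the type is illustrative):

package main

import "fmt"

const pendingCap = 100000 // DefaultPendingRegionCacheSize

type pendingSet map[uint64]struct{}

// add stops inserting once the set exceeds the cap, but only when the caller
// opts into the check; accelerate-schedule paths bypass it with needCheckLen=false.
func (p pendingSet) add(needCheckLen bool, ids ...uint64) {
	for _, id := range ids {
		if needCheckLen && len(p) > pendingCap {
			return // drop the remainder instead of growing without bound
		}
		p[id] = struct{}{}
	}
}

func main() {
	p := pendingSet{}
	p.add(true, 1, 2, 3)
	fmt.Println(len(p))
}
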
} return nil } diff --git a/pkg/schedule/operator/builder.go b/pkg/schedule/operator/builder.go index e28e7de973a..29b8aedf978 100644 --- a/pkg/schedule/operator/builder.go +++ b/pkg/schedule/operator/builder.go @@ -117,15 +117,15 @@ func NewBuilder(desc string, ci sche.SharedCluster, region *core.RegionInfo, opt err = errors.Errorf("cannot build operator for region with nil peer") break } - originPeers.Set(p) + originPeers.set(p) } for _, p := range region.GetPendingPeers() { - unhealthyPeers.Set(p) + unhealthyPeers.set(p) } for _, p := range region.GetDownPeers() { - unhealthyPeers.Set(p.Peer) + unhealthyPeers.set(p.Peer) } // origin leader @@ -158,7 +158,7 @@ func NewBuilder(desc string, ci sche.SharedCluster, region *core.RegionInfo, opt b.originPeers = originPeers b.unhealthyPeers = unhealthyPeers b.originLeaderStoreID = originLeaderStoreID - b.targetPeers = originPeers.Copy() + b.targetPeers = originPeers.copy() b.useJointConsensus = supportConfChangeV2 && b.GetSharedConfig().IsUseJointConsensus() b.err = err return b @@ -177,7 +177,7 @@ func (b *Builder) AddPeer(peer *metapb.Peer) *Builder { } else if old, ok := b.targetPeers[peer.GetStoreId()]; ok { b.err = errors.Errorf("cannot add peer %s: already have peer %s", peer, old) } else { - b.targetPeers.Set(peer) + b.targetPeers.set(peer) } return b } @@ -209,7 +209,7 @@ func (b *Builder) PromoteLearner(storeID uint64) *Builder { } else if _, ok := b.unhealthyPeers[storeID]; ok { b.err = errors.Errorf("cannot promote peer %d: unhealthy", storeID) } else { - b.targetPeers.Set(&metapb.Peer{ + b.targetPeers.set(&metapb.Peer{ Id: peer.GetId(), StoreId: peer.GetStoreId(), Role: metapb.PeerRole_Voter, @@ -229,7 +229,7 @@ func (b *Builder) DemoteVoter(storeID uint64) *Builder { } else if core.IsLearner(peer) { b.err = errors.Errorf("cannot demote voter %d: is already learner", storeID) } else { - b.targetPeers.Set(&metapb.Peer{ + b.targetPeers.set(&metapb.Peer{ Id: peer.GetId(), StoreId: peer.GetStoreId(), Role: metapb.PeerRole_Learner, @@ -249,7 +249,7 @@ func (b *Builder) BecomeWitness(storeID uint64) *Builder { } else if core.IsWitness(peer) { b.err = errors.Errorf("cannot switch peer to witness %d: is already witness", storeID) } else { - b.targetPeers.Set(&metapb.Peer{ + b.targetPeers.set(&metapb.Peer{ Id: peer.GetId(), StoreId: peer.GetStoreId(), Role: peer.GetRole(), @@ -269,7 +269,7 @@ func (b *Builder) BecomeNonWitness(storeID uint64) *Builder { } else if !core.IsWitness(peer) { b.err = errors.Errorf("cannot switch peer to non-witness %d: is already non-witness", storeID) } else { - b.targetPeers.Set(&metapb.Peer{ + b.targetPeers.set(&metapb.Peer{ Id: peer.GetId(), StoreId: peer.GetStoreId(), Role: peer.GetRole(), @@ -335,7 +335,7 @@ func (b *Builder) SetPeers(peers map[uint64]*metapb.Peer) *Builder { b.targetLeaderStoreID = 0 } - b.targetPeers = peersMap(peers).Copy() + b.targetPeers = peersMap(peers).copy() return b } @@ -439,7 +439,7 @@ func (b *Builder) prepareBuild() (string, error) { for _, o := range b.originPeers { n := b.targetPeers[o.GetStoreId()] if n == nil { - b.toRemove.Set(o) + b.toRemove.set(o) continue } @@ -461,25 +461,25 @@ func (b *Builder) prepareBuild() (string, error) { if !core.IsLearner(n) { n.Role = metapb.PeerRole_Learner n.IsWitness = true - b.toPromoteNonWitness.Set(n) + b.toPromoteNonWitness.set(n) } - b.toNonWitness.Set(n) + b.toNonWitness.set(n) } else if !isOriginPeerWitness && isTargetPeerWitness { - b.toWitness.Set(n) + b.toWitness.set(n) } isOriginPeerLearner := core.IsLearner(o) 
isTargetPeerLearner := core.IsLearner(n) if isOriginPeerLearner && !isTargetPeerLearner { // learner -> voter - b.toPromote.Set(n) + b.toPromote.set(n) } else if !isOriginPeerLearner && isTargetPeerLearner { // voter -> learner if b.useJointConsensus { - b.toDemote.Set(n) + b.toDemote.set(n) } else { - b.toRemove.Set(o) - // the targetPeers loop below will add `b.toAdd.Set(n)` + b.toRemove.set(o) + // the targetPeers loop below will add `b.toAdd.set(n)` } } } @@ -500,8 +500,8 @@ func (b *Builder) prepareBuild() (string, error) { IsWitness: n.GetIsWitness(), } } - // It is a pair with `b.toRemove.Set(o)` when `o != nil`. - b.toAdd.Set(n) + // It is a pair with `b.toRemove.set(o)` when `o != nil`. + b.toAdd.set(n) } } @@ -510,7 +510,7 @@ func (b *Builder) prepareBuild() (string, error) { b.targetLeaderStoreID = 0 } - b.currentPeers, b.currentLeaderStoreID = b.originPeers.Copy(), b.originLeaderStoreID + b.currentPeers, b.currentLeaderStoreID = b.originPeers.copy(), b.originLeaderStoreID if b.targetLeaderStoreID != 0 { targetLeader := b.targetPeers[b.targetLeaderStoreID] @@ -580,7 +580,7 @@ func (b *Builder) buildStepsWithJointConsensus(kind OpKind) (OpKind, error) { Role: metapb.PeerRole_Learner, IsWitness: peer.GetIsWitness(), }) - b.toPromote.Set(peer) + b.toPromote.set(peer) } else { b.execAddPeer(peer) } @@ -596,7 +596,7 @@ func (b *Builder) buildStepsWithJointConsensus(kind OpKind) (OpKind, error) { for _, remove := range b.toRemove.IDs() { peer := b.toRemove[remove] if !core.IsLearner(peer) { - b.toDemote.Set(&metapb.Peer{ + b.toDemote.set(&metapb.Peer{ Id: peer.GetId(), StoreId: peer.GetStoreId(), Role: metapb.PeerRole_Learner, @@ -637,7 +637,7 @@ func (b *Builder) buildStepsWithJointConsensus(kind OpKind) (OpKind, error) { for _, promote := range b.toPromoteNonWitness.IDs() { peer := b.toPromoteNonWitness[promote] peer.IsWitness = false - b.toPromote.Set(peer) + b.toPromote.set(peer) kind |= OpRegion } b.toPromoteNonWitness = newPeersMap() @@ -771,13 +771,13 @@ func (b *Builder) execTransferLeader(targetStoreID uint64, targetStoreIDs []uint func (b *Builder) execPromoteLearner(peer *metapb.Peer) { b.steps = append(b.steps, PromoteLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId(), IsWitness: peer.GetIsWitness()}) - b.currentPeers.Set(peer) + b.currentPeers.set(peer) delete(b.toPromote, peer.GetStoreId()) } func (b *Builder) execPromoteNonWitness(peer *metapb.Peer) { b.steps = append(b.steps, PromoteLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId(), IsWitness: false}) - b.currentPeers.Set(peer) + b.currentPeers.set(peer) delete(b.toPromoteNonWitness, peer.GetStoreId()) } @@ -786,7 +786,7 @@ func (b *Builder) execAddPeer(peer *metapb.Peer) { if !core.IsLearner(peer) { b.steps = append(b.steps, PromoteLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId(), IsWitness: peer.GetIsWitness()}) } - b.currentPeers.Set(peer) + b.currentPeers.set(peer) b.peerAddStep[peer.GetStoreId()] = len(b.steps) delete(b.toAdd, peer.GetStoreId()) } @@ -824,14 +824,14 @@ func (b *Builder) execChangePeerV2(needEnter bool, needTransferLeader bool) { for _, p := range b.toPromote.IDs() { peer := b.toPromote[p] step.PromoteLearners = append(step.PromoteLearners, PromoteLearner{ToStore: peer.GetStoreId(), PeerID: peer.GetId(), IsWitness: peer.GetIsWitness()}) - b.currentPeers.Set(peer) + b.currentPeers.set(peer) } b.toPromote = newPeersMap() for _, d := range b.toDemote.IDs() { peer := b.toDemote[d] step.DemoteVoters = append(step.DemoteVoters, DemoteVoter{ToStore: peer.GetStoreId(), PeerID: 
peer.GetId(), IsWitness: peer.GetIsWitness()}) - b.currentPeers.Set(peer) + b.currentPeers.set(peer) } b.toDemote = newPeersMap() @@ -1279,10 +1279,11 @@ func (pm peersMap) IDs() []uint64 { return ids } -func (pm peersMap) Set(peer *metapb.Peer) { +func (pm peersMap) set(peer *metapb.Peer) { pm[peer.GetStoreId()] = peer } +// String returns a brief description of the peersMap. func (pm peersMap) String() string { ids := make([]uint64, 0, len(pm)) for _, p := range pm { @@ -1291,10 +1292,10 @@ func (pm peersMap) String() string { return fmt.Sprintf("%v", ids) } -func (pm peersMap) Copy() peersMap { +func (pm peersMap) copy() peersMap { var pm2 peersMap = make(map[uint64]*metapb.Peer, len(pm)) for _, p := range pm { - pm2.Set(p) + pm2.set(p) } return pm2 } diff --git a/pkg/schedule/operator/create_operator.go b/pkg/schedule/operator/create_operator.go index 64680520933..4fae7f9e3f2 100644 --- a/pkg/schedule/operator/create_operator.go +++ b/pkg/schedule/operator/create_operator.go @@ -285,9 +285,9 @@ func CreateLeaveJointStateOperator(desc string, ci sche.SharedCluster, origin *c for _, o := range b.originPeers { switch o.GetRole() { case metapb.PeerRole_IncomingVoter: - b.toPromote.Set(o) + b.toPromote.set(o) case metapb.PeerRole_DemotingVoter: - b.toDemote.Set(o) + b.toDemote.set(o) } } @@ -298,7 +298,7 @@ func CreateLeaveJointStateOperator(desc string, ci sche.SharedCluster, origin *c b.targetLeaderStoreID = b.originLeaderStoreID } - b.currentPeers, b.currentLeaderStoreID = b.originPeers.Copy(), b.originLeaderStoreID + b.currentPeers, b.currentLeaderStoreID = b.originPeers.copy(), b.originLeaderStoreID b.peerAddStep = make(map[uint64]int) brief := b.brief() diff --git a/pkg/schedule/operator/metrics.go b/pkg/schedule/operator/metrics.go index 20bb4e6b7ca..74f9ddad0c7 100644 --- a/pkg/schedule/operator/metrics.go +++ b/pkg/schedule/operator/metrics.go @@ -14,7 +14,10 @@ package operator -import "github.com/prometheus/client_golang/prometheus" +import ( + "github.com/prometheus/client_golang/prometheus" + types "github.com/tikv/pd/pkg/schedule/type" +) var ( operatorStepDuration = prometheus.NewHistogramVec( @@ -26,8 +29,7 @@ var ( Buckets: []float64{0.5, 1, 2, 4, 8, 16, 20, 40, 60, 90, 120, 180, 240, 300, 480, 600, 720, 900, 1200, 1800, 3600}, }, []string{"type"}) - // OperatorLimitCounter exposes the counter when meeting limit. - OperatorLimitCounter = prometheus.NewCounterVec( + operatorLimitCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: "pd", Subsystem: "schedule", @@ -82,10 +84,15 @@ var ( func init() { prometheus.MustRegister(operatorStepDuration) - prometheus.MustRegister(OperatorLimitCounter) + prometheus.MustRegister(operatorLimitCounter) prometheus.MustRegister(OperatorExceededStoreLimitCounter) prometheus.MustRegister(operatorCounter) prometheus.MustRegister(operatorDuration) prometheus.MustRegister(operatorSizeHist) prometheus.MustRegister(storeLimitCostCounter) } + +// IncOperatorLimitCounter increases the counter of operator meeting limit. 
+func IncOperatorLimitCounter(typ types.CheckerSchedulerType, kind OpKind) { + operatorLimitCounter.WithLabelValues(typ.String(), kind.String()).Inc() +} diff --git a/pkg/schedule/operator/operator_controller.go b/pkg/schedule/operator/operator_controller.go index fe93bd98756..e4da6ead0ef 100644 --- a/pkg/schedule/operator/operator_controller.go +++ b/pkg/schedule/operator/operator_controller.go @@ -235,10 +235,10 @@ func getNextPushOperatorTime(step OpStep, now time.Time) time.Time { // "next" is true to indicate that it may exist in next attempt, // and false is the end for the poll. func (oc *Controller) pollNeedDispatchRegion() (r *core.RegionInfo, next bool) { - if oc.opNotifierQueue.Len() == 0 { + if oc.opNotifierQueue.len() == 0 { return nil, false } - item, _ := oc.opNotifierQueue.Pop() + item, _ := oc.opNotifierQueue.pop() regionID := item.op.RegionID() opi, ok := oc.operators.Load(regionID) if !ok || opi.(*Operator) == nil { @@ -265,13 +265,13 @@ func (oc *Controller) pollNeedDispatchRegion() (r *core.RegionInfo, next bool) { } now := time.Now() if now.Before(item.time) { - oc.opNotifierQueue.Push(item) + oc.opNotifierQueue.push(item) return nil, false } // pushes with new notify time. item.time = getNextPushOperatorTime(step, now) - oc.opNotifierQueue.Push(item) + oc.opNotifierQueue.push(item) return r, true } @@ -561,7 +561,7 @@ func (oc *Controller) addOperatorInner(op *Operator) bool { } } - oc.opNotifierQueue.Push(&operatorWithTime{op: op, time: getNextPushOperatorTime(step, time.Now())}) + oc.opNotifierQueue.push(&operatorWithTime{op: op, time: getNextPushOperatorTime(step, time.Now())}) operatorCounter.WithLabelValues(op.Desc(), "create").Inc() for _, counter := range op.Counters { counter.Inc() @@ -753,7 +753,7 @@ func (oc *Controller) GetOperator(regionID uint64) *Operator { // GetOperators gets operators from the running operators. func (oc *Controller) GetOperators() []*Operator { - operators := make([]*Operator, 0, oc.opNotifierQueue.Len()) + operators := make([]*Operator, 0, oc.opNotifierQueue.len()) oc.operators.Range( func(_, value any) bool { operators = append(operators, value.(*Operator)) @@ -769,7 +769,7 @@ func (oc *Controller) GetWaitingOperators() []*Operator { // GetOperatorsOfKind returns the running operators of the kind. 
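
IncOperatorLimitCounter above replaces direct WithLabelValues calls on an exported CounterVec with a typed helper. A small sketch of that wrapper pattern using the same Prometheus client library (metric names and labels here are illustrative, not the PD definitions):

package main

import "github.com/prometheus/client_golang/prometheus"

// The CounterVec stays unexported; callers go through a helper that takes
// typed arguments instead of raw label strings.
var limitCounter = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: "demo",
		Subsystem: "schedule",
		Name:      "operator_limit",
		Help:      "Counter of operators meeting limit.",
	}, []string{"type", "kind"})

func incLimitCounter(typ, kind string) {
	limitCounter.WithLabelValues(typ, kind).Inc()
}

func main() {
	prometheus.MustRegister(limitCounter)
	incLimitCounter("merge-checker", "merge")
}

Keeping the vector unexported narrows the label space to whatever the helper accepts, which is the design direction this patch takes across checkers and schedulers.
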
func (oc *Controller) GetOperatorsOfKind(mask OpKind) []*Operator { - operators := make([]*Operator, 0, oc.opNotifierQueue.Len()) + operators := make([]*Operator, 0, oc.opNotifierQueue.len()) oc.operators.Range( func(_, value any) bool { op := value.(*Operator) diff --git a/pkg/schedule/operator/operator_controller_test.go b/pkg/schedule/operator/operator_controller_test.go index 2b16516c4c7..3894df7e5e7 100644 --- a/pkg/schedule/operator/operator_controller_test.go +++ b/pkg/schedule/operator/operator_controller_test.go @@ -364,10 +364,10 @@ func (suite *operatorControllerTestSuite) TestPollDispatchRegion() { oc.SetOperator(op4) re.True(op2.Start()) oc.SetOperator(op2) - oc.opNotifierQueue.Push(&operatorWithTime{op: op1, time: time.Now().Add(100 * time.Millisecond)}) - oc.opNotifierQueue.Push(&operatorWithTime{op: op3, time: time.Now().Add(300 * time.Millisecond)}) - oc.opNotifierQueue.Push(&operatorWithTime{op: op4, time: time.Now().Add(499 * time.Millisecond)}) - oc.opNotifierQueue.Push(&operatorWithTime{op: op2, time: time.Now().Add(500 * time.Millisecond)}) + oc.opNotifierQueue.push(&operatorWithTime{op: op1, time: time.Now().Add(100 * time.Millisecond)}) + oc.opNotifierQueue.push(&operatorWithTime{op: op3, time: time.Now().Add(300 * time.Millisecond)}) + oc.opNotifierQueue.push(&operatorWithTime{op: op4, time: time.Now().Add(499 * time.Millisecond)}) + oc.opNotifierQueue.push(&operatorWithTime{op: op2, time: time.Now().Add(500 * time.Millisecond)}) } // first poll got nil r, next := oc.pollNeedDispatchRegion() @@ -447,7 +447,7 @@ func (suite *operatorControllerTestSuite) TestPollDispatchRegionForMergeRegion() r, next = controller.pollNeedDispatchRegion() re.True(next) re.Nil(r) - re.Equal(1, controller.opNotifierQueue.Len()) + re.Equal(1, controller.opNotifierQueue.len()) re.Empty(controller.GetOperators()) re.Empty(controller.wop.ListOperator()) re.NotNil(controller.records.Get(101)) @@ -458,7 +458,7 @@ func (suite *operatorControllerTestSuite) TestPollDispatchRegionForMergeRegion() r, next = controller.pollNeedDispatchRegion() re.True(next) re.Nil(r) - re.Equal(0, controller.opNotifierQueue.Len()) + re.Equal(0, controller.opNotifierQueue.len()) // Add the two ops to waiting operators again. 
source.GetMeta().RegionEpoch = &metapb.RegionEpoch{ConfVer: 0, Version: 0} @@ -478,7 +478,7 @@ func (suite *operatorControllerTestSuite) TestPollDispatchRegionForMergeRegion() r, next = controller.pollNeedDispatchRegion() re.True(next) re.Nil(r) - re.Equal(1, controller.opNotifierQueue.Len()) + re.Equal(1, controller.opNotifierQueue.len()) re.Empty(controller.GetOperators()) re.Empty(controller.wop.ListOperator()) re.NotNil(controller.records.Get(101)) @@ -488,7 +488,7 @@ func (suite *operatorControllerTestSuite) TestPollDispatchRegionForMergeRegion() r, next = controller.pollNeedDispatchRegion() re.True(next) re.Nil(r) - re.Equal(0, controller.opNotifierQueue.Len()) + re.Equal(0, controller.opNotifierQueue.len()) } func (suite *operatorControllerTestSuite) TestCheckOperatorLightly() { diff --git a/pkg/schedule/operator/operator_queue.go b/pkg/schedule/operator/operator_queue.go index 8643717d5ad..51991ff7ab4 100644 --- a/pkg/schedule/operator/operator_queue.go +++ b/pkg/schedule/operator/operator_queue.go @@ -67,19 +67,19 @@ func newConcurrentHeapOpQueue() *concurrentHeapOpQueue { return &concurrentHeapOpQueue{heap: make(operatorQueue, 0)} } -func (ch *concurrentHeapOpQueue) Len() int { +func (ch *concurrentHeapOpQueue) len() int { ch.Lock() defer ch.Unlock() return len(ch.heap) } -func (ch *concurrentHeapOpQueue) Push(x *operatorWithTime) { +func (ch *concurrentHeapOpQueue) push(x *operatorWithTime) { ch.Lock() defer ch.Unlock() heap.Push(&ch.heap, x) } -func (ch *concurrentHeapOpQueue) Pop() (*operatorWithTime, bool) { +func (ch *concurrentHeapOpQueue) pop() (*operatorWithTime, bool) { ch.Lock() defer ch.Unlock() if len(ch.heap) == 0 { diff --git a/pkg/schedule/scatter/region_scatterer.go b/pkg/schedule/scatter/region_scatterer.go index 100b9eb764d..efef5439fed 100644 --- a/pkg/schedule/scatter/region_scatterer.go +++ b/pkg/schedule/scatter/region_scatterer.go @@ -125,12 +125,12 @@ type RegionScatterer struct { ordinaryEngine engineContext specialEngines sync.Map opController *operator.Controller - addSuspectRegions func(regionIDs ...uint64) + addSuspectRegions func(bool, ...uint64) } // NewRegionScatterer creates a region scatterer. // RegionScatter is used for the `Lightning`, it will scatter the specified regions before import data. -func NewRegionScatterer(ctx context.Context, cluster sche.SharedCluster, opController *operator.Controller, addSuspectRegions func(regionIDs ...uint64)) *RegionScatterer { +func NewRegionScatterer(ctx context.Context, cluster sche.SharedCluster, opController *operator.Controller, addSuspectRegions func(bool, ...uint64)) *RegionScatterer { return &RegionScatterer{ ctx: ctx, name: regionScatterName, @@ -275,7 +275,7 @@ func (r *RegionScatterer) scatterRegions(regions map[uint64]*core.RegionInfo, fa // in a group level instead of cluster level. 
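
The operator_queue.go hunk above unexports the queue methods; the underlying pattern is a container/heap ordered by notify time and guarded by a mutex. A minimal sketch of that pattern with illustrative types (not the PD operatorWithTime/concurrentHeapOpQueue definitions):

package main

import (
	"container/heap"
	"fmt"
	"sync"
	"time"
)

type item struct {
	name string
	at   time.Time
}

// timeHeap implements heap.Interface ordered by the earliest notify time.
type timeHeap []*item

func (h timeHeap) Len() int           { return len(h) }
func (h timeHeap) Less(i, j int) bool { return h[i].at.Before(h[j].at) }
func (h timeHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }
func (h *timeHeap) Push(x any)        { *h = append(*h, x.(*item)) }
func (h *timeHeap) Pop() any {
	old := *h
	n := len(old)
	it := old[n-1]
	*h = old[:n-1]
	return it
}

// safeQueue exposes only lowercase push/pop, mirroring the unexported API.
type safeQueue struct {
	sync.Mutex
	heap timeHeap
}

func (q *safeQueue) push(it *item) {
	q.Lock()
	defer q.Unlock()
	heap.Push(&q.heap, it)
}

func (q *safeQueue) pop() (*item, bool) {
	q.Lock()
	defer q.Unlock()
	if len(q.heap) == 0 {
		return nil, false
	}
	return heap.Pop(&q.heap).(*item), true
}

func main() {
	q := &safeQueue{}
	q.push(&item{name: "late", at: time.Now().Add(time.Second)})
	q.push(&item{name: "soon", at: time.Now()})
	it, _ := q.pop()
	fmt.Println(it.name) // "soon": the earliest notify time comes out first
}
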
func (r *RegionScatterer) Scatter(region *core.RegionInfo, group string, skipStoreLimit bool) (*operator.Operator, error) { if !filter.IsRegionReplicated(r.cluster, region) { - r.addSuspectRegions(region.GetID()) + r.addSuspectRegions(false, region.GetID()) scatterSkipNotReplicatedCounter.Inc() log.Warn("region not replicated during scatter", zap.Uint64("region-id", region.GetID())) return nil, errors.Errorf("region %d is not fully replicated", region.GetID()) diff --git a/pkg/schedule/schedulers/balance_leader.go b/pkg/schedule/schedulers/balance_leader.go index 899737536e2..f6c8dd5d1b6 100644 --- a/pkg/schedule/schedulers/balance_leader.go +++ b/pkg/schedule/schedulers/balance_leader.go @@ -64,7 +64,7 @@ type balanceLeaderSchedulerConfig struct { Batch int `json:"batch"` } -func (conf *balanceLeaderSchedulerConfig) Update(data []byte) (int, any) { +func (conf *balanceLeaderSchedulerConfig) update(data []byte) (int, any) { conf.Lock() defer conf.Unlock() @@ -146,19 +146,19 @@ func newBalanceLeaderHandler(conf *balanceLeaderSchedulerConfig) http.Handler { rd: render.New(render.Options{IndentJSON: true}), } router := mux.NewRouter() - router.HandleFunc("/config", handler.UpdateConfig).Methods(http.MethodPost) - router.HandleFunc("/list", handler.ListConfig).Methods(http.MethodGet) + router.HandleFunc("/config", handler.updateConfig).Methods(http.MethodPost) + router.HandleFunc("/list", handler.listConfig).Methods(http.MethodGet) return router } -func (handler *balanceLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { +func (handler *balanceLeaderHandler) updateConfig(w http.ResponseWriter, r *http.Request) { data, _ := io.ReadAll(r.Body) r.Body.Close() - httpCode, v := handler.config.Update(data) + httpCode, v := handler.config.update(data) handler.rd.JSON(w, httpCode, v) } -func (handler *balanceLeaderHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { +func (handler *balanceLeaderHandler) listConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.Clone() handler.rd.JSON(w, http.StatusOK, conf) } @@ -166,7 +166,6 @@ func (handler *balanceLeaderHandler) ListConfig(w http.ResponseWriter, _ *http.R type balanceLeaderScheduler struct { *BaseScheduler *retryQuota - name string conf *balanceLeaderSchedulerConfig handler http.Handler filters []filter.Filter @@ -176,14 +175,11 @@ type balanceLeaderScheduler struct { // newBalanceLeaderScheduler creates a scheduler that tends to keep leaders on // each store balanced. func newBalanceLeaderScheduler(opController *operator.Controller, conf *balanceLeaderSchedulerConfig, options ...BalanceLeaderCreateOption) Scheduler { - base := NewBaseScheduler(opController) s := &balanceLeaderScheduler{ - BaseScheduler: base, + BaseScheduler: NewBaseScheduler(opController, types.BalanceLeaderScheduler), retryQuota: newRetryQuota(), - name: BalanceLeaderName, conf: conf, handler: newBalanceLeaderHandler(conf), - filterCounter: filter.NewCounter(types.BalanceLeaderScheduler.String()), } for _, option := range options { option(s) @@ -192,6 +188,7 @@ func newBalanceLeaderScheduler(opController *operator.Controller, conf *balanceL &filter.StoreStateFilter{ActionScope: s.GetName(), TransferLeader: true, OperatorLevel: constant.High}, filter.NewSpecialUseFilter(s.GetName()), } + s.filterCounter = filter.NewCounter(s.GetName()) return s } @@ -202,13 +199,6 @@ func (l *balanceLeaderScheduler) ServeHTTP(w http.ResponseWriter, r *http.Reques // BalanceLeaderCreateOption is used to create a scheduler with an option. 
type BalanceLeaderCreateOption func(s *balanceLeaderScheduler) -// WithBalanceLeaderFilterCounterName sets the filter counter name for the scheduler. -func WithBalanceLeaderFilterCounterName(name string) BalanceLeaderCreateOption { - return func(s *balanceLeaderScheduler) { - s.filterCounter.SetScope(name) - } -} - // WithBalanceLeaderName sets the name for the scheduler. func WithBalanceLeaderName(name string) BalanceLeaderCreateOption { return func(s *balanceLeaderScheduler) { @@ -216,14 +206,6 @@ func WithBalanceLeaderName(name string) BalanceLeaderCreateOption { } } -func (l *balanceLeaderScheduler) GetName() string { - return l.name -} - -func (*balanceLeaderScheduler) GetType() string { - return BalanceLeaderType -} - func (l *balanceLeaderScheduler) EncodeConfig() ([]byte, error) { l.conf.RLock() defer l.conf.RUnlock() @@ -252,7 +234,7 @@ func (l *balanceLeaderScheduler) ReloadConfig() error { func (l *balanceLeaderScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { allowed := l.OpController.OperatorCount(operator.OpLeader) < cluster.GetSchedulerConfig().GetLeaderScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(l.GetType(), operator.OpLeader.String()).Inc() + operator.IncOperatorLimitCounter(l.GetType(), operator.OpLeader) } return allowed } @@ -366,7 +348,7 @@ func (l *balanceLeaderScheduler) Schedule(cluster sche.SchedulerCluster, dryRun stores := cluster.GetStores() scoreFunc := func(store *core.StoreInfo) float64 { - return store.LeaderScore(solver.kind.Policy, solver.GetOpInfluence(store.GetID())) + return store.LeaderScore(solver.kind.Policy, solver.getOpInfluence(store.GetID())) } sourceCandidate := newCandidateStores(filter.SelectSourceStores(stores, l.filters, cluster.GetSchedulerConfig(), collector, l.filterCounter), false, scoreFunc) targetCandidate := newCandidateStores(filter.SelectTargetStores(stores, l.filters, cluster.GetSchedulerConfig(), nil, l.filterCounter), true, scoreFunc) @@ -397,7 +379,7 @@ func (l *balanceLeaderScheduler) Schedule(cluster sche.SchedulerCluster, dryRun } } } - l.retryQuota.GC(append(sourceCandidate.stores, targetCandidate.stores...)) + l.retryQuota.gc(append(sourceCandidate.stores, targetCandidate.stores...)) return result, collector.GetPlans() } @@ -406,7 +388,7 @@ func createTransferLeaderOperator(cs *candidateStores, dir string, l *balanceLea store := cs.getStore() ssolver.Step++ defer func() { ssolver.Step-- }() - retryLimit := l.retryQuota.GetLimit(store) + retryLimit := l.retryQuota.getLimit(store) var creator func(*solver, *plan.Collector) *operator.Operator switch dir { case transferOut: @@ -426,9 +408,9 @@ func createTransferLeaderOperator(cs *candidateStores, dir string, l *balanceLea } } if op != nil { - l.retryQuota.ResetLimit(store) + l.retryQuota.resetLimit(store) } else { - l.Attenuate(store) + l.attenuate(store) log.Debug("no operator created for selected stores", zap.String("scheduler", l.GetName()), zap.Uint64(dir, store.GetID())) cs.next() } @@ -454,10 +436,10 @@ func makeInfluence(op *operator.Operator, plan *solver, usedRegions map[uint64]s // It randomly selects a health region from the source store, then picks // the best follower peer and transfers the leader. 
func (l *balanceLeaderScheduler) transferLeaderOut(solver *solver, collector *plan.Collector) *operator.Operator { - solver.Region = filter.SelectOneRegion(solver.RandLeaderRegions(solver.SourceStoreID(), l.conf.getRanges()), + solver.Region = filter.SelectOneRegion(solver.RandLeaderRegions(solver.sourceStoreID(), l.conf.getRanges()), collector, filter.NewRegionPendingFilter(), filter.NewRegionDownFilter()) if solver.Region == nil { - log.Debug("store has no leader", zap.String("scheduler", l.GetName()), zap.Uint64("store-id", solver.SourceStoreID())) + log.Debug("store has no leader", zap.String("scheduler", l.GetName()), zap.Uint64("store-id", solver.sourceStoreID())) balanceLeaderNoLeaderRegionCounter.Inc() return nil } @@ -480,8 +462,8 @@ func (l *balanceLeaderScheduler) transferLeaderOut(solver *solver, collector *pl targets = filter.SelectTargetStores(targets, finalFilters, conf, collector, l.filterCounter) leaderSchedulePolicy := conf.GetLeaderSchedulePolicy() sort.Slice(targets, func(i, j int) bool { - iOp := solver.GetOpInfluence(targets[i].GetID()) - jOp := solver.GetOpInfluence(targets[j].GetID()) + iOp := solver.getOpInfluence(targets[i].GetID()) + jOp := solver.getOpInfluence(targets[j].GetID()) return targets[i].LeaderScore(leaderSchedulePolicy, iOp) < targets[j].LeaderScore(leaderSchedulePolicy, jOp) }) for _, solver.Target = range targets { @@ -498,10 +480,10 @@ func (l *balanceLeaderScheduler) transferLeaderOut(solver *solver, collector *pl // It randomly selects a health region from the target store, then picks // the worst follower peer and transfers the leader. func (l *balanceLeaderScheduler) transferLeaderIn(solver *solver, collector *plan.Collector) *operator.Operator { - solver.Region = filter.SelectOneRegion(solver.RandFollowerRegions(solver.TargetStoreID(), l.conf.getRanges()), + solver.Region = filter.SelectOneRegion(solver.RandFollowerRegions(solver.targetStoreID(), l.conf.getRanges()), nil, filter.NewRegionPendingFilter(), filter.NewRegionDownFilter()) if solver.Region == nil { - log.Debug("store has no follower", zap.String("scheduler", l.GetName()), zap.Uint64("store-id", solver.TargetStoreID())) + log.Debug("store has no follower", zap.String("scheduler", l.GetName()), zap.Uint64("store-id", solver.targetStoreID())) balanceLeaderNoFollowerRegionCounter.Inc() return nil } @@ -554,7 +536,7 @@ func (l *balanceLeaderScheduler) createOperator(solver *solver, collector *plan. } solver.Step++ defer func() { solver.Step-- }() - op, err := operator.CreateTransferLeaderOperator(BalanceLeaderType, solver, solver.Region, solver.TargetStoreID(), []uint64{}, operator.OpLeader) + op, err := operator.CreateTransferLeaderOperator(BalanceLeaderType, solver, solver.Region, solver.targetStoreID(), []uint64{}, operator.OpLeader) if err != nil { log.Debug("fail to create balance leader operator", errs.ZapError(err)) if collector != nil { @@ -566,7 +548,7 @@ func (l *balanceLeaderScheduler) createOperator(solver *solver, collector *plan. 
balanceLeaderNewOpCounter, ) op.FinishedCounters = append(op.FinishedCounters, - balanceDirectionCounter.WithLabelValues(l.GetName(), solver.SourceMetricLabel(), solver.TargetMetricLabel()), + balanceDirectionCounter.WithLabelValues(l.GetName(), solver.sourceMetricLabel(), solver.targetMetricLabel()), ) op.SetAdditionalInfo("sourceScore", strconv.FormatFloat(solver.sourceScore, 'f', 2, 64)) op.SetAdditionalInfo("targetScore", strconv.FormatFloat(solver.targetScore, 'f', 2, 64)) diff --git a/pkg/schedule/schedulers/balance_region.go b/pkg/schedule/schedulers/balance_region.go index b26830155b0..7c19187dd74 100644 --- a/pkg/schedule/schedulers/balance_region.go +++ b/pkg/schedule/schedulers/balance_region.go @@ -38,7 +38,6 @@ const ( ) type balanceRegionSchedulerConfig struct { - Name string `json:"name"` Ranges []core.KeyRange `json:"ranges"` // TODO: When we prepare to use Ranges, we will need to implement the ReloadConfig function for this scheduler. } @@ -46,6 +45,7 @@ type balanceRegionSchedulerConfig struct { type balanceRegionScheduler struct { *BaseScheduler *retryQuota + name string conf *balanceRegionSchedulerConfig filters []filter.Filter filterCounter *filter.Counter @@ -54,12 +54,11 @@ type balanceRegionScheduler struct { // newBalanceRegionScheduler creates a scheduler that tends to keep regions on // each store balanced. func newBalanceRegionScheduler(opController *operator.Controller, conf *balanceRegionSchedulerConfig, opts ...BalanceRegionCreateOption) Scheduler { - base := NewBaseScheduler(opController) scheduler := &balanceRegionScheduler{ - BaseScheduler: base, + BaseScheduler: NewBaseScheduler(opController, types.BalanceRegionScheduler), retryQuota: newRetryQuota(), + name: types.BalanceRegionScheduler.String(), conf: conf, - filterCounter: filter.NewCounter(types.BalanceRegionScheduler.String()), } for _, setOption := range opts { setOption(scheduler) @@ -68,6 +67,7 @@ func newBalanceRegionScheduler(opController *operator.Controller, conf *balanceR &filter.StoreStateFilter{ActionScope: scheduler.GetName(), MoveRegion: true, OperatorLevel: constant.Medium}, filter.NewSpecialUseFilter(scheduler.GetName()), } + scheduler.filterCounter = filter.NewCounter(scheduler.GetName()) return scheduler } @@ -77,37 +77,25 @@ type BalanceRegionCreateOption func(s *balanceRegionScheduler) // WithBalanceRegionName sets the name for the scheduler. func WithBalanceRegionName(name string) BalanceRegionCreateOption { return func(s *balanceRegionScheduler) { - s.conf.Name = name + s.name = name } } -// WithBalanceRegionFilterCounterName sets the filter counter name for the scheduler. -func WithBalanceRegionFilterCounterName(name string) BalanceRegionCreateOption { - return func(s *balanceRegionScheduler) { - s.filterCounter.SetScope(name) - } -} - -func (s *balanceRegionScheduler) GetName() string { - return s.conf.Name -} - -func (*balanceRegionScheduler) GetType() string { - return BalanceRegionType -} - +// EncodeConfig implements the Scheduler interface. func (s *balanceRegionScheduler) EncodeConfig() ([]byte, error) { return EncodeConfig(s.conf) } +// IsScheduleAllowed implements the Scheduler interface. 
func (s *balanceRegionScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { allowed := s.OpController.OperatorCount(operator.OpRegion) < cluster.GetSchedulerConfig().GetRegionScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpRegion.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpRegion) } return allowed } +// Schedule implements the Scheduler interface. func (s *balanceRegionScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { basePlan := plan.NewBalanceSchedulerPlan() defer s.filterCounter.Flush() @@ -127,8 +115,8 @@ func (s *balanceRegionScheduler) Schedule(cluster sche.SchedulerCluster, dryRun solver := newSolver(basePlan, kind, cluster, opInfluence) sort.Slice(sourceStores, func(i, j int) bool { - iOp := solver.GetOpInfluence(sourceStores[i].GetID()) - jOp := solver.GetOpInfluence(sourceStores[j].GetID()) + iOp := solver.getOpInfluence(sourceStores[i].GetID()) + jOp := solver.getOpInfluence(sourceStores[j].GetID()) return sourceStores[i].RegionScore(conf.GetRegionScoreFormulaVersion(), conf.GetHighSpaceRatio(), conf.GetLowSpaceRatio(), iOp) > sourceStores[j].RegionScore(conf.GetRegionScoreFormulaVersion(), conf.GetHighSpaceRatio(), conf.GetLowSpaceRatio(), jOp) }) @@ -153,7 +141,7 @@ func (s *balanceRegionScheduler) Schedule(cluster sche.SchedulerCluster, dryRun // sourcesStore is sorted by region score desc, so we pick the first store as source store. for sourceIndex, solver.Source = range sourceStores { - retryLimit := s.retryQuota.GetLimit(solver.Source) + retryLimit := s.retryQuota.getLimit(solver.Source) solver.sourceScore = solver.sourceStoreScore(s.GetName()) if sourceIndex == len(sourceStores)-1 { break @@ -161,22 +149,22 @@ func (s *balanceRegionScheduler) Schedule(cluster sche.SchedulerCluster, dryRun for i := 0; i < retryLimit; i++ { // Priority pick the region that has a pending peer. // Pending region may mean the disk is overload, remove the pending region firstly. - solver.Region = filter.SelectOneRegion(cluster.RandPendingRegions(solver.SourceStoreID(), s.conf.Ranges), collector, - append(baseRegionFilters, filter.NewRegionWitnessFilter(solver.SourceStoreID()))...) + solver.Region = filter.SelectOneRegion(cluster.RandPendingRegions(solver.sourceStoreID(), s.conf.Ranges), collector, + append(baseRegionFilters, filter.NewRegionWitnessFilter(solver.sourceStoreID()))...) if solver.Region == nil { // Then pick the region that has a follower in the source store. - solver.Region = filter.SelectOneRegion(cluster.RandFollowerRegions(solver.SourceStoreID(), s.conf.Ranges), collector, - append(baseRegionFilters, filter.NewRegionWitnessFilter(solver.SourceStoreID()), pendingFilter)...) + solver.Region = filter.SelectOneRegion(cluster.RandFollowerRegions(solver.sourceStoreID(), s.conf.Ranges), collector, + append(baseRegionFilters, filter.NewRegionWitnessFilter(solver.sourceStoreID()), pendingFilter)...) } if solver.Region == nil { // Then pick the region has the leader in the source store. - solver.Region = filter.SelectOneRegion(cluster.RandLeaderRegions(solver.SourceStoreID(), s.conf.Ranges), collector, - append(baseRegionFilters, filter.NewRegionWitnessFilter(solver.SourceStoreID()), pendingFilter)...) + solver.Region = filter.SelectOneRegion(cluster.RandLeaderRegions(solver.sourceStoreID(), s.conf.Ranges), collector, + append(baseRegionFilters, filter.NewRegionWitnessFilter(solver.sourceStoreID()), pendingFilter)...) 
} if solver.Region == nil { // Finally, pick learner. - solver.Region = filter.SelectOneRegion(cluster.RandLearnerRegions(solver.SourceStoreID(), s.conf.Ranges), collector, - append(baseRegionFilters, filter.NewRegionWitnessFilter(solver.SourceStoreID()), pendingFilter)...) + solver.Region = filter.SelectOneRegion(cluster.RandLearnerRegions(solver.sourceStoreID(), s.conf.Ranges), collector, + append(baseRegionFilters, filter.NewRegionWitnessFilter(solver.sourceStoreID()), pendingFilter)...) } if solver.Region == nil { balanceRegionNoRegionCounter.Inc() @@ -206,15 +194,15 @@ func (s *balanceRegionScheduler) Schedule(cluster sche.SchedulerCluster, dryRun // satisfy all the filters, so the region fit must belong the scheduled region. solver.fit = replicaFilter.(*filter.RegionReplicatedFilter).GetFit() if op := s.transferPeer(solver, collector, sourceStores[sourceIndex+1:], faultTargets); op != nil { - s.retryQuota.ResetLimit(solver.Source) + s.retryQuota.resetLimit(solver.Source) op.Counters = append(op.Counters, balanceRegionNewOpCounter) return []*operator.Operator{op}, collector.GetPlans() } solver.Step-- } - s.retryQuota.Attenuate(solver.Source) + s.retryQuota.attenuate(solver.Source) } - s.retryQuota.GC(stores) + s.retryQuota.gc(stores) return nil, collector.GetPlans() } diff --git a/pkg/schedule/schedulers/balance_test.go b/pkg/schedule/schedulers/balance_test.go index 26214ed5456..0cfaf510f1b 100644 --- a/pkg/schedule/schedulers/balance_test.go +++ b/pkg/schedule/schedulers/balance_test.go @@ -1399,8 +1399,8 @@ func TestConcurrencyUpdateConfig(t *testing.T) { return default: } - sche.config.BuildWithArgs(args) - re.NoError(sche.config.Persist()) + sche.config.buildWithArgs(args) + re.NoError(sche.config.persist()) } }() for i := 0; i < 1000; i++ { diff --git a/pkg/schedule/schedulers/balance_witness.go b/pkg/schedule/schedulers/balance_witness.go index 1c4daa62634..dbb0d012c72 100644 --- a/pkg/schedule/schedulers/balance_witness.go +++ b/pkg/schedule/schedulers/balance_witness.go @@ -143,19 +143,19 @@ func newBalanceWitnessHandler(conf *balanceWitnessSchedulerConfig) http.Handler rd: render.New(render.Options{IndentJSON: true}), } router := mux.NewRouter() - router.HandleFunc("/config", handler.UpdateConfig).Methods(http.MethodPost) - router.HandleFunc("/list", handler.ListConfig).Methods(http.MethodGet) + router.HandleFunc("/config", handler.updateConfig).Methods(http.MethodPost) + router.HandleFunc("/list", handler.listConfig).Methods(http.MethodGet) return router } -func (handler *balanceWitnessHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { +func (handler *balanceWitnessHandler) updateConfig(w http.ResponseWriter, r *http.Request) { data, _ := io.ReadAll(r.Body) r.Body.Close() httpCode, v := handler.config.Update(data) handler.rd.JSON(w, httpCode, v) } -func (handler *balanceWitnessHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { +func (handler *balanceWitnessHandler) listConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.Clone() handler.rd.JSON(w, http.StatusOK, conf) } @@ -163,7 +163,6 @@ func (handler *balanceWitnessHandler) ListConfig(w http.ResponseWriter, _ *http. type balanceWitnessScheduler struct { *BaseScheduler *retryQuota - name string conf *balanceWitnessSchedulerConfig handler http.Handler filters []filter.Filter @@ -174,11 +173,9 @@ type balanceWitnessScheduler struct { // newBalanceWitnessScheduler creates a scheduler that tends to keep witnesses on // each store balanced. 
func newBalanceWitnessScheduler(opController *operator.Controller, conf *balanceWitnessSchedulerConfig, options ...BalanceWitnessCreateOption) Scheduler { - base := NewBaseScheduler(opController) s := &balanceWitnessScheduler{ - BaseScheduler: base, + BaseScheduler: NewBaseScheduler(opController, types.BalanceWitnessScheduler), retryQuota: newRetryQuota(), - name: BalanceWitnessName, conf: conf, handler: newBalanceWitnessHandler(conf), counter: balanceWitnessCounter, @@ -194,6 +191,7 @@ func newBalanceWitnessScheduler(opController *operator.Controller, conf *balance return s } +// ServeHTTP implements the http.Handler interface. func (b *balanceWitnessScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { b.handler.ServeHTTP(w, r) } @@ -208,27 +206,14 @@ func WithBalanceWitnessCounter(counter *prometheus.CounterVec) BalanceWitnessCre } } -// WithBalanceWitnessName sets the name for the scheduler. -func WithBalanceWitnessName(name string) BalanceWitnessCreateOption { - return func(s *balanceWitnessScheduler) { - s.name = name - } -} - -func (b *balanceWitnessScheduler) GetName() string { - return b.name -} - -func (*balanceWitnessScheduler) GetType() string { - return BalanceWitnessType -} - +// EncodeConfig implements the Scheduler interface. func (b *balanceWitnessScheduler) EncodeConfig() ([]byte, error) { b.conf.RLock() defer b.conf.RUnlock() return EncodeConfig(b.conf) } +// ReloadConfig implements the Scheduler interface. func (b *balanceWitnessScheduler) ReloadConfig() error { b.conf.Lock() defer b.conf.Unlock() @@ -248,14 +233,16 @@ func (b *balanceWitnessScheduler) ReloadConfig() error { return nil } +// IsScheduleAllowed implements the Scheduler interface. func (b *balanceWitnessScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { allowed := b.OpController.OperatorCount(operator.OpWitness) < cluster.GetSchedulerConfig().GetWitnessScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(b.GetType(), operator.OpWitness.String()).Inc() + operator.IncOperatorLimitCounter(b.GetType(), operator.OpWitness) } return allowed } +// Schedule implements the Scheduler interface. 
func (b *balanceWitnessScheduler) Schedule(cluster sche.SchedulerCluster, dryRun bool) ([]*operator.Operator, []plan.Plan) { basePlan := plan.NewBalanceSchedulerPlan() var collector *plan.Collector @@ -271,7 +258,7 @@ func (b *balanceWitnessScheduler) Schedule(cluster sche.SchedulerCluster, dryRun stores := cluster.GetStores() scoreFunc := func(store *core.StoreInfo) float64 { - return store.WitnessScore(solver.GetOpInfluence(store.GetID())) + return store.WitnessScore(solver.getOpInfluence(store.GetID())) } sourceCandidate := newCandidateStores(filter.SelectSourceStores(stores, b.filters, cluster.GetSchedulerConfig(), collector, b.filterCounter), false, scoreFunc) usedRegions := make(map[uint64]struct{}) @@ -287,7 +274,7 @@ func (b *balanceWitnessScheduler) Schedule(cluster sche.SchedulerCluster, dryRun makeInfluence(op, solver, usedRegions, sourceCandidate) } } - b.retryQuota.GC(sourceCandidate.stores) + b.retryQuota.gc(sourceCandidate.stores) return result, collector.GetPlans() } @@ -296,7 +283,7 @@ func createTransferWitnessOperator(cs *candidateStores, b *balanceWitnessSchedul store := cs.getStore() ssolver.Step++ defer func() { ssolver.Step-- }() - retryLimit := b.retryQuota.GetLimit(store) + retryLimit := b.retryQuota.getLimit(store) ssolver.Source, ssolver.Target = store, nil var op *operator.Operator for i := 0; i < retryLimit; i++ { @@ -309,9 +296,9 @@ func createTransferWitnessOperator(cs *candidateStores, b *balanceWitnessSchedul } } if op != nil { - b.retryQuota.ResetLimit(store) + b.retryQuota.resetLimit(store) } else { - b.Attenuate(store) + b.attenuate(store) log.Debug("no operator created for selected stores", zap.String("scheduler", b.GetName()), zap.Uint64("transfer-out", store.GetID())) cs.next() } @@ -322,10 +309,10 @@ func createTransferWitnessOperator(cs *candidateStores, b *balanceWitnessSchedul // It randomly selects a health region from the source store, then picks // the best follower peer and transfers the witness. 
func (b *balanceWitnessScheduler) transferWitnessOut(solver *solver, collector *plan.Collector) *operator.Operator { - solver.Region = filter.SelectOneRegion(solver.RandWitnessRegions(solver.SourceStoreID(), b.conf.getRanges()), + solver.Region = filter.SelectOneRegion(solver.RandWitnessRegions(solver.sourceStoreID(), b.conf.getRanges()), collector, filter.NewRegionPendingFilter(), filter.NewRegionDownFilter()) if solver.Region == nil { - log.Debug("store has no witness", zap.String("scheduler", b.GetName()), zap.Uint64("store-id", solver.SourceStoreID())) + log.Debug("store has no witness", zap.String("scheduler", b.GetName()), zap.Uint64("store-id", solver.sourceStoreID())) schedulerCounter.WithLabelValues(b.GetName(), "no-witness-region").Inc() return nil } @@ -339,8 +326,8 @@ func (b *balanceWitnessScheduler) transferWitnessOut(solver *solver, collector * } targets = filter.SelectTargetStores(targets, finalFilters, conf, collector, b.filterCounter) sort.Slice(targets, func(i, j int) bool { - iOp := solver.GetOpInfluence(targets[i].GetID()) - jOp := solver.GetOpInfluence(targets[j].GetID()) + iOp := solver.getOpInfluence(targets[i].GetID()) + jOp := solver.getOpInfluence(targets[j].GetID()) return targets[i].WitnessScore(iOp) < targets[j].WitnessScore(jOp) }) for _, solver.Target = range targets { @@ -370,7 +357,7 @@ func (b *balanceWitnessScheduler) createOperator(solver *solver, collector *plan } solver.Step++ defer func() { solver.Step-- }() - op, err := operator.CreateMoveWitnessOperator(BalanceWitnessType, solver, solver.Region, solver.SourceStoreID(), solver.TargetStoreID()) + op, err := operator.CreateMoveWitnessOperator(BalanceWitnessType, solver, solver.Region, solver.sourceStoreID(), solver.targetStoreID()) if err != nil { log.Debug("fail to create balance witness operator", errs.ZapError(err)) return nil @@ -379,9 +366,9 @@ func (b *balanceWitnessScheduler) createOperator(solver *solver, collector *plan schedulerCounter.WithLabelValues(b.GetName(), "new-operator"), ) op.FinishedCounters = append(op.FinishedCounters, - balanceDirectionCounter.WithLabelValues(b.GetName(), solver.SourceMetricLabel(), solver.TargetMetricLabel()), - b.counter.WithLabelValues("move-witness", solver.SourceMetricLabel()+"-out"), - b.counter.WithLabelValues("move-witness", solver.TargetMetricLabel()+"-in"), + balanceDirectionCounter.WithLabelValues(b.GetName(), solver.sourceMetricLabel(), solver.targetMetricLabel()), + b.counter.WithLabelValues("move-witness", solver.sourceMetricLabel()+"-out"), + b.counter.WithLabelValues("move-witness", solver.targetMetricLabel()+"-in"), ) op.SetAdditionalInfo("sourceScore", strconv.FormatFloat(solver.sourceScore, 'f', 2, 64)) op.SetAdditionalInfo("targetScore", strconv.FormatFloat(solver.targetScore, 'f', 2, 64)) diff --git a/pkg/schedule/schedulers/base_scheduler.go b/pkg/schedule/schedulers/base_scheduler.go index f3772757ad3..b3dae9856e6 100644 --- a/pkg/schedule/schedulers/base_scheduler.go +++ b/pkg/schedule/schedulers/base_scheduler.go @@ -23,6 +23,7 @@ import ( "github.com/tikv/pd/pkg/errs" sche "github.com/tikv/pd/pkg/schedule/core" "github.com/tikv/pd/pkg/schedule/operator" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/utils/typeutil" ) @@ -61,11 +62,14 @@ func intervalGrow(x time.Duration, maxInterval time.Duration, typ intervalGrowth // BaseScheduler is a basic scheduler for all other complex scheduler type BaseScheduler struct { OpController *operator.Controller + + name string + tp types.CheckerSchedulerType } // 
NewBaseScheduler returns a basic scheduler
-func NewBaseScheduler(opController *operator.Controller) *BaseScheduler {
-	return &BaseScheduler{OpController: opController}
+func NewBaseScheduler(opController *operator.Controller, tp types.CheckerSchedulerType) *BaseScheduler {
+	return &BaseScheduler{OpController: opController, tp: tp}
 }
 
 func (*BaseScheduler) ServeHTTP(w http.ResponseWriter, _ *http.Request) {
@@ -97,3 +101,16 @@ func (*BaseScheduler) PrepareConfig(sche.SchedulerCluster) error { return nil }
 
 // CleanConfig does some cleanup work about config.
 func (*BaseScheduler) CleanConfig(sche.SchedulerCluster) {}
+
+// GetName returns the name of the scheduler
+func (s *BaseScheduler) GetName() string {
+	if len(s.name) == 0 {
+		return s.tp.String()
+	}
+	return s.name
+}
+
+// GetType returns the type of the scheduler
+func (s *BaseScheduler) GetType() types.CheckerSchedulerType {
+	return s.tp
+}
diff --git a/pkg/schedule/schedulers/evict_leader.go b/pkg/schedule/schedulers/evict_leader.go
index 2adcfbe7e48..7e5c4706043 100644
--- a/pkg/schedule/schedulers/evict_leader.go
+++ b/pkg/schedule/schedulers/evict_leader.go
@@ -29,6 +29,7 @@ import (
 	"github.com/tikv/pd/pkg/schedule/filter"
 	"github.com/tikv/pd/pkg/schedule/operator"
 	"github.com/tikv/pd/pkg/schedule/plan"
+	types "github.com/tikv/pd/pkg/schedule/type"
 	"github.com/tikv/pd/pkg/storage/endpoint"
 	"github.com/tikv/pd/pkg/utils/apiutil"
 	"github.com/tikv/pd/pkg/utils/syncutil"
@@ -87,7 +88,6 @@ func (conf *evictLeaderSchedulerConfig) Clone() *evictLeaderSchedulerConfig {
 }
 
 func (conf *evictLeaderSchedulerConfig) persistLocked() error {
-	name := conf.getSchedulerName()
 	data, err := EncodeConfig(conf)
 	failpoint.Inject("persistFail", func() {
 		err = errors.New("fail to persist")
@@ -95,11 +95,7 @@ func (conf *evictLeaderSchedulerConfig) persistLocked() error {
 	if err != nil {
 		return err
 	}
-	return conf.storage.SaveSchedulerConfig(name, data)
-}
-
-func (*evictLeaderSchedulerConfig) getSchedulerName() string {
-	return EvictLeaderName
+	return conf.storage.SaveSchedulerConfig(types.EvictLeaderScheduler.String(), data)
 }
 
 func (conf *evictLeaderSchedulerConfig) getRanges(id uint64) []string {
@@ -256,10 +252,9 @@ type evictLeaderScheduler struct {
 // newEvictLeaderScheduler creates an admin scheduler that transfers all leaders
 // out of a store.
 func newEvictLeaderScheduler(opController *operator.Controller, conf *evictLeaderSchedulerConfig) Scheduler {
-	base := NewBaseScheduler(opController)
 	handler := newEvictLeaderHandler(conf)
 	return &evictLeaderScheduler{
-		BaseScheduler: base,
+		BaseScheduler: NewBaseScheduler(opController, types.EvictLeaderScheduler),
 		conf:          conf,
 		handler:       handler,
 	}
@@ -270,45 +265,44 @@ func (s *evictLeaderScheduler) EvictStoreIDs() []uint64 {
 	return s.conf.getStores()
 }
 
+// ServeHTTP implements the http.Handler interface.
 func (s *evictLeaderScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 	s.handler.ServeHTTP(w, r)
 }
 
-func (*evictLeaderScheduler) GetName() string {
-	return EvictLeaderName
-}
-
-func (*evictLeaderScheduler) GetType() string {
-	return EvictLeaderType
-}
-
+// EncodeConfig implements the Scheduler interface.
 func (s *evictLeaderScheduler) EncodeConfig() ([]byte, error) {
 	return s.conf.encodeConfig()
 }
 
+// ReloadConfig reloads the config from the storage.
 func (s *evictLeaderScheduler) ReloadConfig() error {
 	return s.conf.reloadConfig(s.GetName())
 }
 
+// PrepareConfig implements the Scheduler interface.
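The GetName and GetType methods added to BaseScheduler above are what allow the per-scheduler GetName/GetType boilerplate to be deleted throughout the rest of this diff. A compilable sketch of the name fallback; CheckerSchedulerType and the constant value below are simplified stand-ins for pkg/schedule/type:

package main

import "fmt"

// CheckerSchedulerType is a stand-in for the typed scheduler kind; only the
// fallback logic below matches what the diff adds to BaseScheduler.
type CheckerSchedulerType string

func (t CheckerSchedulerType) String() string { return string(t) }

const EvictLeaderScheduler CheckerSchedulerType = "evict-leader-scheduler"

type BaseScheduler struct {
	name string // optional override; empty for most schedulers
	tp   CheckerSchedulerType
}

// GetName returns the custom name if one was set, otherwise the type's string form.
func (s *BaseScheduler) GetName() string {
	if len(s.name) == 0 {
		return s.tp.String()
	}
	return s.name
}

func main() {
	s := &BaseScheduler{tp: EvictLeaderScheduler}
	fmt.Println(s.GetName()) // evict-leader-scheduler

	named := &BaseScheduler{name: "evict-leader-scheduler-1", tp: EvictLeaderScheduler}
	fmt.Println(named.GetName()) // evict-leader-scheduler-1
}

With this in place, a scheduler that never sets a custom name simply reports its type string, while a scheduler that does set one keeps reporting it.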
func (s *evictLeaderScheduler) PrepareConfig(cluster sche.SchedulerCluster) error { return s.conf.pauseLeaderTransfer(cluster) } +// CleanConfig implements the Scheduler interface. func (s *evictLeaderScheduler) CleanConfig(cluster sche.SchedulerCluster) { s.conf.resumeLeaderTransfer(cluster) } +// IsScheduleAllowed implements the Scheduler interface. func (s *evictLeaderScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { allowed := s.OpController.OperatorCount(operator.OpLeader) < cluster.GetSchedulerConfig().GetLeaderScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpLeader.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpLeader) } return allowed } +// Schedule implements the Scheduler interface. func (s *evictLeaderScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { evictLeaderCounter.Inc() - return scheduleEvictLeaderBatch(s.GetName(), s.GetType(), cluster, s.conf), nil + return scheduleEvictLeaderBatch(s.GetName(), cluster, s.conf), nil } func uniqueAppendOperator(dst []*operator.Operator, src ...*operator.Operator) []*operator.Operator { @@ -332,11 +326,11 @@ type evictLeaderStoresConf interface { getBatch() int } -func scheduleEvictLeaderBatch(name, typ string, cluster sche.SchedulerCluster, conf evictLeaderStoresConf) []*operator.Operator { +func scheduleEvictLeaderBatch(name string, cluster sche.SchedulerCluster, conf evictLeaderStoresConf) []*operator.Operator { var ops []*operator.Operator batchSize := conf.getBatch() for i := 0; i < batchSize; i++ { - once := scheduleEvictLeaderOnce(name, typ, cluster, conf) + once := scheduleEvictLeaderOnce(name, cluster, conf) // no more regions if len(once) == 0 { break @@ -350,7 +344,7 @@ func scheduleEvictLeaderBatch(name, typ string, cluster sche.SchedulerCluster, c return ops } -func scheduleEvictLeaderOnce(name, typ string, cluster sche.SchedulerCluster, conf evictLeaderStoresConf) []*operator.Operator { +func scheduleEvictLeaderOnce(name string, cluster sche.SchedulerCluster, conf evictLeaderStoresConf) []*operator.Operator { stores := conf.getStores() ops := make([]*operator.Operator, 0, len(stores)) for _, storeID := range stores { @@ -395,7 +389,7 @@ func scheduleEvictLeaderOnce(name, typ string, cluster sche.SchedulerCluster, co for _, t := range targets { targetIDs = append(targetIDs, t.GetID()) } - op, err := operator.CreateTransferLeaderOperator(typ, cluster, region, target.GetID(), targetIDs, operator.OpLeader) + op, err := operator.CreateTransferLeaderOperator(name, cluster, region, target.GetID(), targetIDs, operator.OpLeader) if err != nil { log.Debug("fail to create evict leader operator", errs.ZapError(err)) continue @@ -412,7 +406,7 @@ type evictLeaderHandler struct { config *evictLeaderSchedulerConfig } -func (handler *evictLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { +func (handler *evictLeaderHandler) updateConfig(w http.ResponseWriter, r *http.Request) { var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { return @@ -467,12 +461,12 @@ func (handler *evictLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R handler.rd.JSON(w, http.StatusOK, "The scheduler has been applied to the store.") } -func (handler *evictLeaderHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { +func (handler *evictLeaderHandler) listConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.Clone() 
handler.rd.JSON(w, http.StatusOK, conf) } -func (handler *evictLeaderHandler) DeleteConfig(w http.ResponseWriter, r *http.Request) { +func (handler *evictLeaderHandler) deleteConfig(w http.ResponseWriter, r *http.Request) { idStr := mux.Vars(r)["store_id"] id, err := strconv.ParseUint(idStr, 10, 64) if err != nil { @@ -499,8 +493,8 @@ func newEvictLeaderHandler(config *evictLeaderSchedulerConfig) http.Handler { rd: render.New(render.Options{IndentJSON: true}), } router := mux.NewRouter() - router.HandleFunc("/config", h.UpdateConfig).Methods(http.MethodPost) - router.HandleFunc("/list", h.ListConfig).Methods(http.MethodGet) - router.HandleFunc("/delete/{store_id}", h.DeleteConfig).Methods(http.MethodDelete) + router.HandleFunc("/config", h.updateConfig).Methods(http.MethodPost) + router.HandleFunc("/list", h.listConfig).Methods(http.MethodGet) + router.HandleFunc("/delete/{store_id}", h.deleteConfig).Methods(http.MethodDelete) return router } diff --git a/pkg/schedule/schedulers/evict_slow_store.go b/pkg/schedule/schedulers/evict_slow_store.go index c9f10fa610f..bc0590531af 100644 --- a/pkg/schedule/schedulers/evict_slow_store.go +++ b/pkg/schedule/schedulers/evict_slow_store.go @@ -26,6 +26,7 @@ import ( sche "github.com/tikv/pd/pkg/schedule/core" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/pkg/utils/syncutil" @@ -63,7 +64,7 @@ func initEvictSlowStoreSchedulerConfig(storage endpoint.ConfigStorage) *evictSlo } } -func (conf *evictSlowStoreSchedulerConfig) Clone() *evictSlowStoreSchedulerConfig { +func (conf *evictSlowStoreSchedulerConfig) clone() *evictSlowStoreSchedulerConfig { conf.RLock() defer conf.RUnlock() return &evictSlowStoreSchedulerConfig{ @@ -72,7 +73,6 @@ func (conf *evictSlowStoreSchedulerConfig) Clone() *evictSlowStoreSchedulerConfi } func (conf *evictSlowStoreSchedulerConfig) persistLocked() error { - name := EvictSlowStoreName data, err := EncodeConfig(conf) failpoint.Inject("persistFail", func() { err = errors.New("fail to persist") @@ -80,7 +80,7 @@ func (conf *evictSlowStoreSchedulerConfig) persistLocked() error { if err != nil { return err } - return conf.storage.SaveSchedulerConfig(name, data) + return conf.storage.SaveSchedulerConfig(types.EvictSlowStoreScheduler.String(), data) } func (conf *evictSlowStoreSchedulerConfig) getStores() []uint64 { @@ -149,12 +149,12 @@ func newEvictSlowStoreHandler(config *evictSlowStoreSchedulerConfig) http.Handle rd: render.New(render.Options{IndentJSON: true}), } router := mux.NewRouter() - router.HandleFunc("/config", h.UpdateConfig).Methods(http.MethodPost) - router.HandleFunc("/list", h.ListConfig).Methods(http.MethodGet) + router.HandleFunc("/config", h.updateConfig).Methods(http.MethodPost) + router.HandleFunc("/list", h.listConfig).Methods(http.MethodGet) return router } -func (handler *evictSlowStoreHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { +func (handler *evictSlowStoreHandler) updateConfig(w http.ResponseWriter, r *http.Request) { var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { return @@ -178,8 +178,8 @@ func (handler *evictSlowStoreHandler) UpdateConfig(w http.ResponseWriter, r *htt handler.rd.JSON(w, http.StatusOK, "Config updated.") } -func (handler *evictSlowStoreHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { - conf := handler.config.Clone() 
+func (handler *evictSlowStoreHandler) listConfig(w http.ResponseWriter, _ *http.Request) { + conf := handler.config.clone() handler.rd.JSON(w, http.StatusOK, conf) } @@ -189,22 +189,17 @@ type evictSlowStoreScheduler struct { handler http.Handler } +// ServeHTTP implements the http.Handler interface. func (s *evictSlowStoreScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { s.handler.ServeHTTP(w, r) } -func (*evictSlowStoreScheduler) GetName() string { - return EvictSlowStoreName -} - -func (*evictSlowStoreScheduler) GetType() string { - return EvictSlowStoreType -} - +// EncodeConfig implements the Scheduler interface. func (s *evictSlowStoreScheduler) EncodeConfig() ([]byte, error) { return EncodeConfig(s.conf) } +// ReloadConfig implements the Scheduler interface. func (s *evictSlowStoreScheduler) ReloadConfig() error { s.conf.Lock() defer s.conf.Unlock() @@ -233,6 +228,7 @@ func (s *evictSlowStoreScheduler) ReloadConfig() error { return nil } +// PrepareConfig implements the Scheduler interface. func (s *evictSlowStoreScheduler) PrepareConfig(cluster sche.SchedulerCluster) error { evictStore := s.conf.evictStore() if evictStore != 0 { @@ -241,6 +237,7 @@ func (s *evictSlowStoreScheduler) PrepareConfig(cluster sche.SchedulerCluster) e return nil } +// CleanConfig implements the Scheduler interface. func (s *evictSlowStoreScheduler) CleanConfig(cluster sche.SchedulerCluster) { s.cleanupEvictLeader(cluster) } @@ -267,20 +264,22 @@ func (s *evictSlowStoreScheduler) cleanupEvictLeader(cluster sche.SchedulerClust } func (s *evictSlowStoreScheduler) schedulerEvictLeader(cluster sche.SchedulerCluster) []*operator.Operator { - return scheduleEvictLeaderBatch(s.GetName(), s.GetType(), cluster, s.conf) + return scheduleEvictLeaderBatch(s.GetName(), cluster, s.conf) } +// IsScheduleAllowed implements the Scheduler interface. func (s *evictSlowStoreScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { if s.conf.evictStore() != 0 { allowed := s.OpController.OperatorCount(operator.OpLeader) < cluster.GetSchedulerConfig().GetLeaderScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpLeader.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpLeader) } return allowed } return true } +// Schedule implements the Scheduler interface. 
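persistLocked above, like its counterparts for the other schedulers in this diff, now saves the encoded config under the scheduler type's string form instead of going through a getSchedulerName helper or a name constant. A rough, self-contained sketch of that pattern, in which Storage, the config struct, and the "evict-slow-store-scheduler" key are hypothetical stand-ins for endpoint.ConfigStorage, the real config, and types.EvictSlowStoreScheduler.String():

package main

import (
	"encoding/json"
	"fmt"
)

// Storage mimics the single method of the config storage that persist needs.
type Storage interface {
	SaveSchedulerConfig(name string, data []byte) error
}

type memStorage struct{ m map[string][]byte }

func (s *memStorage) SaveSchedulerConfig(name string, data []byte) error {
	s.m[name] = data
	return nil
}

type evictSlowStoreConfig struct {
	EvictedStores []uint64 `json:"evict-stores"`
}

// persist mirrors the shape of persistLocked: encode the config and save it
// keyed by the scheduler type's name.
func persist(storage Storage, typeName string, conf *evictSlowStoreConfig) error {
	data, err := json.Marshal(conf)
	if err != nil {
		return err
	}
	return storage.SaveSchedulerConfig(typeName, data)
}

func main() {
	st := &memStorage{m: map[string][]byte{}}
	_ = persist(st, "evict-slow-store-scheduler", &evictSlowStoreConfig{EvictedStores: []uint64{1}})
	fmt.Println(string(st.m["evict-slow-store-scheduler"])) // {"evict-stores":[1]}
}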
func (s *evictSlowStoreScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { evictSlowStoreCounter.Inc() @@ -336,7 +335,7 @@ func (s *evictSlowStoreScheduler) Schedule(cluster sche.SchedulerCluster, _ bool func newEvictSlowStoreScheduler(opController *operator.Controller, conf *evictSlowStoreSchedulerConfig) Scheduler { handler := newEvictSlowStoreHandler(conf) return &evictSlowStoreScheduler{ - BaseScheduler: NewBaseScheduler(opController), + BaseScheduler: NewBaseScheduler(opController, types.EvictSlowStoreScheduler), conf: conf, handler: handler, } diff --git a/pkg/schedule/schedulers/evict_slow_store_test.go b/pkg/schedule/schedulers/evict_slow_store_test.go index 6ed9764ba7c..440ab85d08e 100644 --- a/pkg/schedule/schedulers/evict_slow_store_test.go +++ b/pkg/schedule/schedulers/evict_slow_store_test.go @@ -25,6 +25,7 @@ import ( "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/mock/mockcluster" "github.com/tikv/pd/pkg/schedule/operator" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/storage" "github.com/tikv/pd/pkg/utils/operatorutil" ) @@ -79,7 +80,7 @@ func (suite *evictSlowStoreTestSuite) TestEvictSlowStore() { // Add evict leader scheduler to store 1 ops, _ := suite.es.Schedule(suite.tc, false) operatorutil.CheckMultiTargetTransferLeader(re, ops[0], operator.OpLeader, 1, []uint64{2}) - re.Equal(EvictSlowStoreType, ops[0].Desc()) + re.Equal(types.EvictSlowStoreScheduler.String(), ops[0].Desc()) // Cannot balance leaders to store 1 ops, _ = suite.bs.Schedule(suite.tc, false) re.Empty(ops) diff --git a/pkg/schedule/schedulers/evict_slow_trend.go b/pkg/schedule/schedulers/evict_slow_trend.go index dc2266b5540..5fa799c45b5 100644 --- a/pkg/schedule/schedulers/evict_slow_trend.go +++ b/pkg/schedule/schedulers/evict_slow_trend.go @@ -27,6 +27,7 @@ import ( sche "github.com/tikv/pd/pkg/schedule/core" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/pkg/utils/syncutil" @@ -86,7 +87,6 @@ func (conf *evictSlowTrendSchedulerConfig) Clone() *evictSlowTrendSchedulerConfi } func (conf *evictSlowTrendSchedulerConfig) persistLocked() error { - name := EvictSlowTrendName data, err := EncodeConfig(conf) failpoint.Inject("persistFail", func() { err = errors.New("fail to persist") @@ -94,7 +94,7 @@ func (conf *evictSlowTrendSchedulerConfig) persistLocked() error { if err != nil { return err } - return conf.storage.SaveSchedulerConfig(name, data) + return conf.storage.SaveSchedulerConfig(types.EvictSlowTrendScheduler.String(), data) } func (conf *evictSlowTrendSchedulerConfig) getStores() []uint64 { @@ -238,12 +238,12 @@ func newEvictSlowTrendHandler(config *evictSlowTrendSchedulerConfig) http.Handle rd: render.New(render.Options{IndentJSON: true}), } router := mux.NewRouter() - router.HandleFunc("/config", h.UpdateConfig).Methods(http.MethodPost) - router.HandleFunc("/list", h.ListConfig).Methods(http.MethodGet) + router.HandleFunc("/config", h.updateConfig).Methods(http.MethodPost) + router.HandleFunc("/list", h.listConfig).Methods(http.MethodGet) return router } -func (handler *evictSlowTrendHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { +func (handler *evictSlowTrendHandler) updateConfig(w http.ResponseWriter, r *http.Request) { var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil 
{ return @@ -267,7 +267,7 @@ func (handler *evictSlowTrendHandler) UpdateConfig(w http.ResponseWriter, r *htt handler.rd.JSON(w, http.StatusOK, "Config updated.") } -func (handler *evictSlowTrendHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { +func (handler *evictSlowTrendHandler) listConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.Clone() handler.rd.JSON(w, http.StatusOK, conf) } @@ -291,22 +291,17 @@ func (s *evictSlowTrendScheduler) GetNextInterval(time.Duration) time.Duration { return intervalGrow(s.GetMinInterval(), MaxScheduleInterval, growthType) } +// ServeHTTP implements the http.Handler interface. func (s *evictSlowTrendScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { s.handler.ServeHTTP(w, r) } -func (*evictSlowTrendScheduler) GetName() string { - return EvictSlowTrendName -} - -func (*evictSlowTrendScheduler) GetType() string { - return EvictSlowTrendType -} - +// EncodeConfig implements the Scheduler interface. func (s *evictSlowTrendScheduler) EncodeConfig() ([]byte, error) { return EncodeConfig(s.conf) } +// ReloadConfig implements the Scheduler interface. func (s *evictSlowTrendScheduler) ReloadConfig() error { s.conf.Lock() defer s.conf.Unlock() @@ -335,6 +330,7 @@ func (s *evictSlowTrendScheduler) ReloadConfig() error { return nil } +// PrepareConfig implements the Scheduler interface. func (s *evictSlowTrendScheduler) PrepareConfig(cluster sche.SchedulerCluster) error { evictedStoreID := s.conf.evictedStore() if evictedStoreID == 0 { @@ -343,6 +339,7 @@ func (s *evictSlowTrendScheduler) PrepareConfig(cluster sche.SchedulerCluster) e return cluster.SlowTrendEvicted(evictedStoreID) } +// CleanConfig implements the Scheduler interface. func (s *evictSlowTrendScheduler) CleanConfig(cluster sche.SchedulerCluster) { s.cleanupEvictLeader(cluster) } @@ -374,20 +371,22 @@ func (s *evictSlowTrendScheduler) scheduleEvictLeader(cluster sche.SchedulerClus return nil } storeSlowTrendEvictedStatusGauge.WithLabelValues(store.GetAddress(), strconv.FormatUint(store.GetID(), 10)).Set(1) - return scheduleEvictLeaderBatch(s.GetName(), s.GetType(), cluster, s.conf) + return scheduleEvictLeaderBatch(s.GetName(), cluster, s.conf) } +// IsScheduleAllowed implements the Scheduler interface. func (s *evictSlowTrendScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { if s.conf.evictedStore() == 0 { return true } allowed := s.OpController.OperatorCount(operator.OpLeader) < cluster.GetSchedulerConfig().GetLeaderScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpLeader.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpLeader) } return allowed } +// Schedule implements the Scheduler interface. 
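The handler methods renamed above (updateConfig, listConfig, and so on) can be unexported because they are only referenced when the package builds its own router. A simplified, self-contained sketch of that wiring; gorilla/mux is used as in the diff, while the config type and the plain encoding/json responses are stand-ins for the real scheduler config and the render package:

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/http/httptest"
	"sync"

	"github.com/gorilla/mux"
)

// slowTrendConfig is a hypothetical stand-in for a scheduler config; only the
// locking and the JSON shape matter for this sketch.
type slowTrendConfig struct {
	mu                  sync.RWMutex
	RecoveryDurationGap uint64 `json:"recovery-duration"`
}

type handler struct{ config *slowTrendConfig }

// updateConfig stays unexported: it is only bound to a route below, inside the same package.
func (h *handler) updateConfig(w http.ResponseWriter, r *http.Request) {
	var input map[string]any
	if err := json.NewDecoder(r.Body).Decode(&input); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	if v, ok := input["recovery-duration"].(float64); ok {
		h.config.mu.Lock()
		h.config.RecoveryDurationGap = uint64(v)
		h.config.mu.Unlock()
	}
	w.WriteHeader(http.StatusOK)
}

func (h *handler) listConfig(w http.ResponseWriter, _ *http.Request) {
	h.config.mu.RLock()
	defer h.config.mu.RUnlock()
	_ = json.NewEncoder(w).Encode(h.config)
}

func newHandler(conf *slowTrendConfig) http.Handler {
	h := &handler{config: conf}
	router := mux.NewRouter()
	router.HandleFunc("/config", h.updateConfig).Methods(http.MethodPost)
	router.HandleFunc("/list", h.listConfig).Methods(http.MethodGet)
	return router
}

func main() {
	h := newHandler(&slowTrendConfig{RecoveryDurationGap: 60})
	req := httptest.NewRequest(http.MethodGet, "/list", nil)
	rec := httptest.NewRecorder()
	h.ServeHTTP(rec, req)
	fmt.Println(rec.Body.String()) // {"recovery-duration":60}
}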
func (s *evictSlowTrendScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { schedulerCounter.WithLabelValues(s.GetName(), "schedule").Inc() @@ -459,7 +458,7 @@ func (s *evictSlowTrendScheduler) Schedule(cluster sche.SchedulerCluster, _ bool func newEvictSlowTrendScheduler(opController *operator.Controller, conf *evictSlowTrendSchedulerConfig) Scheduler { handler := newEvictSlowTrendHandler(conf) return &evictSlowTrendScheduler{ - BaseScheduler: NewBaseScheduler(opController), + BaseScheduler: NewBaseScheduler(opController, types.EvictSlowTrendScheduler), conf: conf, handler: handler, } diff --git a/pkg/schedule/schedulers/evict_slow_trend_test.go b/pkg/schedule/schedulers/evict_slow_trend_test.go index dd6807f4a85..c01ae4959ba 100644 --- a/pkg/schedule/schedulers/evict_slow_trend_test.go +++ b/pkg/schedule/schedulers/evict_slow_trend_test.go @@ -27,6 +27,7 @@ import ( "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/mock/mockcluster" "github.com/tikv/pd/pkg/schedule/operator" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/storage" "github.com/tikv/pd/pkg/utils/operatorutil" ) @@ -155,7 +156,7 @@ func (suite *evictSlowTrendTestSuite) TestEvictSlowTrend() { } ops, _ = suite.es.Schedule(suite.tc, false) operatorutil.CheckMultiTargetTransferLeader(re, ops[0], operator.OpLeader, 1, []uint64{2, 3}) - re.Equal(EvictSlowTrendType, ops[0].Desc()) + re.Equal(types.EvictSlowTrendScheduler.String(), ops[0].Desc()) re.Zero(es2.conf.candidate()) re.Equal(uint64(1), es2.conf.evictedStore()) // Cannot balance leaders to store 1 diff --git a/pkg/schedule/schedulers/grant_hot_region.go b/pkg/schedule/schedulers/grant_hot_region.go index a19a4e1bf4b..1e45096a881 100644 --- a/pkg/schedule/schedulers/grant_hot_region.go +++ b/pkg/schedule/schedulers/grant_hot_region.go @@ -30,6 +30,7 @@ import ( "github.com/tikv/pd/pkg/schedule/filter" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/statistics/utils" @@ -68,19 +69,19 @@ func (conf *grantHotRegionSchedulerConfig) setStore(leaderID uint64, peers []uin return ret } -func (conf *grantHotRegionSchedulerConfig) GetStoreLeaderID() uint64 { +func (conf *grantHotRegionSchedulerConfig) getStoreLeaderID() uint64 { conf.RLock() defer conf.RUnlock() return conf.StoreLeaderID } -func (conf *grantHotRegionSchedulerConfig) SetStoreLeaderID(id uint64) { +func (conf *grantHotRegionSchedulerConfig) setStoreLeaderID(id uint64) { conf.Lock() defer conf.Unlock() conf.StoreLeaderID = id } -func (conf *grantHotRegionSchedulerConfig) Clone() *grantHotRegionSchedulerConfig { +func (conf *grantHotRegionSchedulerConfig) clone() *grantHotRegionSchedulerConfig { conf.RLock() defer conf.RUnlock() newStoreIDs := make([]uint64, len(conf.StoreIDs)) @@ -92,18 +93,13 @@ func (conf *grantHotRegionSchedulerConfig) Clone() *grantHotRegionSchedulerConfi } func (conf *grantHotRegionSchedulerConfig) Persist() error { - name := conf.getSchedulerName() conf.RLock() defer conf.RUnlock() data, err := EncodeConfig(conf) if err != nil { return err } - return conf.storage.SaveSchedulerConfig(name, data) -} - -func (*grantHotRegionSchedulerConfig) getSchedulerName() string { - return GrantHotRegionName + return conf.storage.SaveSchedulerConfig(types.GrantHotRegionScheduler.String(), data) } func (conf *grantHotRegionSchedulerConfig) has(storeID uint64) bool { @@ -133,6 
+129,7 @@ type grantHotRegionScheduler struct { func newGrantHotRegionScheduler(opController *operator.Controller, conf *grantHotRegionSchedulerConfig) *grantHotRegionScheduler { base := newBaseHotScheduler(opController, statistics.DefaultHistorySampleDuration, statistics.DefaultHistorySampleInterval) + base.tp = types.GrantHotRegionScheduler handler := newGrantHotRegionHandler(conf) ret := &grantHotRegionScheduler{ baseHotScheduler: base, @@ -142,18 +139,12 @@ func newGrantHotRegionScheduler(opController *operator.Controller, conf *grantHo return ret } -func (*grantHotRegionScheduler) GetName() string { - return GrantHotRegionName -} - -func (*grantHotRegionScheduler) GetType() string { - return GrantHotRegionType -} - +// EncodeConfig implements the Scheduler interface. func (s *grantHotRegionScheduler) EncodeConfig() ([]byte, error) { return EncodeConfig(s.conf) } +// ReloadConfig implements the Scheduler interface. func (s *grantHotRegionScheduler) ReloadConfig() error { s.conf.Lock() defer s.conf.Unlock() @@ -180,10 +171,10 @@ func (s *grantHotRegionScheduler) IsScheduleAllowed(cluster sche.SchedulerCluste regionAllowed := s.OpController.OperatorCount(operator.OpRegion) < conf.GetRegionScheduleLimit() leaderAllowed := s.OpController.OperatorCount(operator.OpLeader) < conf.GetLeaderScheduleLimit() if !regionAllowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpRegion.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpRegion) } if !leaderAllowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpLeader.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpLeader) } return regionAllowed && leaderAllowed } @@ -197,7 +188,7 @@ type grantHotRegionHandler struct { config *grantHotRegionSchedulerConfig } -func (handler *grantHotRegionHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { +func (handler *grantHotRegionHandler) updateConfig(w http.ResponseWriter, r *http.Request) { var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { return @@ -227,15 +218,15 @@ func (handler *grantHotRegionHandler) UpdateConfig(w http.ResponseWriter, r *htt } if err = handler.config.Persist(); err != nil { - handler.config.SetStoreLeaderID(0) + handler.config.setStoreLeaderID(0) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } handler.rd.JSON(w, http.StatusOK, nil) } -func (handler *grantHotRegionHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { - conf := handler.config.Clone() +func (handler *grantHotRegionHandler) listConfig(w http.ResponseWriter, _ *http.Request) { + conf := handler.config.clone() handler.rd.JSON(w, http.StatusOK, conf) } @@ -245,8 +236,8 @@ func newGrantHotRegionHandler(config *grantHotRegionSchedulerConfig) http.Handle rd: render.New(render.Options{IndentJSON: true}), } router := mux.NewRouter() - router.HandleFunc("/config", h.UpdateConfig).Methods(http.MethodPost) - router.HandleFunc("/list", h.ListConfig).Methods(http.MethodGet) + router.HandleFunc("/config", h.updateConfig).Methods(http.MethodPost) + router.HandleFunc("/list", h.listConfig).Methods(http.MethodGet) return router } @@ -280,7 +271,7 @@ func (s *grantHotRegionScheduler) randomSchedule(cluster sche.SchedulerCluster, continue } } else { - if !s.conf.has(srcStoreID) || srcStoreID == s.conf.GetStoreLeaderID() { + if !s.conf.has(srcStoreID) || srcStoreID == s.conf.getStoreLeaderID() { continue } } @@ -321,7 +312,7 @@ func (s 
*grantHotRegionScheduler) transfer(cluster sche.SchedulerCluster, region var candidate []uint64 if isLeader { filters = append(filters, &filter.StoreStateFilter{ActionScope: s.GetName(), TransferLeader: true, OperatorLevel: constant.High}) - candidate = []uint64{s.conf.GetStoreLeaderID()} + candidate = []uint64{s.conf.getStoreLeaderID()} } else { filters = append(filters, &filter.StoreStateFilter{ActionScope: s.GetName(), MoveRegion: true, OperatorLevel: constant.High}, filter.NewExcludedFilter(s.GetName(), srcRegion.GetStoreIDs(), srcRegion.GetStoreIDs())) diff --git a/pkg/schedule/schedulers/grant_leader.go b/pkg/schedule/schedulers/grant_leader.go index 21900fac85d..1cf194c5f49 100644 --- a/pkg/schedule/schedulers/grant_leader.go +++ b/pkg/schedule/schedulers/grant_leader.go @@ -28,6 +28,7 @@ import ( "github.com/tikv/pd/pkg/schedule/filter" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/pkg/utils/syncutil" @@ -69,7 +70,7 @@ func (conf *grantLeaderSchedulerConfig) BuildWithArgs(args []string) error { return nil } -func (conf *grantLeaderSchedulerConfig) Clone() *grantLeaderSchedulerConfig { +func (conf *grantLeaderSchedulerConfig) clone() *grantLeaderSchedulerConfig { conf.RLock() defer conf.RUnlock() newStoreIDWithRanges := make(map[uint64][]core.KeyRange) @@ -81,19 +82,14 @@ func (conf *grantLeaderSchedulerConfig) Clone() *grantLeaderSchedulerConfig { } } -func (conf *grantLeaderSchedulerConfig) Persist() error { - name := conf.getSchedulerName() +func (conf *grantLeaderSchedulerConfig) persist() error { conf.RLock() defer conf.RUnlock() data, err := EncodeConfig(conf) if err != nil { return err } - return conf.storage.SaveSchedulerConfig(name, data) -} - -func (*grantLeaderSchedulerConfig) getSchedulerName() string { - return GrantLeaderName + return conf.storage.SaveSchedulerConfig(types.GrantLeaderScheduler.String(), data) } func (conf *grantLeaderSchedulerConfig) getRanges(id uint64) []string { @@ -159,7 +155,7 @@ type grantLeaderScheduler struct { // newGrantLeaderScheduler creates an admin scheduler that transfers all leaders // to a store. func newGrantLeaderScheduler(opController *operator.Controller, conf *grantLeaderSchedulerConfig) Scheduler { - base := NewBaseScheduler(opController) + base := NewBaseScheduler(opController, types.GrantLeaderScheduler) handler := newGrantLeaderHandler(conf) return &grantLeaderScheduler{ BaseScheduler: base, @@ -168,22 +164,17 @@ func newGrantLeaderScheduler(opController *operator.Controller, conf *grantLeade } } +// ServeHTTP implements the http.Handler interface. func (s *grantLeaderScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { s.handler.ServeHTTP(w, r) } -func (*grantLeaderScheduler) GetName() string { - return GrantLeaderName -} - -func (*grantLeaderScheduler) GetType() string { - return GrantLeaderType -} - +// EncodeConfig implements the Scheduler interface. func (s *grantLeaderScheduler) EncodeConfig() ([]byte, error) { return EncodeConfig(s.conf) } +// ReloadConfig implements the Scheduler interface. func (s *grantLeaderScheduler) ReloadConfig() error { s.conf.Lock() defer s.conf.Unlock() @@ -203,6 +194,7 @@ func (s *grantLeaderScheduler) ReloadConfig() error { return nil } +// PrepareConfig implements the Scheduler interface. 
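The grant-leader config above gains an unexported clone used by listConfig, and its persist now also keys on the scheduler type. The hunk does not show the body of the copy loop, so the sketch below is an assumption about it: take a read lock, rebuild the store-to-key-range map, and re-allocate each per-store slice so the live map is never handed out. All types are simplified stand-ins:

package main

import (
	"fmt"
	"sync"
)

// KeyRange is a simplified stand-in for core.KeyRange.
type KeyRange struct {
	StartKey, EndKey []byte
}

type grantLeaderConfig struct {
	mu                sync.RWMutex
	StoreIDWithRanges map[uint64][]KeyRange
}

// clone rebuilds the map and re-allocates each per-store slice under a read
// lock; the key byte slices themselves are still shared, which is acceptable
// for read-only consumers such as a /list handler.
func (c *grantLeaderConfig) clone() *grantLeaderConfig {
	c.mu.RLock()
	defer c.mu.RUnlock()
	copied := make(map[uint64][]KeyRange, len(c.StoreIDWithRanges))
	for id, ranges := range c.StoreIDWithRanges {
		copied[id] = append([]KeyRange(nil), ranges...)
	}
	return &grantLeaderConfig{StoreIDWithRanges: copied}
}

func main() {
	conf := &grantLeaderConfig{StoreIDWithRanges: map[uint64][]KeyRange{1: {{}}}}
	snapshot := conf.clone()
	conf.StoreIDWithRanges[2] = nil // later mutations do not show up in the snapshot
	fmt.Println(len(snapshot.StoreIDWithRanges)) // 1
}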
func (s *grantLeaderScheduler) PrepareConfig(cluster sche.SchedulerCluster) error { s.conf.RLock() defer s.conf.RUnlock() @@ -215,6 +207,7 @@ func (s *grantLeaderScheduler) PrepareConfig(cluster sche.SchedulerCluster) erro return res } +// CleanConfig implements the Scheduler interface. func (s *grantLeaderScheduler) CleanConfig(cluster sche.SchedulerCluster) { s.conf.RLock() defer s.conf.RUnlock() @@ -223,14 +216,16 @@ func (s *grantLeaderScheduler) CleanConfig(cluster sche.SchedulerCluster) { } } +// IsScheduleAllowed implements the Scheduler interface. func (s *grantLeaderScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { allowed := s.OpController.OperatorCount(operator.OpLeader) < cluster.GetSchedulerConfig().GetLeaderScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpLeader.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpLeader) } return allowed } +// Schedule implements the Scheduler interface. func (s *grantLeaderScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { grantLeaderCounter.Inc() storeIDWithRanges := s.conf.getStoreIDWithRanges() @@ -262,7 +257,7 @@ type grantLeaderHandler struct { config *grantLeaderSchedulerConfig } -func (handler *grantLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { +func (handler *grantLeaderHandler) updateConfig(w http.ResponseWriter, r *http.Request) { var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { return @@ -297,7 +292,7 @@ func (handler *grantLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R handler.rd.JSON(w, http.StatusBadRequest, err.Error()) return } - err = handler.config.Persist() + err = handler.config.persist() if err != nil { handler.config.removeStore(id) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) @@ -306,12 +301,12 @@ func (handler *grantLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R handler.rd.JSON(w, http.StatusOK, "The scheduler has been applied to the store.") } -func (handler *grantLeaderHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { - conf := handler.config.Clone() +func (handler *grantLeaderHandler) listConfig(w http.ResponseWriter, _ *http.Request) { + conf := handler.config.clone() handler.rd.JSON(w, http.StatusOK, conf) } -func (handler *grantLeaderHandler) DeleteConfig(w http.ResponseWriter, r *http.Request) { +func (handler *grantLeaderHandler) deleteConfig(w http.ResponseWriter, r *http.Request) { idStr := mux.Vars(r)["store_id"] id, err := strconv.ParseUint(idStr, 10, 64) if err != nil { @@ -323,7 +318,7 @@ func (handler *grantLeaderHandler) DeleteConfig(w http.ResponseWriter, r *http.R keyRanges := handler.config.getKeyRangesByID(id) succ, last := handler.config.removeStore(id) if succ { - err = handler.config.Persist() + err = handler.config.persist() if err != nil { handler.config.resetStore(id, keyRanges) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) @@ -354,8 +349,8 @@ func newGrantLeaderHandler(config *grantLeaderSchedulerConfig) http.Handler { rd: render.New(render.Options{IndentJSON: true}), } router := mux.NewRouter() - router.HandleFunc("/config", h.UpdateConfig).Methods(http.MethodPost) - router.HandleFunc("/list", h.ListConfig).Methods(http.MethodGet) - router.HandleFunc("/delete/{store_id}", h.DeleteConfig).Methods(http.MethodDelete) + router.HandleFunc("/config", h.updateConfig).Methods(http.MethodPost) + 
router.HandleFunc("/list", h.listConfig).Methods(http.MethodGet) + router.HandleFunc("/delete/{store_id}", h.deleteConfig).Methods(http.MethodDelete) return router } diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index f79d8fac760..ff837e67ad2 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -34,6 +34,7 @@ import ( "github.com/tikv/pd/pkg/schedule/filter" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/statistics/buckets" @@ -92,7 +93,7 @@ type baseHotScheduler struct { } func newBaseHotScheduler(opController *operator.Controller, sampleDuration time.Duration, sampleInterval time.Duration) *baseHotScheduler { - base := NewBaseScheduler(opController) + base := NewBaseScheduler(opController, types.BalanceHotRegionScheduler) ret := &baseHotScheduler{ BaseScheduler: base, regionPendings: make(map[uint64]*pendingInfluence), @@ -202,7 +203,7 @@ type hotScheduler struct { func newHotScheduler(opController *operator.Controller, conf *hotRegionSchedulerConfig) *hotScheduler { base := newBaseHotScheduler(opController, - conf.GetHistorySampleDuration(), conf.GetHistorySampleInterval()) + conf.getHistorySampleDuration(), conf.getHistorySampleInterval()) ret := &hotScheduler{ name: HotRegionName, baseHotScheduler: base, @@ -214,18 +215,12 @@ func newHotScheduler(opController *operator.Controller, conf *hotRegionScheduler return ret } -func (h *hotScheduler) GetName() string { - return h.name -} - -func (*hotScheduler) GetType() string { - return HotRegionType -} - +// EncodeConfig implements the Scheduler interface. func (h *hotScheduler) EncodeConfig() ([]byte, error) { - return h.conf.EncodeConfig() + return h.conf.encodeConfig() } +// ReloadConfig impl func (h *hotScheduler) ReloadConfig() error { h.conf.Lock() defer h.conf.Unlock() @@ -266,26 +261,31 @@ func (h *hotScheduler) ReloadConfig() error { return nil } +// ServeHTTP implements the http.Handler interface. func (h *hotScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { h.conf.ServeHTTP(w, r) } +// GetMinInterval implements the Scheduler interface. func (*hotScheduler) GetMinInterval() time.Duration { return minHotScheduleInterval } +// GetNextInterval implements the Scheduler interface. func (h *hotScheduler) GetNextInterval(time.Duration) time.Duration { return intervalGrow(h.GetMinInterval(), maxHotScheduleInterval, exponentialGrowth) } +// IsScheduleAllowed implements the Scheduler interface. func (h *hotScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { allowed := h.OpController.OperatorCount(operator.OpHotRegion) < cluster.GetSchedulerConfig().GetHotRegionScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(h.GetType(), operator.OpHotRegion.String()).Inc() + operator.IncOperatorLimitCounter(h.GetType(), operator.OpHotRegion) } return allowed } +// Schedule implements the Scheduler interface. 
func (h *hotScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { hotSchedulerCounter.Inc() typ := h.randomType() @@ -295,22 +295,22 @@ func (h *hotScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*opera func (h *hotScheduler) dispatch(typ resourceType, cluster sche.SchedulerCluster) []*operator.Operator { h.Lock() defer h.Unlock() - h.updateHistoryLoadConfig(h.conf.GetHistorySampleDuration(), h.conf.GetHistorySampleInterval()) + h.updateHistoryLoadConfig(h.conf.getHistorySampleDuration(), h.conf.getHistorySampleInterval()) h.prepareForBalance(typ, cluster) - // IsForbidRWType can not be move earlier to support to use api and metrics. + // isForbidRWType can not be move earlier to support to use api and metrics. switch typ { case readLeader, readPeer: - if h.conf.IsForbidRWType(utils.Read) { + if h.conf.isForbidRWType(utils.Read) { return nil } return h.balanceHotReadRegions(cluster) case writePeer: - if h.conf.IsForbidRWType(utils.Write) { + if h.conf.isForbidRWType(utils.Write) { return nil } return h.balanceHotWritePeers(cluster) case writeLeader: - if h.conf.IsForbidRWType(utils.Write) { + if h.conf.isForbidRWType(utils.Write) { return nil } return h.balanceHotWriteLeaders(cluster) @@ -506,11 +506,11 @@ type balanceSolver struct { func (bs *balanceSolver) init() { // Load the configuration items of the scheduler. bs.resourceTy = toResourceType(bs.rwTy, bs.opTy) - bs.maxPeerNum = bs.sche.conf.GetMaxPeerNumber() + bs.maxPeerNum = bs.sche.conf.getMaxPeerNumber() bs.minHotDegree = bs.GetSchedulerConfig().GetHotRegionCacheHitsThreshold() bs.firstPriority, bs.secondPriority = prioritiesToDim(bs.getPriorities()) - bs.greatDecRatio, bs.minorDecRatio = bs.sche.conf.GetGreatDecRatio(), bs.sche.conf.GetMinorDecRatio() - switch bs.sche.conf.GetRankFormulaVersion() { + bs.greatDecRatio, bs.minorDecRatio = bs.sche.conf.getGreatDecRatio(), bs.sche.conf.getMinorDecRatio() + switch bs.sche.conf.getRankFormulaVersion() { case "v1": bs.rank = initRankV1(bs) default: @@ -541,16 +541,16 @@ func (bs *balanceSolver) init() { } rankStepRatios := []float64{ - utils.ByteDim: bs.sche.conf.GetByteRankStepRatio(), - utils.KeyDim: bs.sche.conf.GetKeyRankStepRatio(), - utils.QueryDim: bs.sche.conf.GetQueryRateRankStepRatio()} + utils.ByteDim: bs.sche.conf.getByteRankStepRatio(), + utils.KeyDim: bs.sche.conf.getKeyRankStepRatio(), + utils.QueryDim: bs.sche.conf.getQueryRateRankStepRatio()} stepLoads := make([]float64, utils.DimLen) for i := range stepLoads { stepLoads[i] = maxCur.Loads[i] * rankStepRatios[i] } bs.rankStep = &statistics.StoreLoad{ Loads: stepLoads, - Count: maxCur.Count * bs.sche.conf.GetCountRankStepRatio(), + Count: maxCur.Count * bs.sche.conf.getCountRankStepRatio(), } } @@ -564,11 +564,11 @@ func (bs *balanceSolver) getPriorities() []string { // For write, they are different switch bs.resourceTy { case readLeader, readPeer: - return adjustPrioritiesConfig(querySupport, bs.sche.conf.GetReadPriorities(), getReadPriorities) + return adjustPrioritiesConfig(querySupport, bs.sche.conf.getReadPriorities(), getReadPriorities) case writeLeader: - return adjustPrioritiesConfig(querySupport, bs.sche.conf.GetWriteLeaderPriorities(), getWriteLeaderPriorities) + return adjustPrioritiesConfig(querySupport, bs.sche.conf.getWriteLeaderPriorities(), getWriteLeaderPriorities) case writePeer: - return adjustPrioritiesConfig(querySupport, bs.sche.conf.GetWritePeerPriorities(), getWritePeerPriorities) + return adjustPrioritiesConfig(querySupport, 
bs.sche.conf.getWritePeerPriorities(), getWritePeerPriorities) } log.Error("illegal type or illegal operator while getting the priority", zap.String("type", bs.rwTy.String()), zap.String("operator", bs.opTy.String())) return []string{} @@ -770,16 +770,16 @@ func (bs *balanceSolver) calcMaxZombieDur() time.Duration { // We use store query info rather than total of hot write leader to guide hot write leader scheduler // when its first priority is `QueryDim`, because `Write-peer` does not have `QueryDim`. // The reason is the same with `tikvCollector.GetLoads`. - return bs.sche.conf.GetStoreStatZombieDuration() + return bs.sche.conf.getStoreStatZombieDuration() } - return bs.sche.conf.GetRegionsStatZombieDuration() + return bs.sche.conf.getRegionsStatZombieDuration() case writePeer: if bs.best.srcStore.IsTiFlash() { - return bs.sche.conf.GetRegionsStatZombieDuration() + return bs.sche.conf.getRegionsStatZombieDuration() } - return bs.sche.conf.GetStoreStatZombieDuration() + return bs.sche.conf.getStoreStatZombieDuration() default: - return bs.sche.conf.GetStoreStatZombieDuration() + return bs.sche.conf.getStoreStatZombieDuration() } } @@ -787,8 +787,8 @@ func (bs *balanceSolver) calcMaxZombieDur() time.Duration { // its expectation * ratio, the store would be selected as hot source store func (bs *balanceSolver) filterSrcStores() map[uint64]*statistics.StoreLoadDetail { ret := make(map[uint64]*statistics.StoreLoadDetail) - confSrcToleranceRatio := bs.sche.conf.GetSrcToleranceRatio() - confEnableForTiFlash := bs.sche.conf.GetEnableForTiFlash() + confSrcToleranceRatio := bs.sche.conf.getSrcToleranceRatio() + confEnableForTiFlash := bs.sche.conf.getEnableForTiFlash() for id, detail := range bs.stLoadDetail { srcToleranceRatio := confSrcToleranceRatio if detail.IsTiFlash() { @@ -1026,8 +1026,8 @@ func (bs *balanceSolver) filterDstStores() map[uint64]*statistics.StoreLoadDetai func (bs *balanceSolver) pickDstStores(filters []filter.Filter, candidates []*statistics.StoreLoadDetail) map[uint64]*statistics.StoreLoadDetail { ret := make(map[uint64]*statistics.StoreLoadDetail, len(candidates)) - confDstToleranceRatio := bs.sche.conf.GetDstToleranceRatio() - confEnableForTiFlash := bs.sche.conf.GetEnableForTiFlash() + confDstToleranceRatio := bs.sche.conf.getDstToleranceRatio() + confEnableForTiFlash := bs.sche.conf.getEnableForTiFlash() for _, detail := range candidates { store := detail.StoreInfo dstToleranceRatio := confDstToleranceRatio @@ -1120,7 +1120,7 @@ func (bs *balanceSolver) checkHistoryLoadsByPriorityAndToleranceFirstOnly(_ [][] } func (bs *balanceSolver) enableExpectation() bool { - return bs.sche.conf.GetDstToleranceRatio() > 0 && bs.sche.conf.GetSrcToleranceRatio() > 0 + return bs.sche.conf.getDstToleranceRatio() > 0 && bs.sche.conf.getSrcToleranceRatio() > 0 } func (bs *balanceSolver) isUniformFirstPriority(store *statistics.StoreLoadDetail) bool { @@ -1156,11 +1156,11 @@ func (bs *balanceSolver) isTolerance(dim int, reverse bool) bool { func (bs *balanceSolver) getMinRate(dim int) float64 { switch dim { case utils.KeyDim: - return bs.sche.conf.GetMinHotKeyRate() + return bs.sche.conf.getMinHotKeyRate() case utils.ByteDim: - return bs.sche.conf.GetMinHotByteRate() + return bs.sche.conf.getMinHotByteRate() case utils.QueryDim: - return bs.sche.conf.GetMinHotQueryRate() + return bs.sche.conf.getMinHotQueryRate() } return -1 } diff --git a/pkg/schedule/schedulers/hot_region_config.go b/pkg/schedule/schedulers/hot_region_config.go index 5f08d755f76..83121254cc0 100644 --- 
a/pkg/schedule/schedulers/hot_region_config.go +++ b/pkg/schedule/schedulers/hot_region_config.go @@ -157,181 +157,183 @@ type hotRegionSchedulerConfig struct { HistorySampleInterval typeutil.Duration `json:"history-sample-interval"` } -func (conf *hotRegionSchedulerConfig) EncodeConfig() ([]byte, error) { +func (conf *hotRegionSchedulerConfig) encodeConfig() ([]byte, error) { conf.RLock() defer conf.RUnlock() return EncodeConfig(conf) } -func (conf *hotRegionSchedulerConfig) GetStoreStatZombieDuration() time.Duration { +func (conf *hotRegionSchedulerConfig) getStoreStatZombieDuration() time.Duration { conf.RLock() defer conf.RUnlock() return time.Duration(conf.MaxZombieRounds*utils.StoreHeartBeatReportInterval) * time.Second } -func (conf *hotRegionSchedulerConfig) GetRegionsStatZombieDuration() time.Duration { +func (conf *hotRegionSchedulerConfig) getRegionsStatZombieDuration() time.Duration { conf.RLock() defer conf.RUnlock() return time.Duration(conf.MaxZombieRounds*utils.RegionHeartBeatReportInterval) * time.Second } -func (conf *hotRegionSchedulerConfig) GetMaxPeerNumber() int { +func (conf *hotRegionSchedulerConfig) getMaxPeerNumber() int { conf.RLock() defer conf.RUnlock() return conf.MaxPeerNum } -func (conf *hotRegionSchedulerConfig) GetSrcToleranceRatio() float64 { +func (conf *hotRegionSchedulerConfig) getSrcToleranceRatio() float64 { conf.RLock() defer conf.RUnlock() return conf.SrcToleranceRatio } -func (conf *hotRegionSchedulerConfig) SetSrcToleranceRatio(tol float64) { +func (conf *hotRegionSchedulerConfig) setSrcToleranceRatio(tol float64) { conf.Lock() defer conf.Unlock() conf.SrcToleranceRatio = tol } -func (conf *hotRegionSchedulerConfig) GetDstToleranceRatio() float64 { +func (conf *hotRegionSchedulerConfig) getDstToleranceRatio() float64 { conf.RLock() defer conf.RUnlock() return conf.DstToleranceRatio } -func (conf *hotRegionSchedulerConfig) SetDstToleranceRatio(tol float64) { +func (conf *hotRegionSchedulerConfig) setDstToleranceRatio(tol float64) { conf.Lock() defer conf.Unlock() conf.DstToleranceRatio = tol } -func (conf *hotRegionSchedulerConfig) GetByteRankStepRatio() float64 { +func (conf *hotRegionSchedulerConfig) getByteRankStepRatio() float64 { conf.RLock() defer conf.RUnlock() return conf.ByteRateRankStepRatio } -func (conf *hotRegionSchedulerConfig) GetKeyRankStepRatio() float64 { +func (conf *hotRegionSchedulerConfig) getKeyRankStepRatio() float64 { conf.RLock() defer conf.RUnlock() return conf.KeyRateRankStepRatio } -func (conf *hotRegionSchedulerConfig) GetQueryRateRankStepRatio() float64 { +func (conf *hotRegionSchedulerConfig) getQueryRateRankStepRatio() float64 { conf.RLock() defer conf.RUnlock() return conf.QueryRateRankStepRatio } -func (conf *hotRegionSchedulerConfig) GetCountRankStepRatio() float64 { +func (conf *hotRegionSchedulerConfig) getCountRankStepRatio() float64 { conf.RLock() defer conf.RUnlock() return conf.CountRankStepRatio } -func (conf *hotRegionSchedulerConfig) GetGreatDecRatio() float64 { +func (conf *hotRegionSchedulerConfig) getGreatDecRatio() float64 { conf.RLock() defer conf.RUnlock() return conf.GreatDecRatio } -func (conf *hotRegionSchedulerConfig) SetStrictPickingStore(v bool) { +func (conf *hotRegionSchedulerConfig) setStrictPickingStore(v bool) { conf.RLock() defer conf.RUnlock() conf.StrictPickingStore = v } -func (conf *hotRegionSchedulerConfig) GetMinorDecRatio() float64 { +func (conf *hotRegionSchedulerConfig) getMinorDecRatio() float64 { conf.RLock() defer conf.RUnlock() return conf.MinorDecRatio } -func (conf 
*hotRegionSchedulerConfig) GetMinHotKeyRate() float64 { +func (conf *hotRegionSchedulerConfig) getMinHotKeyRate() float64 { conf.RLock() defer conf.RUnlock() return conf.MinHotKeyRate } -func (conf *hotRegionSchedulerConfig) GetMinHotByteRate() float64 { +func (conf *hotRegionSchedulerConfig) getMinHotByteRate() float64 { conf.RLock() defer conf.RUnlock() return conf.MinHotByteRate } -func (conf *hotRegionSchedulerConfig) GetEnableForTiFlash() bool { +func (conf *hotRegionSchedulerConfig) getEnableForTiFlash() bool { conf.RLock() defer conf.RUnlock() return conf.EnableForTiFlash } -func (conf *hotRegionSchedulerConfig) SetEnableForTiFlash(enable bool) { +func (conf *hotRegionSchedulerConfig) setEnableForTiFlash(enable bool) { conf.Lock() defer conf.Unlock() conf.EnableForTiFlash = enable } -func (conf *hotRegionSchedulerConfig) GetMinHotQueryRate() float64 { +func (conf *hotRegionSchedulerConfig) getMinHotQueryRate() float64 { conf.RLock() defer conf.RUnlock() return conf.MinHotQueryRate } -func (conf *hotRegionSchedulerConfig) GetReadPriorities() []string { +func (conf *hotRegionSchedulerConfig) getReadPriorities() []string { conf.RLock() defer conf.RUnlock() return conf.ReadPriorities } -func (conf *hotRegionSchedulerConfig) GetWriteLeaderPriorities() []string { +func (conf *hotRegionSchedulerConfig) getWriteLeaderPriorities() []string { conf.RLock() defer conf.RUnlock() return conf.WriteLeaderPriorities } -func (conf *hotRegionSchedulerConfig) GetWritePeerPriorities() []string { +func (conf *hotRegionSchedulerConfig) getWritePeerPriorities() []string { conf.RLock() defer conf.RUnlock() return conf.WritePeerPriorities } -func (conf *hotRegionSchedulerConfig) IsStrictPickingStoreEnabled() bool { +func (conf *hotRegionSchedulerConfig) isStrictPickingStoreEnabled() bool { conf.RLock() defer conf.RUnlock() return conf.StrictPickingStore } -func (conf *hotRegionSchedulerConfig) SetRankFormulaVersion(v string) { +func (conf *hotRegionSchedulerConfig) setRankFormulaVersion(v string) { conf.Lock() defer conf.Unlock() conf.RankFormulaVersion = v } -func (conf *hotRegionSchedulerConfig) GetRankFormulaVersion() string { +func (conf *hotRegionSchedulerConfig) getRankFormulaVersion() string { conf.RLock() defer conf.RUnlock() return conf.getRankFormulaVersionLocked() } -func (conf *hotRegionSchedulerConfig) GetHistorySampleDuration() time.Duration { +func (conf *hotRegionSchedulerConfig) getHistorySampleDuration() time.Duration { conf.RLock() defer conf.RUnlock() return conf.HistorySampleDuration.Duration } -func (conf *hotRegionSchedulerConfig) GetHistorySampleInterval() time.Duration { +func (conf *hotRegionSchedulerConfig) getHistorySampleInterval() time.Duration { conf.RLock() defer conf.RUnlock() return conf.HistorySampleInterval.Duration } -func (conf *hotRegionSchedulerConfig) SetHistorySampleDuration(d time.Duration) { +// nolint: unused, unparam +func (conf *hotRegionSchedulerConfig) setHistorySampleDuration(d time.Duration) { conf.Lock() defer conf.Unlock() conf.HistorySampleDuration = typeutil.NewDuration(d) } -func (conf *hotRegionSchedulerConfig) SetHistorySampleInterval(d time.Duration) { +// nolint: unused +func (conf *hotRegionSchedulerConfig) setHistorySampleInterval(d time.Duration) { conf.Lock() defer conf.Unlock() conf.HistorySampleInterval = typeutil.NewDuration(d) @@ -346,7 +348,7 @@ func (conf *hotRegionSchedulerConfig) getRankFormulaVersionLocked() string { } } -func (conf *hotRegionSchedulerConfig) IsForbidRWType(rw utils.RWType) bool { +func (conf *hotRegionSchedulerConfig) 
isForbidRWType(rw utils.RWType) bool { conf.RLock() defer conf.RUnlock() return rw.String() == conf.ForbidRWType @@ -367,6 +369,7 @@ func (conf *hotRegionSchedulerConfig) getForbidRWTypeLocked() string { } } +// ServeHTTP implements the http.Handler interface. func (conf *hotRegionSchedulerConfig) ServeHTTP(w http.ResponseWriter, r *http.Request) { router := mux.NewRouter() router.HandleFunc("/list", conf.handleGetConfig).Methods(http.MethodGet) diff --git a/pkg/schedule/schedulers/hot_region_rank_v1.go b/pkg/schedule/schedulers/hot_region_rank_v1.go index ebf6e9bf744..9005dff8861 100644 --- a/pkg/schedule/schedulers/hot_region_rank_v1.go +++ b/pkg/schedule/schedulers/hot_region_rank_v1.go @@ -39,7 +39,7 @@ func (r *rankV1) checkByPriorityAndTolerance(loads []float64, f func(int) bool) switch { case r.resourceTy == writeLeader: return r.checkByPriorityAndToleranceFirstOnly(loads, f) - case r.sche.conf.IsStrictPickingStoreEnabled(): + case r.sche.conf.isStrictPickingStoreEnabled(): return r.checkByPriorityAndToleranceAllOf(loads, f) default: return r.checkByPriorityAndToleranceFirstOnly(loads, f) @@ -50,7 +50,7 @@ func (r *rankV1) checkHistoryLoadsByPriority(loads [][]float64, f func(int) bool switch { case r.resourceTy == writeLeader: return r.checkHistoryLoadsByPriorityAndToleranceFirstOnly(loads, f) - case r.sche.conf.IsStrictPickingStoreEnabled(): + case r.sche.conf.isStrictPickingStoreEnabled(): return r.checkHistoryLoadsByPriorityAndToleranceAllOf(loads, f) default: return r.checkHistoryLoadsByPriorityAndToleranceFirstOnly(loads, f) diff --git a/pkg/schedule/schedulers/hot_region_rank_v2_test.go b/pkg/schedule/schedulers/hot_region_rank_v2_test.go index 0237c2156ec..029d47c3c51 100644 --- a/pkg/schedule/schedulers/hot_region_rank_v2_test.go +++ b/pkg/schedule/schedulers/hot_region_rank_v2_test.go @@ -36,10 +36,10 @@ func TestHotWriteRegionScheduleWithRevertRegionsDimSecond(t *testing.T) { re.NoError(err) hb := sche.(*hotScheduler) hb.types = []resourceType{writePeer} - hb.conf.SetDstToleranceRatio(0.0) - hb.conf.SetSrcToleranceRatio(0.0) - hb.conf.SetRankFormulaVersion("v1") - hb.conf.SetHistorySampleDuration(0) + hb.conf.setDstToleranceRatio(0.0) + hb.conf.setSrcToleranceRatio(0.0) + hb.conf.setRankFormulaVersion("v1") + hb.conf.setHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -62,7 +62,7 @@ func TestHotWriteRegionScheduleWithRevertRegionsDimSecond(t *testing.T) { re.Empty(ops) re.False(hb.searchRevertRegions[writePeer]) - hb.conf.SetRankFormulaVersion("v2") + hb.conf.setRankFormulaVersion("v2") // searchRevertRegions becomes true after the first `Schedule`. 
ops, _ = hb.Schedule(tc, false) re.Empty(ops) @@ -97,10 +97,10 @@ func TestHotWriteRegionScheduleWithRevertRegionsDimFirst(t *testing.T) { re.NoError(err) hb := sche.(*hotScheduler) hb.types = []resourceType{writePeer} - hb.conf.SetDstToleranceRatio(0.0) - hb.conf.SetSrcToleranceRatio(0.0) - hb.conf.SetRankFormulaVersion("v1") - hb.conf.SetHistorySampleDuration(0) + hb.conf.setDstToleranceRatio(0.0) + hb.conf.setSrcToleranceRatio(0.0) + hb.conf.setRankFormulaVersion("v1") + hb.conf.setHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -125,7 +125,7 @@ func TestHotWriteRegionScheduleWithRevertRegionsDimFirst(t *testing.T) { re.False(hb.searchRevertRegions[writePeer]) clearPendingInfluence(hb) - hb.conf.SetRankFormulaVersion("v2") + hb.conf.setRankFormulaVersion("v2") // searchRevertRegions becomes true after the first `Schedule`. ops, _ = hb.Schedule(tc, false) re.Len(ops, 1) @@ -149,10 +149,10 @@ func TestHotWriteRegionScheduleWithRevertRegionsDimFirstOnly(t *testing.T) { re.NoError(err) hb := sche.(*hotScheduler) hb.types = []resourceType{writePeer} - hb.conf.SetDstToleranceRatio(0.0) - hb.conf.SetSrcToleranceRatio(0.0) - hb.conf.SetRankFormulaVersion("v1") - hb.conf.SetHistorySampleDuration(0) + hb.conf.setDstToleranceRatio(0.0) + hb.conf.setSrcToleranceRatio(0.0) + hb.conf.setRankFormulaVersion("v1") + hb.conf.setHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -177,7 +177,7 @@ func TestHotWriteRegionScheduleWithRevertRegionsDimFirstOnly(t *testing.T) { re.False(hb.searchRevertRegions[writePeer]) clearPendingInfluence(hb) - hb.conf.SetRankFormulaVersion("v2") + hb.conf.setRankFormulaVersion("v2") // searchRevertRegions becomes true after the first `Schedule`. ops, _ = hb.Schedule(tc, false) re.Len(ops, 1) @@ -209,10 +209,10 @@ func TestHotReadRegionScheduleWithRevertRegionsDimSecond(t *testing.T) { sche, err := CreateScheduler(utils.Read.String(), oc, storage.NewStorageWithMemoryBackend(), nil, nil) re.NoError(err) hb := sche.(*hotScheduler) - hb.conf.SetDstToleranceRatio(0.0) - hb.conf.SetSrcToleranceRatio(0.0) - hb.conf.SetRankFormulaVersion("v1") - hb.conf.SetHistorySampleDuration(0) + hb.conf.setDstToleranceRatio(0.0) + hb.conf.setSrcToleranceRatio(0.0) + hb.conf.setRankFormulaVersion("v1") + hb.conf.setHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -235,7 +235,7 @@ func TestHotReadRegionScheduleWithRevertRegionsDimSecond(t *testing.T) { re.Empty(ops) re.False(hb.searchRevertRegions[readLeader]) - hb.conf.SetRankFormulaVersion("v2") + hb.conf.setRankFormulaVersion("v2") // searchRevertRegions becomes true after the first `Schedule`. 
ops, _ = hb.Schedule(tc, false) re.Empty(ops) @@ -267,11 +267,11 @@ func TestSkipUniformStore(t *testing.T) { defer cancel() hb, err := CreateScheduler(utils.Read.String(), oc, storage.NewStorageWithMemoryBackend(), nil, nil) re.NoError(err) - hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) - hb.(*hotScheduler).conf.SetDstToleranceRatio(1) - hb.(*hotScheduler).conf.SetRankFormulaVersion("v2") + hb.(*hotScheduler).conf.setSrcToleranceRatio(1) + hb.(*hotScheduler).conf.setDstToleranceRatio(1) + hb.(*hotScheduler).conf.setRankFormulaVersion("v2") hb.(*hotScheduler).conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) tc.AddRegionStore(3, 20) @@ -422,9 +422,9 @@ func checkHotReadRegionScheduleWithSmallHotRegion(re *require.Assertions, highLo sche, err := CreateScheduler(utils.Read.String(), oc, storage.NewStorageWithMemoryBackend(), nil, nil) re.NoError(err) hb := sche.(*hotScheduler) - hb.conf.SetSrcToleranceRatio(1) - hb.conf.SetDstToleranceRatio(1) - hb.conf.SetRankFormulaVersion("v2") + hb.conf.setSrcToleranceRatio(1) + hb.conf.setDstToleranceRatio(1) + hb.conf.setRankFormulaVersion("v2") hb.conf.ReadPriorities = []string{utils.QueryPriority, utils.BytePriority} tc.AddRegionStore(1, 40) tc.AddRegionStore(2, 10) diff --git a/pkg/schedule/schedulers/hot_region_test.go b/pkg/schedule/schedulers/hot_region_test.go index 3b563106dc0..fc7a6ae4417 100644 --- a/pkg/schedule/schedulers/hot_region_test.go +++ b/pkg/schedule/schedulers/hot_region_test.go @@ -84,36 +84,36 @@ func TestUpgrade(t *testing.T) { sche, err := CreateScheduler(HotRegionType, oc, storage.NewStorageWithMemoryBackend(), ConfigSliceDecoder(HotRegionType, nil)) re.NoError(err) hb := sche.(*hotScheduler) - re.Equal([]string{utils.QueryPriority, utils.BytePriority}, hb.conf.GetReadPriorities()) - re.Equal([]string{utils.QueryPriority, utils.BytePriority}, hb.conf.GetWriteLeaderPriorities()) - re.Equal([]string{utils.BytePriority, utils.KeyPriority}, hb.conf.GetWritePeerPriorities()) - re.Equal("v2", hb.conf.GetRankFormulaVersion()) + re.Equal([]string{utils.QueryPriority, utils.BytePriority}, hb.conf.getReadPriorities()) + re.Equal([]string{utils.QueryPriority, utils.BytePriority}, hb.conf.getWriteLeaderPriorities()) + re.Equal([]string{utils.BytePriority, utils.KeyPriority}, hb.conf.getWritePeerPriorities()) + re.Equal("v2", hb.conf.getRankFormulaVersion()) // upgrade from json(null) sche, err = CreateScheduler(HotRegionType, oc, storage.NewStorageWithMemoryBackend(), ConfigJSONDecoder([]byte("null"))) re.NoError(err) hb = sche.(*hotScheduler) - re.Equal([]string{utils.QueryPriority, utils.BytePriority}, hb.conf.GetReadPriorities()) - re.Equal([]string{utils.QueryPriority, utils.BytePriority}, hb.conf.GetWriteLeaderPriorities()) - re.Equal([]string{utils.BytePriority, utils.KeyPriority}, hb.conf.GetWritePeerPriorities()) - re.Equal("v2", hb.conf.GetRankFormulaVersion()) + re.Equal([]string{utils.QueryPriority, utils.BytePriority}, hb.conf.getReadPriorities()) + re.Equal([]string{utils.QueryPriority, utils.BytePriority}, hb.conf.getWriteLeaderPriorities()) + re.Equal([]string{utils.BytePriority, utils.KeyPriority}, hb.conf.getWritePeerPriorities()) + re.Equal("v2", hb.conf.getRankFormulaVersion()) // upgrade from < 5.2 config51 := 
`{"min-hot-byte-rate":100,"min-hot-key-rate":10,"min-hot-query-rate":10,"max-zombie-rounds":5,"max-peer-number":1000,"byte-rate-rank-step-ratio":0.05,"key-rate-rank-step-ratio":0.05,"query-rate-rank-step-ratio":0.05,"count-rank-step-ratio":0.01,"great-dec-ratio":0.95,"minor-dec-ratio":0.99,"src-tolerance-ratio":1.05,"dst-tolerance-ratio":1.05,"strict-picking-store":"true","enable-for-tiflash":"true"}` sche, err = CreateScheduler(HotRegionType, oc, storage.NewStorageWithMemoryBackend(), ConfigJSONDecoder([]byte(config51))) re.NoError(err) hb = sche.(*hotScheduler) - re.Equal([]string{utils.BytePriority, utils.KeyPriority}, hb.conf.GetReadPriorities()) - re.Equal([]string{utils.KeyPriority, utils.BytePriority}, hb.conf.GetWriteLeaderPriorities()) - re.Equal([]string{utils.BytePriority, utils.KeyPriority}, hb.conf.GetWritePeerPriorities()) - re.Equal("v1", hb.conf.GetRankFormulaVersion()) + re.Equal([]string{utils.BytePriority, utils.KeyPriority}, hb.conf.getReadPriorities()) + re.Equal([]string{utils.KeyPriority, utils.BytePriority}, hb.conf.getWriteLeaderPriorities()) + re.Equal([]string{utils.BytePriority, utils.KeyPriority}, hb.conf.getWritePeerPriorities()) + re.Equal("v1", hb.conf.getRankFormulaVersion()) // upgrade from < 6.4 config54 := `{"min-hot-byte-rate":100,"min-hot-key-rate":10,"min-hot-query-rate":10,"max-zombie-rounds":5,"max-peer-number":1000,"byte-rate-rank-step-ratio":0.05,"key-rate-rank-step-ratio":0.05,"query-rate-rank-step-ratio":0.05,"count-rank-step-ratio":0.01,"great-dec-ratio":0.95,"minor-dec-ratio":0.99,"src-tolerance-ratio":1.05,"dst-tolerance-ratio":1.05,"read-priorities":["query","byte"],"write-leader-priorities":["query","byte"],"write-peer-priorities":["byte","key"],"strict-picking-store":"true","enable-for-tiflash":"true","forbid-rw-type":"none"}` sche, err = CreateScheduler(HotRegionType, oc, storage.NewStorageWithMemoryBackend(), ConfigJSONDecoder([]byte(config54))) re.NoError(err) hb = sche.(*hotScheduler) - re.Equal([]string{utils.QueryPriority, utils.BytePriority}, hb.conf.GetReadPriorities()) - re.Equal([]string{utils.QueryPriority, utils.BytePriority}, hb.conf.GetWriteLeaderPriorities()) - re.Equal([]string{utils.BytePriority, utils.KeyPriority}, hb.conf.GetWritePeerPriorities()) - re.Equal("v1", hb.conf.GetRankFormulaVersion()) + re.Equal([]string{utils.QueryPriority, utils.BytePriority}, hb.conf.getReadPriorities()) + re.Equal([]string{utils.QueryPriority, utils.BytePriority}, hb.conf.getWriteLeaderPriorities()) + re.Equal([]string{utils.BytePriority, utils.KeyPriority}, hb.conf.getWritePeerPriorities()) + re.Equal("v1", hb.conf.getRankFormulaVersion()) } func TestGCPendingOpInfos(t *testing.T) { @@ -151,7 +151,7 @@ func checkGCPendingOpInfos(re *require.Assertions, enablePlacementRules bool) { op.Start() op.SetStatusReachTime(operator.CREATED, time.Now().Add(-5*utils.StoreHeartBeatReportInterval*time.Second)) op.SetStatusReachTime(operator.STARTED, time.Now().Add((-5*utils.StoreHeartBeatReportInterval+1)*time.Second)) - return newPendingInfluence(op, []uint64{2}, 4, statistics.Influence{}, hb.conf.GetStoreStatZombieDuration()) + return newPendingInfluence(op, []uint64{2}, 4, statistics.Influence{}, hb.conf.getStoreStatZombieDuration()) } justDoneOpInfluence := func(region *core.RegionInfo, ty opType) *pendingInfluence { infl := notDoneOpInfluence(region, ty) @@ -400,7 +400,7 @@ func checkHotWriteRegionPlacement(re *require.Assertions, enablePlacementRules b hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), 
nil) re.NoError(err) hb.(*hotScheduler).types = []resourceType{writePeer} - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) tc.AddLabelsStore(1, 2, map[string]string{"zone": "z1", "host": "h1"}) tc.AddLabelsStore(2, 2, map[string]string{"zone": "z1", "host": "h2"}) @@ -456,7 +456,7 @@ func checkHotWriteRegionScheduleByteRateOnly(re *require.Assertions, enablePlace hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.BytePriority, utils.KeyPriority} // Add stores 1, 2, 3, 4, 5, 6 with region counts 3, 2, 2, 2, 0, 0. @@ -652,7 +652,7 @@ func TestHotWriteRegionScheduleByteRateOnlyWithTiFlash(t *testing.T) { sche, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb := sche.(*hotScheduler) - hb.conf.SetHistorySampleDuration(0) + hb.conf.setHistorySampleDuration(0) // Add TiKV stores 1, 2, 3, 4, 5, 6, 7 (Down) with region counts 3, 3, 2, 2, 0, 0, 0. // Add TiFlash stores 8, 9, 10 with region counts 2, 1, 1. @@ -734,7 +734,7 @@ func TestHotWriteRegionScheduleByteRateOnlyWithTiFlash(t *testing.T) { } pdServerCfg.FlowRoundByDigit = 3 // Disable for TiFlash - hb.conf.SetEnableForTiFlash(false) + hb.conf.setEnableForTiFlash(false) for i := 0; i < 20; i++ { clearPendingInfluence(hb) ops, _ := hb.Schedule(tc, false) @@ -848,10 +848,10 @@ func TestHotWriteRegionScheduleWithQuery(t *testing.T) { hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) - hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) - hb.(*hotScheduler).conf.SetDstToleranceRatio(1) + hb.(*hotScheduler).conf.setSrcToleranceRatio(1) + hb.(*hotScheduler).conf.setDstToleranceRatio(1) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.QueryPriority, utils.BytePriority} - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -884,11 +884,11 @@ func TestHotWriteRegionScheduleWithKeyRate(t *testing.T) { hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb.(*hotScheduler).types = []resourceType{writePeer} - hb.(*hotScheduler).conf.SetDstToleranceRatio(1) - hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) + hb.(*hotScheduler).conf.setDstToleranceRatio(1) + hb.(*hotScheduler).conf.setSrcToleranceRatio(1) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.KeyPriority, utils.BytePriority} hb.(*hotScheduler).conf.RankFormulaVersion = "v1" - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) @@ -941,8 +941,8 @@ func TestHotWriteRegionScheduleUnhealthyStore(t *testing.T) { defer cancel() hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) - hb.(*hotScheduler).conf.SetDstToleranceRatio(1) - hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) + hb.(*hotScheduler).conf.setDstToleranceRatio(1) + hb.(*hotScheduler).conf.setSrcToleranceRatio(1) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) @@ -986,8 +986,8 
@@ func TestHotWriteRegionScheduleCheckHot(t *testing.T) { defer cancel() hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) - hb.(*hotScheduler).conf.SetDstToleranceRatio(1) - hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) + hb.(*hotScheduler).conf.setDstToleranceRatio(1) + hb.(*hotScheduler).conf.setSrcToleranceRatio(1) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) @@ -1019,7 +1019,7 @@ func TestHotWriteRegionScheduleWithLeader(t *testing.T) { hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) hb.(*hotScheduler).types = []resourceType{writeLeader} hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.KeyPriority, utils.BytePriority} - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) re.NoError(err) tc.AddRegionStore(1, 20) @@ -1085,7 +1085,7 @@ func checkHotWriteRegionScheduleWithPendingInfluence(re *require.Assertions, dim hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb.(*hotScheduler).conf.RankFormulaVersion = "v1" - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) @@ -1169,7 +1169,7 @@ func TestHotWriteRegionScheduleWithRuleEnabled(t *testing.T) { hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.KeyPriority, utils.BytePriority} - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) key, err := hex.DecodeString("") re.NoError(err) @@ -1250,7 +1250,7 @@ func TestHotReadRegionScheduleByteRateOnly(t *testing.T) { re.NoError(err) hb := scheduler.(*hotScheduler) hb.conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} - hb.conf.SetHistorySampleDuration(0) + hb.conf.setHistorySampleDuration(0) // Add stores 1, 2, 3, 4, 5 with region counts 3, 2, 2, 2, 0. 
tc.AddRegionStore(1, 3) @@ -1370,10 +1370,10 @@ func TestHotReadRegionScheduleWithQuery(t *testing.T) { defer cancel() hb, err := CreateScheduler(utils.Read.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) - hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) - hb.(*hotScheduler).conf.SetDstToleranceRatio(1) + hb.(*hotScheduler).conf.setSrcToleranceRatio(1) + hb.(*hotScheduler).conf.setDstToleranceRatio(1) hb.(*hotScheduler).conf.RankFormulaVersion = "v1" - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -1404,10 +1404,10 @@ func TestHotReadRegionScheduleWithKeyRate(t *testing.T) { hb, err := CreateScheduler(utils.Read.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb.(*hotScheduler).conf.RankFormulaVersion = "v1" - hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) - hb.(*hotScheduler).conf.SetDstToleranceRatio(1) + hb.(*hotScheduler).conf.setSrcToleranceRatio(1) + hb.(*hotScheduler).conf.setDstToleranceRatio(1) hb.(*hotScheduler).conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -1469,7 +1469,7 @@ func checkHotReadRegionScheduleWithPendingInfluence(re *require.Assertions, dim hb.(*hotScheduler).conf.MinorDecRatio = 1 hb.(*hotScheduler).conf.DstToleranceRatio = 1 hb.(*hotScheduler).conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) pendingAmpFactor = 0.0 tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) @@ -1575,9 +1575,9 @@ func TestHotReadWithEvictLeaderScheduler(t *testing.T) { defer cancel() hb, err := CreateScheduler(utils.Read.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) - hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) - hb.(*hotScheduler).conf.SetDstToleranceRatio(1) - hb.(*hotScheduler).conf.SetStrictPickingStore(false) + hb.(*hotScheduler).conf.setSrcToleranceRatio(1) + hb.(*hotScheduler).conf.setDstToleranceRatio(1) + hb.(*hotScheduler).conf.setStrictPickingStore(false) hb.(*hotScheduler).conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) @@ -2042,9 +2042,9 @@ func TestInfluenceByRWType(t *testing.T) { hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb.(*hotScheduler).types = []resourceType{writePeer} - hb.(*hotScheduler).conf.SetDstToleranceRatio(1) - hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setDstToleranceRatio(1) + hb.(*hotScheduler).conf.setSrcToleranceRatio(1) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) @@ -2162,9 +2162,9 @@ func TestHotScheduleWithPriority(t *testing.T) { hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb.(*hotScheduler).types = []resourceType{writePeer} - hb.(*hotScheduler).conf.SetDstToleranceRatio(1.05) - hb.(*hotScheduler).conf.SetSrcToleranceRatio(1.05) - 
hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setDstToleranceRatio(1.05) + hb.(*hotScheduler).conf.setSrcToleranceRatio(1.05) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) // skip stddev check stddevThreshold = -1.0 @@ -2207,7 +2207,7 @@ func TestHotScheduleWithPriority(t *testing.T) { addRegionInfo(tc, utils.Read, []testRegionInfo{ {1, []uint64{1, 2, 3}, 2 * units.MiB, 2 * units.MiB, 0}, }) - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) hb.(*hotScheduler).conf.ReadPriorities = []string{utils.BytePriority, utils.KeyPriority} ops, _ = hb.Schedule(tc, false) re.Len(ops, 1) @@ -2222,7 +2222,7 @@ func TestHotScheduleWithPriority(t *testing.T) { hb.(*hotScheduler).types = []resourceType{writePeer} hb.(*hotScheduler).conf.WriteLeaderPriorities = []string{utils.KeyPriority, utils.BytePriority} hb.(*hotScheduler).conf.RankFormulaVersion = "v1" - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) re.NoError(err) // assert loose store picking @@ -2264,8 +2264,8 @@ func TestHotScheduleWithStddev(t *testing.T) { hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb.(*hotScheduler).types = []resourceType{writePeer} - hb.(*hotScheduler).conf.SetDstToleranceRatio(1.0) - hb.(*hotScheduler).conf.SetSrcToleranceRatio(1.0) + hb.(*hotScheduler).conf.setDstToleranceRatio(1.0) + hb.(*hotScheduler).conf.setSrcToleranceRatio(1.0) hb.(*hotScheduler).conf.RankFormulaVersion = "v1" tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) @@ -2274,7 +2274,7 @@ func TestHotScheduleWithStddev(t *testing.T) { tc.AddRegionStore(4, 20) tc.AddRegionStore(5, 20) hb.(*hotScheduler).conf.StrictPickingStore = false - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) // skip uniform cluster tc.UpdateStorageWrittenStats(1, 5*units.MiB*utils.StoreHeartBeatReportInterval, 5*units.MiB*utils.StoreHeartBeatReportInterval) @@ -2323,9 +2323,9 @@ func TestHotWriteLeaderScheduleWithPriority(t *testing.T) { hb, err := CreateScheduler(utils.Write.String(), oc, storage.NewStorageWithMemoryBackend(), nil) re.NoError(err) hb.(*hotScheduler).types = []resourceType{writeLeader} - hb.(*hotScheduler).conf.SetDstToleranceRatio(1) - hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) - hb.(*hotScheduler).conf.SetHistorySampleDuration(0) + hb.(*hotScheduler).conf.setDstToleranceRatio(1) + hb.(*hotScheduler).conf.setSrcToleranceRatio(1) + hb.(*hotScheduler).conf.setHistorySampleDuration(0) tc.SetClusterVersion(versioninfo.MinSupportedVersion(versioninfo.Version4_0)) tc.AddRegionStore(1, 20) @@ -2533,17 +2533,17 @@ func TestConfigValidation(t *testing.T) { // rank-formula-version // default hc = initHotRegionScheduleConfig() - re.Equal("v2", hc.GetRankFormulaVersion()) + re.Equal("v2", hc.getRankFormulaVersion()) // v1 hc.RankFormulaVersion = "v1" err = hc.validateLocked() re.NoError(err) - re.Equal("v1", hc.GetRankFormulaVersion()) + re.Equal("v1", hc.getRankFormulaVersion()) // v2 hc.RankFormulaVersion = "v2" err = hc.validateLocked() re.NoError(err) - re.Equal("v2", hc.GetRankFormulaVersion()) + re.Equal("v2", hc.getRankFormulaVersion()) // illegal hc.RankFormulaVersion = "v0" err = hc.validateLocked() @@ -2552,20 +2552,20 @@ func TestConfigValidation(t *testing.T) { // forbid-rw-type // default hc = initHotRegionScheduleConfig() - 
re.False(hc.IsForbidRWType(utils.Read)) - re.False(hc.IsForbidRWType(utils.Write)) + re.False(hc.isForbidRWType(utils.Read)) + re.False(hc.isForbidRWType(utils.Write)) // read hc.ForbidRWType = "read" err = hc.validateLocked() re.NoError(err) - re.True(hc.IsForbidRWType(utils.Read)) - re.False(hc.IsForbidRWType(utils.Write)) + re.True(hc.isForbidRWType(utils.Read)) + re.False(hc.isForbidRWType(utils.Write)) // write hc.ForbidRWType = "write" err = hc.validateLocked() re.NoError(err) - re.False(hc.IsForbidRWType(utils.Read)) - re.True(hc.IsForbidRWType(utils.Write)) + re.False(hc.isForbidRWType(utils.Read)) + re.True(hc.isForbidRWType(utils.Write)) // illegal hc.ForbidRWType = "test" err = hc.validateLocked() diff --git a/pkg/schedule/schedulers/init.go b/pkg/schedule/schedulers/init.go index 777c8b3d625..5990aa2cda3 100644 --- a/pkg/schedule/schedulers/init.go +++ b/pkg/schedule/schedulers/init.go @@ -75,7 +75,6 @@ func schedulersRegister() { return err } conf.Ranges = ranges - conf.Name = BalanceRegionName return nil } }) @@ -226,7 +225,7 @@ func schedulersRegister() { // For clusters with the initial version >= v5.2, it will be overwritten by the default config. conf.applyPrioritiesConfig(compatiblePrioritiesConfig) // For clusters with the initial version >= v6.4, it will be overwritten by the default config. - conf.SetRankFormulaVersion("") + conf.setRankFormulaVersion("") if err := decoder(conf); err != nil { return nil, err } @@ -282,7 +281,6 @@ func schedulersRegister() { return err } conf.Ranges = ranges - conf.Name = LabelName return nil } }) @@ -307,7 +305,6 @@ func schedulersRegister() { return err } conf.Ranges = ranges - conf.Name = RandomMergeName return nil } }) @@ -370,7 +367,6 @@ func schedulersRegister() { } conf.Limit = limit } - conf.Name = ShuffleHotRegionName return nil } }) diff --git a/pkg/schedule/schedulers/label.go b/pkg/schedule/schedulers/label.go index 6b7a98f8d02..f57d82b3149 100644 --- a/pkg/schedule/schedulers/label.go +++ b/pkg/schedule/schedulers/label.go @@ -24,6 +24,7 @@ import ( "github.com/tikv/pd/pkg/schedule/filter" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" + types "github.com/tikv/pd/pkg/schedule/type" "go.uber.org/zap" ) @@ -35,7 +36,6 @@ const ( ) type labelSchedulerConfig struct { - Name string `json:"name"` Ranges []core.KeyRange `json:"ranges"` // TODO: When we prepare to use Ranges, we will need to implement the ReloadConfig function for this scheduler. } @@ -50,31 +50,26 @@ type labelScheduler struct { // the store with the specific label. func newLabelScheduler(opController *operator.Controller, conf *labelSchedulerConfig) Scheduler { return &labelScheduler{ - BaseScheduler: NewBaseScheduler(opController), + BaseScheduler: NewBaseScheduler(opController, types.LabelScheduler), conf: conf, } } -func (s *labelScheduler) GetName() string { - return s.conf.Name -} - -func (*labelScheduler) GetType() string { - return LabelType -} - +// EncodeConfig implements the Scheduler interface. func (s *labelScheduler) EncodeConfig() ([]byte, error) { return EncodeConfig(s.conf) } +// IsScheduleAllowed implements the Scheduler interface. 
func (s *labelScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { allowed := s.OpController.OperatorCount(operator.OpLeader) < cluster.GetSchedulerConfig().GetLeaderScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpLeader.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpLeader) } return allowed } +// Schedule implements the Scheduler interface. func (s *labelScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { labelCounter.Inc() stores := cluster.GetStores() diff --git a/pkg/schedule/schedulers/metrics.go b/pkg/schedule/schedulers/metrics.go index f8bd2b4d686..42170e43818 100644 --- a/pkg/schedule/schedulers/metrics.go +++ b/pkg/schedule/schedulers/metrics.go @@ -186,7 +186,7 @@ func grantLeaderCounterWithEvent(event string) prometheus.Counter { } func hotRegionCounterWithEvent(event string) prometheus.Counter { - return schedulerCounter.WithLabelValues(types.HotRegionScheduler.String(), event) + return schedulerCounter.WithLabelValues(types.BalanceHotRegionScheduler.String(), event) } func labelCounterWithEvent(event string) prometheus.Counter { diff --git a/pkg/schedule/schedulers/random_merge.go b/pkg/schedule/schedulers/random_merge.go index ff96afe03eb..751ab1eaa9d 100644 --- a/pkg/schedule/schedulers/random_merge.go +++ b/pkg/schedule/schedulers/random_merge.go @@ -26,6 +26,7 @@ import ( "github.com/tikv/pd/pkg/schedule/filter" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" + types "github.com/tikv/pd/pkg/schedule/type" ) const ( @@ -36,7 +37,6 @@ const ( ) type randomMergeSchedulerConfig struct { - Name string `json:"name"` Ranges []core.KeyRange `json:"ranges"` // TODO: When we prepare to use Ranges, we will need to implement the ReloadConfig function for this scheduler. } @@ -49,38 +49,33 @@ type randomMergeScheduler struct { // newRandomMergeScheduler creates an admin scheduler that randomly picks two adjacent regions // then merges them. func newRandomMergeScheduler(opController *operator.Controller, conf *randomMergeSchedulerConfig) Scheduler { - base := NewBaseScheduler(opController) + base := NewBaseScheduler(opController, types.RandomMergeScheduler) return &randomMergeScheduler{ BaseScheduler: base, conf: conf, } } -func (s *randomMergeScheduler) GetName() string { - return s.conf.Name -} - -func (*randomMergeScheduler) GetType() string { - return RandomMergeType -} - +// EncodeConfig implements the Scheduler interface. func (s *randomMergeScheduler) EncodeConfig() ([]byte, error) { return EncodeConfig(s.conf) } +// IsScheduleAllowed implements the Scheduler interface. func (s *randomMergeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { allowed := s.OpController.OperatorCount(operator.OpMerge) < cluster.GetSchedulerConfig().GetMergeScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpMerge.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpMerge) } return allowed } +// Schedule implements the Scheduler interface. func (s *randomMergeScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { randomMergeCounter.Inc() store := filter.NewCandidates(cluster.GetStores()). - FilterSource(cluster.GetSchedulerConfig(), nil, nil, &filter.StoreStateFilter{ActionScope: s.conf.Name, MoveRegion: true, OperatorLevel: constant.Low}). 
+ FilterSource(cluster.GetSchedulerConfig(), nil, nil, &filter.StoreStateFilter{ActionScope: s.GetName(), MoveRegion: true, OperatorLevel: constant.Low}). RandomPick() if store == nil { randomMergeNoSourceStoreCounter.Inc() diff --git a/pkg/schedule/schedulers/scatter_range.go b/pkg/schedule/schedulers/scatter_range.go index 17c67a154ab..76a47dd973b 100644 --- a/pkg/schedule/schedulers/scatter_range.go +++ b/pkg/schedule/schedulers/scatter_range.go @@ -25,6 +25,7 @@ import ( sche "github.com/tikv/pd/pkg/schedule/core" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/pkg/utils/syncutil" @@ -46,7 +47,7 @@ type scatterRangeSchedulerConfig struct { EndKey string `json:"end-key"` } -func (conf *scatterRangeSchedulerConfig) BuildWithArgs(args []string) error { +func (conf *scatterRangeSchedulerConfig) buildWithArgs(args []string) error { if len(args) != 3 { return errs.ErrSchedulerConfig.FastGenByArgs("ranges and name") } @@ -59,7 +60,7 @@ func (conf *scatterRangeSchedulerConfig) BuildWithArgs(args []string) error { return nil } -func (conf *scatterRangeSchedulerConfig) Clone() *scatterRangeSchedulerConfig { +func (conf *scatterRangeSchedulerConfig) clone() *scatterRangeSchedulerConfig { conf.RLock() defer conf.RUnlock() return &scatterRangeSchedulerConfig{ @@ -69,7 +70,7 @@ func (conf *scatterRangeSchedulerConfig) Clone() *scatterRangeSchedulerConfig { } } -func (conf *scatterRangeSchedulerConfig) Persist() error { +func (conf *scatterRangeSchedulerConfig) persist() error { name := conf.getSchedulerName() conf.RLock() defer conf.RUnlock() @@ -80,19 +81,19 @@ func (conf *scatterRangeSchedulerConfig) Persist() error { return conf.storage.SaveSchedulerConfig(name, data) } -func (conf *scatterRangeSchedulerConfig) GetRangeName() string { +func (conf *scatterRangeSchedulerConfig) getRangeName() string { conf.RLock() defer conf.RUnlock() return conf.RangeName } -func (conf *scatterRangeSchedulerConfig) GetStartKey() []byte { +func (conf *scatterRangeSchedulerConfig) getStartKey() []byte { conf.RLock() defer conf.RUnlock() return []byte(conf.StartKey) } -func (conf *scatterRangeSchedulerConfig) GetEndKey() []byte { +func (conf *scatterRangeSchedulerConfig) getEndKey() []byte { conf.RLock() defer conf.RUnlock() return []byte(conf.EndKey) @@ -106,7 +107,6 @@ func (conf *scatterRangeSchedulerConfig) getSchedulerName() string { type scatterRangeScheduler struct { *BaseScheduler - name string config *scatterRangeSchedulerConfig balanceLeader Scheduler balanceRegion Scheduler @@ -115,49 +115,43 @@ type scatterRangeScheduler struct { // newScatterRangeScheduler creates a scheduler that balances the distribution of leaders and regions that in the specified key range. 
func newScatterRangeScheduler(opController *operator.Controller, config *scatterRangeSchedulerConfig) Scheduler { - base := NewBaseScheduler(opController) + base := NewBaseScheduler(opController, types.ScatterRangeScheduler) - name := config.getSchedulerName() handler := newScatterRangeHandler(config) scheduler := &scatterRangeScheduler{ BaseScheduler: base, config: config, handler: handler, - name: name, balanceLeader: newBalanceLeaderScheduler( opController, &balanceLeaderSchedulerConfig{Ranges: []core.KeyRange{core.NewKeyRange("", "")}}, + // the name will not be persisted WithBalanceLeaderName("scatter-range-leader"), - WithBalanceLeaderFilterCounterName("scatter-range-leader"), ), balanceRegion: newBalanceRegionScheduler( opController, &balanceRegionSchedulerConfig{Ranges: []core.KeyRange{core.NewKeyRange("", "")}}, + // the name will not be persisted WithBalanceRegionName("scatter-range-region"), - WithBalanceRegionFilterCounterName("scatter-range-region"), ), } + scheduler.name = config.getSchedulerName() return scheduler } +// ServeHTTP implements the http.Handler interface. func (l *scatterRangeScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { l.handler.ServeHTTP(w, r) } -func (l *scatterRangeScheduler) GetName() string { - return l.name -} - -func (*scatterRangeScheduler) GetType() string { - return ScatterRangeType -} - +// EncodeConfig implements the Scheduler interface. func (l *scatterRangeScheduler) EncodeConfig() ([]byte, error) { l.config.RLock() defer l.config.RUnlock() return EncodeConfig(l.config) } +// ReloadConfig implements the Scheduler interface. func (l *scatterRangeScheduler) ReloadConfig() error { l.config.Lock() defer l.config.Unlock() @@ -178,6 +172,7 @@ func (l *scatterRangeScheduler) ReloadConfig() error { return nil } +// IsScheduleAllowed implements the Scheduler interface. func (l *scatterRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { return l.allowBalanceLeader(cluster) || l.allowBalanceRegion(cluster) } @@ -185,7 +180,7 @@ func (l *scatterRangeScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) func (l *scatterRangeScheduler) allowBalanceLeader(cluster sche.SchedulerCluster) bool { allowed := l.OpController.OperatorCount(operator.OpRange) < cluster.GetSchedulerConfig().GetLeaderScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(l.GetType(), operator.OpLeader.String()).Inc() + operator.IncOperatorLimitCounter(l.GetType(), operator.OpLeader) } return allowed } @@ -193,20 +188,21 @@ func (l *scatterRangeScheduler) allowBalanceLeader(cluster sche.SchedulerCluster func (l *scatterRangeScheduler) allowBalanceRegion(cluster sche.SchedulerCluster) bool { allowed := l.OpController.OperatorCount(operator.OpRange) < cluster.GetSchedulerConfig().GetRegionScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(l.GetType(), operator.OpRegion.String()).Inc() + operator.IncOperatorLimitCounter(l.GetType(), operator.OpRegion) } return allowed } +// Schedule implements the Scheduler interface. 
func (l *scatterRangeScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { scatterRangeCounter.Inc() // isolate a new cluster according to the key range - c := genRangeCluster(cluster, l.config.GetStartKey(), l.config.GetEndKey()) + c := genRangeCluster(cluster, l.config.getStartKey(), l.config.getEndKey()) c.SetTolerantSizeRatio(2) if l.allowBalanceLeader(cluster) { ops, _ := l.balanceLeader.Schedule(c, false) if len(ops) > 0 { - ops[0].SetDesc(fmt.Sprintf("scatter-range-leader-%s", l.config.GetRangeName())) + ops[0].SetDesc(fmt.Sprintf("scatter-range-leader-%s", l.config.getRangeName())) ops[0].AttachKind(operator.OpRange) ops[0].Counters = append(ops[0].Counters, scatterRangeNewOperatorCounter, @@ -218,7 +214,7 @@ func (l *scatterRangeScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) if l.allowBalanceRegion(cluster) { ops, _ := l.balanceRegion.Schedule(c, false) if len(ops) > 0 { - ops[0].SetDesc(fmt.Sprintf("scatter-range-region-%s", l.config.GetRangeName())) + ops[0].SetDesc(fmt.Sprintf("scatter-range-region-%s", l.config.getRangeName())) ops[0].AttachKind(operator.OpRange) ops[0].Counters = append(ops[0].Counters, scatterRangeNewOperatorCounter, @@ -236,7 +232,7 @@ type scatterRangeHandler struct { config *scatterRangeSchedulerConfig } -func (handler *scatterRangeHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { +func (handler *scatterRangeHandler) updateConfig(w http.ResponseWriter, r *http.Request) { var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { return @@ -244,42 +240,42 @@ func (handler *scatterRangeHandler) UpdateConfig(w http.ResponseWriter, r *http. var args []string name, ok := input["range-name"].(string) if ok { - if name != handler.config.GetRangeName() { + if name != handler.config.getRangeName() { handler.rd.JSON(w, http.StatusInternalServerError, errors.New("Cannot change the range name, please delete this schedule").Error()) return } args = append(args, name) } else { - args = append(args, handler.config.GetRangeName()) + args = append(args, handler.config.getRangeName()) } startKey, ok := input["start-key"].(string) if ok { args = append(args, startKey) } else { - args = append(args, string(handler.config.GetStartKey())) + args = append(args, string(handler.config.getStartKey())) } endKey, ok := input["end-key"].(string) if ok { args = append(args, endKey) } else { - args = append(args, string(handler.config.GetEndKey())) + args = append(args, string(handler.config.getEndKey())) } - err := handler.config.BuildWithArgs(args) + err := handler.config.buildWithArgs(args) if err != nil { handler.rd.JSON(w, http.StatusBadRequest, err.Error()) return } - err = handler.config.Persist() + err = handler.config.persist() if err != nil { handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) } handler.rd.JSON(w, http.StatusOK, nil) } -func (handler *scatterRangeHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { - conf := handler.config.Clone() +func (handler *scatterRangeHandler) listConfig(w http.ResponseWriter, _ *http.Request) { + conf := handler.config.clone() handler.rd.JSON(w, http.StatusOK, conf) } @@ -289,7 +285,7 @@ func newScatterRangeHandler(config *scatterRangeSchedulerConfig) http.Handler { rd: render.New(render.Options{IndentJSON: true}), } router := mux.NewRouter() - router.HandleFunc("/config", h.UpdateConfig).Methods(http.MethodPost) - router.HandleFunc("/list", h.ListConfig).Methods(http.MethodGet) + 
router.HandleFunc("/config", h.updateConfig).Methods(http.MethodPost) + router.HandleFunc("/list", h.listConfig).Methods(http.MethodGet) return router } diff --git a/pkg/schedule/schedulers/scheduler.go b/pkg/schedule/schedulers/scheduler.go index abace59a266..894544d9617 100644 --- a/pkg/schedule/schedulers/scheduler.go +++ b/pkg/schedule/schedulers/scheduler.go @@ -27,6 +27,7 @@ import ( sche "github.com/tikv/pd/pkg/schedule/core" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/storage/endpoint" "go.uber.org/zap" ) @@ -36,7 +37,7 @@ type Scheduler interface { http.Handler GetName() string // GetType should in accordance with the name passing to RegisterScheduler() - GetType() string + GetType() types.CheckerSchedulerType EncodeConfig() ([]byte, error) // ReloadConfig reloads the config from the storage. ReloadConfig() error diff --git a/pkg/schedule/schedulers/shuffle_hot_region.go b/pkg/schedule/schedulers/shuffle_hot_region.go index f4b566c56a4..5bb5d269b63 100644 --- a/pkg/schedule/schedulers/shuffle_hot_region.go +++ b/pkg/schedule/schedulers/shuffle_hot_region.go @@ -26,6 +26,7 @@ import ( "github.com/tikv/pd/pkg/schedule/filter" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/apiutil" @@ -44,30 +45,23 @@ const ( type shuffleHotRegionSchedulerConfig struct { syncutil.RWMutex storage endpoint.ConfigStorage - Name string `json:"name"` Limit uint64 `json:"limit"` } -func (conf *shuffleHotRegionSchedulerConfig) getSchedulerName() string { - return conf.Name -} - func (conf *shuffleHotRegionSchedulerConfig) Clone() *shuffleHotRegionSchedulerConfig { conf.RLock() defer conf.RUnlock() return &shuffleHotRegionSchedulerConfig{ - Name: conf.Name, Limit: conf.Limit, } } func (conf *shuffleHotRegionSchedulerConfig) persistLocked() error { - name := conf.getSchedulerName() data, err := EncodeConfig(conf) if err != nil { return err } - return conf.storage.SaveSchedulerConfig(name, data) + return conf.storage.SaveSchedulerConfig(types.ShuffleHotRegionScheduler.String(), data) } func (conf *shuffleHotRegionSchedulerConfig) getLimit() uint64 { @@ -90,6 +84,7 @@ type shuffleHotRegionScheduler struct { func newShuffleHotRegionScheduler(opController *operator.Controller, conf *shuffleHotRegionSchedulerConfig) Scheduler { base := newBaseHotScheduler(opController, statistics.DefaultHistorySampleDuration, statistics.DefaultHistorySampleInterval) + base.tp = types.ShuffleHotRegionScheduler handler := newShuffleHotRegionHandler(conf) ret := &shuffleHotRegionScheduler{ baseHotScheduler: base, @@ -99,22 +94,17 @@ func newShuffleHotRegionScheduler(opController *operator.Controller, conf *shuff return ret } +// ServeHTTP implements the http.Handler interface. func (s *shuffleHotRegionScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { s.handler.ServeHTTP(w, r) } -func (s *shuffleHotRegionScheduler) GetName() string { - return s.conf.Name -} - -func (*shuffleHotRegionScheduler) GetType() string { - return ShuffleHotRegionType -} - +// EncodeConfig implements the Scheduler interface. func (s *shuffleHotRegionScheduler) EncodeConfig() ([]byte, error) { return EncodeConfig(s.conf) } +// ReloadConfig implements the Scheduler interface. 
func (s *shuffleHotRegionScheduler) ReloadConfig() error { s.conf.Lock() defer s.conf.Unlock() @@ -133,23 +123,25 @@ func (s *shuffleHotRegionScheduler) ReloadConfig() error { return nil } +// IsScheduleAllowed implements the Scheduler interface. func (s *shuffleHotRegionScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { hotRegionAllowed := s.OpController.OperatorCount(operator.OpHotRegion) < s.conf.getLimit() conf := cluster.GetSchedulerConfig() regionAllowed := s.OpController.OperatorCount(operator.OpRegion) < conf.GetRegionScheduleLimit() leaderAllowed := s.OpController.OperatorCount(operator.OpLeader) < conf.GetLeaderScheduleLimit() if !hotRegionAllowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpHotRegion.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpHotRegion) } if !regionAllowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpRegion.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpRegion) } if !leaderAllowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpLeader.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpLeader) } return hotRegionAllowed && regionAllowed && leaderAllowed } +// Schedule implements the Scheduler interface. func (s *shuffleHotRegionScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { shuffleHotRegionCounter.Inc() typ := s.randomType() @@ -224,7 +216,7 @@ type shuffleHotRegionHandler struct { config *shuffleHotRegionSchedulerConfig } -func (handler *shuffleHotRegionHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { +func (handler *shuffleHotRegionHandler) updateConfig(w http.ResponseWriter, r *http.Request) { var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { return @@ -247,7 +239,7 @@ func (handler *shuffleHotRegionHandler) UpdateConfig(w http.ResponseWriter, r *h handler.rd.JSON(w, http.StatusOK, nil) } -func (handler *shuffleHotRegionHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { +func (handler *shuffleHotRegionHandler) listConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.Clone() handler.rd.JSON(w, http.StatusOK, conf) } @@ -258,7 +250,7 @@ func newShuffleHotRegionHandler(config *shuffleHotRegionSchedulerConfig) http.Ha rd: render.New(render.Options{IndentJSON: true}), } router := mux.NewRouter() - router.HandleFunc("/config", h.UpdateConfig).Methods(http.MethodPost) - router.HandleFunc("/list", h.ListConfig).Methods(http.MethodGet) + router.HandleFunc("/config", h.updateConfig).Methods(http.MethodPost) + router.HandleFunc("/list", h.listConfig).Methods(http.MethodGet) return router } diff --git a/pkg/schedule/schedulers/shuffle_leader.go b/pkg/schedule/schedulers/shuffle_leader.go index 17b5fae6448..46e04efb23d 100644 --- a/pkg/schedule/schedulers/shuffle_leader.go +++ b/pkg/schedule/schedulers/shuffle_leader.go @@ -23,6 +23,7 @@ import ( "github.com/tikv/pd/pkg/schedule/filter" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" + types "github.com/tikv/pd/pkg/schedule/type" ) const ( @@ -51,7 +52,7 @@ func newShuffleLeaderScheduler(opController *operator.Controller, conf *shuffleL &filter.StoreStateFilter{ActionScope: conf.Name, TransferLeader: true, OperatorLevel: constant.Low}, filter.NewSpecialUseFilter(conf.Name), } - base := NewBaseScheduler(opController) + base := NewBaseScheduler(opController, 
types.ShuffleLeaderScheduler) return &shuffleLeaderScheduler{ BaseScheduler: base, conf: conf, @@ -59,26 +60,21 @@ func newShuffleLeaderScheduler(opController *operator.Controller, conf *shuffleL } } -func (s *shuffleLeaderScheduler) GetName() string { - return s.conf.Name -} - -func (*shuffleLeaderScheduler) GetType() string { - return ShuffleLeaderType -} - +// EncodeConfig implements the Scheduler interface. func (s *shuffleLeaderScheduler) EncodeConfig() ([]byte, error) { return EncodeConfig(s.conf) } +// IsScheduleAllowed implements the Scheduler interface. func (s *shuffleLeaderScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { allowed := s.OpController.OperatorCount(operator.OpLeader) < cluster.GetSchedulerConfig().GetLeaderScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpLeader.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpLeader) } return allowed } +// Schedule implements the Scheduler interface. func (s *shuffleLeaderScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { // We shuffle leaders between stores by: // 1. random select a valid store. diff --git a/pkg/schedule/schedulers/shuffle_region.go b/pkg/schedule/schedulers/shuffle_region.go index 57f6c618962..ca759042e8f 100644 --- a/pkg/schedule/schedulers/shuffle_region.go +++ b/pkg/schedule/schedulers/shuffle_region.go @@ -24,6 +24,7 @@ import ( "github.com/tikv/pd/pkg/schedule/filter" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" + types "github.com/tikv/pd/pkg/schedule/type" ) const ( @@ -46,7 +47,7 @@ func newShuffleRegionScheduler(opController *operator.Controller, conf *shuffleR &filter.StoreStateFilter{ActionScope: ShuffleRegionName, MoveRegion: true, OperatorLevel: constant.Low}, filter.NewSpecialUseFilter(ShuffleRegionName), } - base := NewBaseScheduler(opController) + base := NewBaseScheduler(opController, types.ShuffleRegionScheduler) return &shuffleRegionScheduler{ BaseScheduler: base, conf: conf, @@ -54,22 +55,17 @@ func newShuffleRegionScheduler(opController *operator.Controller, conf *shuffleR } } +// ServeHTTP implements the http.Handler interface. func (s *shuffleRegionScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { s.conf.ServeHTTP(w, r) } -func (*shuffleRegionScheduler) GetName() string { - return ShuffleRegionName -} - -func (*shuffleRegionScheduler) GetType() string { - return ShuffleRegionType -} - +// EncodeConfig implements the Scheduler interface. func (s *shuffleRegionScheduler) EncodeConfig() ([]byte, error) { - return s.conf.EncodeConfig() + return s.conf.encodeConfig() } +// ReloadConfig implements the Scheduler interface. func (s *shuffleRegionScheduler) ReloadConfig() error { s.conf.Lock() defer s.conf.Unlock() @@ -89,14 +85,16 @@ func (s *shuffleRegionScheduler) ReloadConfig() error { return nil } +// IsScheduleAllowed implements the Scheduler interface. func (s *shuffleRegionScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { allowed := s.OpController.OperatorCount(operator.OpRegion) < cluster.GetSchedulerConfig().GetRegionScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpRegion.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpRegion) } return allowed } +// Schedule implements the Scheduler interface. 
func (s *shuffleRegionScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { shuffleRegionCounter.Inc() region, oldPeer := s.scheduleRemovePeer(cluster) @@ -129,18 +127,18 @@ func (s *shuffleRegionScheduler) scheduleRemovePeer(cluster sche.SchedulerCluste pendingFilter := filter.NewRegionPendingFilter() downFilter := filter.NewRegionDownFilter() replicaFilter := filter.NewRegionReplicatedFilter(cluster) - ranges := s.conf.GetRanges() + ranges := s.conf.getRanges() for _, source := range candidates.Stores { var region *core.RegionInfo - if s.conf.IsRoleAllow(roleFollower) { + if s.conf.isRoleAllow(roleFollower) { region = filter.SelectOneRegion(cluster.RandFollowerRegions(source.GetID(), ranges), nil, pendingFilter, downFilter, replicaFilter) } - if region == nil && s.conf.IsRoleAllow(roleLeader) { + if region == nil && s.conf.isRoleAllow(roleLeader) { region = filter.SelectOneRegion(cluster.RandLeaderRegions(source.GetID(), ranges), nil, pendingFilter, downFilter, replicaFilter) } - if region == nil && s.conf.IsRoleAllow(roleLearner) { + if region == nil && s.conf.isRoleAllow(roleLearner) { region = filter.SelectOneRegion(cluster.RandLearnerRegions(source.GetID(), ranges), nil, pendingFilter, downFilter, replicaFilter) } diff --git a/pkg/schedule/schedulers/shuffle_region_config.go b/pkg/schedule/schedulers/shuffle_region_config.go index bce64f743b8..fbf53cfeb4d 100644 --- a/pkg/schedule/schedulers/shuffle_region_config.go +++ b/pkg/schedule/schedulers/shuffle_region_config.go @@ -43,19 +43,19 @@ type shuffleRegionSchedulerConfig struct { Roles []string `json:"roles"` // can include `leader`, `follower`, `learner`. } -func (conf *shuffleRegionSchedulerConfig) EncodeConfig() ([]byte, error) { +func (conf *shuffleRegionSchedulerConfig) encodeConfig() ([]byte, error) { conf.RLock() defer conf.RUnlock() return EncodeConfig(conf) } -func (conf *shuffleRegionSchedulerConfig) GetRoles() []string { +func (conf *shuffleRegionSchedulerConfig) getRoles() []string { conf.RLock() defer conf.RUnlock() return conf.Roles } -func (conf *shuffleRegionSchedulerConfig) GetRanges() []core.KeyRange { +func (conf *shuffleRegionSchedulerConfig) getRanges() []core.KeyRange { conf.RLock() defer conf.RUnlock() ranges := make([]core.KeyRange, len(conf.Ranges)) @@ -63,12 +63,13 @@ func (conf *shuffleRegionSchedulerConfig) GetRanges() []core.KeyRange { return ranges } -func (conf *shuffleRegionSchedulerConfig) IsRoleAllow(role string) bool { +func (conf *shuffleRegionSchedulerConfig) isRoleAllow(role string) bool { conf.RLock() defer conf.RUnlock() return slice.AnyOf(conf.Roles, func(i int) bool { return conf.Roles[i] == role }) } +// ServeHTTP implements the http.Handler interface. 
func (conf *shuffleRegionSchedulerConfig) ServeHTTP(w http.ResponseWriter, r *http.Request) { router := mux.NewRouter() router.HandleFunc("/list", conf.handleGetRoles).Methods(http.MethodGet) @@ -79,7 +80,7 @@ func (conf *shuffleRegionSchedulerConfig) ServeHTTP(w http.ResponseWriter, r *ht func (conf *shuffleRegionSchedulerConfig) handleGetRoles(w http.ResponseWriter, _ *http.Request) { rd := render.New(render.Options{IndentJSON: true}) - rd.JSON(w, http.StatusOK, conf.GetRoles()) + rd.JSON(w, http.StatusOK, conf.getRoles()) } func (conf *shuffleRegionSchedulerConfig) handleSetRoles(w http.ResponseWriter, r *http.Request) { diff --git a/pkg/schedule/schedulers/split_bucket.go b/pkg/schedule/schedulers/split_bucket.go index 7df3ee8f552..7f33b996f1c 100644 --- a/pkg/schedule/schedulers/split_bucket.go +++ b/pkg/schedule/schedulers/split_bucket.go @@ -28,6 +28,7 @@ import ( sche "github.com/tikv/pd/pkg/schedule/core" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/statistics/buckets" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/reflectutil" @@ -59,7 +60,7 @@ type splitBucketSchedulerConfig struct { SplitLimit uint64 `json:"split-limit"` } -func (conf *splitBucketSchedulerConfig) Clone() *splitBucketSchedulerConfig { +func (conf *splitBucketSchedulerConfig) clone() *splitBucketSchedulerConfig { conf.RLock() defer conf.RUnlock() return &splitBucketSchedulerConfig{ @@ -99,7 +100,7 @@ type splitBucketHandler struct { } func (h *splitBucketHandler) listConfig(w http.ResponseWriter, _ *http.Request) { - conf := h.conf.Clone() + conf := h.conf.clone() h.rd.JSON(w, http.StatusOK, conf) } @@ -154,7 +155,7 @@ func newSplitBucketHandler(conf *splitBucketSchedulerConfig) http.Handler { } func newSplitBucketScheduler(opController *operator.Controller, conf *splitBucketSchedulerConfig) *splitBucketScheduler { - base := NewBaseScheduler(opController) + base := NewBaseScheduler(opController, types.SplitBucketScheduler) handler := newSplitBucketHandler(conf) ret := &splitBucketScheduler{ BaseScheduler: base, @@ -164,16 +165,6 @@ func newSplitBucketScheduler(opController *operator.Controller, conf *splitBucke return ret } -// GetName returns the name of the split bucket scheduler. -func (*splitBucketScheduler) GetName() string { - return SplitBucketName -} - -// GetType returns the type of the split bucket scheduler. -func (*splitBucketScheduler) GetType() string { - return SplitBucketType -} - func (s *splitBucketScheduler) ReloadConfig() error { s.conf.Lock() defer s.conf.Unlock() @@ -207,7 +198,7 @@ func (s *splitBucketScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) allowed := s.BaseScheduler.OpController.OperatorCount(operator.OpSplit) < s.conf.getSplitLimit() if !allowed { splitBuckerSplitLimitCounter.Inc() - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpSplit.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpSplit) } return allowed } @@ -222,7 +213,7 @@ type splitBucketPlan struct { // Schedule return operators if some bucket is too hot. 
func (s *splitBucketScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { splitBucketScheduleCounter.Inc() - conf := s.conf.Clone() + conf := s.conf.clone() plan := &splitBucketPlan{ conf: conf, cluster: cluster, diff --git a/pkg/schedule/schedulers/transfer_witness_leader.go b/pkg/schedule/schedulers/transfer_witness_leader.go index 2050194b9ae..c1c59620735 100644 --- a/pkg/schedule/schedulers/transfer_witness_leader.go +++ b/pkg/schedule/schedulers/transfer_witness_leader.go @@ -24,6 +24,7 @@ import ( "github.com/tikv/pd/pkg/schedule/filter" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" + types "github.com/tikv/pd/pkg/schedule/type" ) const ( @@ -48,35 +49,29 @@ type transferWitnessLeaderScheduler struct { // newTransferWitnessLeaderScheduler creates an admin scheduler that transfers witness leader of a region. func newTransferWitnessLeaderScheduler(opController *operator.Controller) Scheduler { return &transferWitnessLeaderScheduler{ - BaseScheduler: NewBaseScheduler(opController), + BaseScheduler: NewBaseScheduler(opController, types.TransferWitnessLeaderScheduler), regions: make(chan *core.RegionInfo, transferWitnessLeaderRecvMaxRegionSize), } } -func (*transferWitnessLeaderScheduler) GetName() string { - return TransferWitnessLeaderName -} - -func (*transferWitnessLeaderScheduler) GetType() string { - return TransferWitnessLeaderType -} - +// IsScheduleAllowed implements the Scheduler interface. func (*transferWitnessLeaderScheduler) IsScheduleAllowed(sche.SchedulerCluster) bool { return true } +// Schedule implements the Scheduler interface. func (s *transferWitnessLeaderScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { transferWitnessLeaderCounter.Inc() - return s.scheduleTransferWitnessLeaderBatch(s.GetName(), s.GetType(), cluster, transferWitnessLeaderBatchSize), nil + return s.scheduleTransferWitnessLeaderBatch(s.GetName(), cluster, transferWitnessLeaderBatchSize), nil } -func (s *transferWitnessLeaderScheduler) scheduleTransferWitnessLeaderBatch(name, typ string, cluster sche.SchedulerCluster, batchSize int) []*operator.Operator { +func (s *transferWitnessLeaderScheduler) scheduleTransferWitnessLeaderBatch(name string, cluster sche.SchedulerCluster, batchSize int) []*operator.Operator { var ops []*operator.Operator batchLoop: for i := 0; i < batchSize; i++ { select { case region := <-s.regions: - op, err := scheduleTransferWitnessLeader(name, typ, cluster, region) + op, err := scheduleTransferWitnessLeader(name, cluster, region) if err != nil { log.Debug("fail to create transfer leader operator", errs.ZapError(err)) continue @@ -93,7 +88,7 @@ batchLoop: return ops } -func scheduleTransferWitnessLeader(name, typ string, cluster sche.SchedulerCluster, region *core.RegionInfo) (*operator.Operator, error) { +func scheduleTransferWitnessLeader(name string, cluster sche.SchedulerCluster, region *core.RegionInfo) (*operator.Operator, error) { var filters []filter.Filter unhealthyPeerStores := make(map[uint64]struct{}) for _, peer := range region.GetDownPeers() { @@ -102,7 +97,8 @@ func scheduleTransferWitnessLeader(name, typ string, cluster sche.SchedulerClust for _, peer := range region.GetPendingPeers() { unhealthyPeerStores[peer.GetStoreId()] = struct{}{} } - filters = append(filters, filter.NewExcludedFilter(name, nil, unhealthyPeerStores), &filter.StoreStateFilter{ActionScope: name, TransferLeader: true, OperatorLevel: constant.Urgent}) + filters = 
append(filters, filter.NewExcludedFilter(name, nil, unhealthyPeerStores), + &filter.StoreStateFilter{ActionScope: name, TransferLeader: true, OperatorLevel: constant.Urgent}) candidates := filter.NewCandidates(cluster.GetFollowerStores(region)).FilterTarget(cluster.GetSchedulerConfig(), nil, nil, filters...) // Compatible with old TiKV transfer leader logic. target := candidates.RandomPick() @@ -116,7 +112,7 @@ func scheduleTransferWitnessLeader(name, typ string, cluster sche.SchedulerClust for _, t := range targets { targetIDs = append(targetIDs, t.GetID()) } - return operator.CreateTransferLeaderOperator(typ, cluster, region, target.GetID(), targetIDs, operator.OpWitnessLeader) + return operator.CreateTransferLeaderOperator(name, cluster, region, target.GetID(), targetIDs, operator.OpWitnessLeader) } // RecvRegionInfo receives a checked region from coordinator diff --git a/pkg/schedule/schedulers/utils.go b/pkg/schedule/schedulers/utils.go index c708541e02e..1e911cf7b06 100644 --- a/pkg/schedule/schedulers/utils.go +++ b/pkg/schedule/schedulers/utils.go @@ -65,24 +65,24 @@ func newSolver(basePlan *plan.BalanceSchedulerPlan, kind constant.ScheduleKind, } } -func (p *solver) GetOpInfluence(storeID uint64) int64 { +func (p *solver) getOpInfluence(storeID uint64) int64 { return p.opInfluence.GetStoreInfluence(storeID).ResourceProperty(p.kind) } -func (p *solver) SourceStoreID() uint64 { +func (p *solver) sourceStoreID() uint64 { return p.Source.GetID() } -func (p *solver) SourceMetricLabel() string { - return strconv.FormatUint(p.SourceStoreID(), 10) +func (p *solver) sourceMetricLabel() string { + return strconv.FormatUint(p.sourceStoreID(), 10) } -func (p *solver) TargetStoreID() uint64 { +func (p *solver) targetStoreID() uint64 { return p.Target.GetID() } -func (p *solver) TargetMetricLabel() string { - return strconv.FormatUint(p.TargetStoreID(), 10) +func (p *solver) targetMetricLabel() string { + return strconv.FormatUint(p.targetStoreID(), 10) } func (p *solver) sourceStoreScore(scheduleName string) float64 { @@ -90,7 +90,7 @@ func (p *solver) sourceStoreScore(scheduleName string) float64 { tolerantResource := p.getTolerantResource() // to avoid schedule too much, if A's core greater than B and C a little // we want that A should be moved out one region not two - influence := p.GetOpInfluence(sourceID) + influence := p.getOpInfluence(sourceID) if influence > 0 { influence = -influence } @@ -121,7 +121,7 @@ func (p *solver) targetStoreScore(scheduleName string) float64 { tolerantResource := p.getTolerantResource() // to avoid schedule call back // A->B, A's influence is negative, so A will be target, C may move region to A - influence := p.GetOpInfluence(targetID) + influence := p.getOpInfluence(targetID) if influence < 0 { influence = -influence } @@ -358,7 +358,7 @@ func newRetryQuota() *retryQuota { } } -func (q *retryQuota) GetLimit(store *core.StoreInfo) int { +func (q *retryQuota) getLimit(store *core.StoreInfo) int { id := store.GetID() if limit, ok := q.limits[id]; ok { return limit @@ -367,19 +367,19 @@ func (q *retryQuota) GetLimit(store *core.StoreInfo) int { return q.initialLimit } -func (q *retryQuota) ResetLimit(store *core.StoreInfo) { +func (q *retryQuota) resetLimit(store *core.StoreInfo) { q.limits[store.GetID()] = q.initialLimit } -func (q *retryQuota) Attenuate(store *core.StoreInfo) { - newLimit := q.GetLimit(store) / q.attenuation +func (q *retryQuota) attenuate(store *core.StoreInfo) { + newLimit := q.getLimit(store) / q.attenuation if newLimit < q.minLimit { 
newLimit = q.minLimit } q.limits[store.GetID()] = newLimit } -func (q *retryQuota) GC(keepStores []*core.StoreInfo) { +func (q *retryQuota) gc(keepStores []*core.StoreInfo) { set := make(map[uint64]struct{}, len(keepStores)) for _, store := range keepStores { set[store.GetID()] = struct{}{} diff --git a/pkg/schedule/schedulers/utils_test.go b/pkg/schedule/schedulers/utils_test.go index a2f5aa4dad0..deb7c6e1038 100644 --- a/pkg/schedule/schedulers/utils_test.go +++ b/pkg/schedule/schedulers/utils_test.go @@ -30,24 +30,24 @@ func TestRetryQuota(t *testing.T) { store2 := core.NewStoreInfo(&metapb.Store{Id: 2}) keepStores := []*core.StoreInfo{store1} - // test GetLimit - re.Equal(10, q.GetLimit(store1)) + // test getLimit + re.Equal(10, q.getLimit(store1)) - // test Attenuate + // test attenuate for _, expected := range []int{5, 2, 1, 1, 1} { - q.Attenuate(store1) - re.Equal(expected, q.GetLimit(store1)) + q.attenuate(store1) + re.Equal(expected, q.getLimit(store1)) } // test GC - re.Equal(10, q.GetLimit(store2)) - q.Attenuate(store2) - re.Equal(5, q.GetLimit(store2)) - q.GC(keepStores) - re.Equal(1, q.GetLimit(store1)) - re.Equal(10, q.GetLimit(store2)) - - // test ResetLimit - q.ResetLimit(store1) - re.Equal(10, q.GetLimit(store1)) + re.Equal(10, q.getLimit(store2)) + q.attenuate(store2) + re.Equal(5, q.getLimit(store2)) + q.gc(keepStores) + re.Equal(1, q.getLimit(store1)) + re.Equal(10, q.getLimit(store2)) + + // test resetLimit + q.resetLimit(store1) + re.Equal(10, q.getLimit(store1)) } diff --git a/pkg/schedule/splitter/region_splitter.go b/pkg/schedule/splitter/region_splitter.go index 124ad935655..37b33dad480 100644 --- a/pkg/schedule/splitter/region_splitter.go +++ b/pkg/schedule/splitter/region_splitter.go @@ -58,11 +58,11 @@ func NewSplitRegionsHandler(cluster sche.ClusterInformer, oc *operator.Controlle type RegionSplitter struct { cluster sche.ClusterInformer handler SplitRegionsHandler - addSuspectRegions func(ids ...uint64) + addSuspectRegions func(bool, ...uint64) } // NewRegionSplitter return a region splitter -func NewRegionSplitter(cluster sche.ClusterInformer, handler SplitRegionsHandler, addSuspectRegions func(ids ...uint64)) *RegionSplitter { +func NewRegionSplitter(cluster sche.ClusterInformer, handler SplitRegionsHandler, addSuspectRegions func(bool, ...uint64)) *RegionSplitter { return &RegionSplitter{ cluster: cluster, handler: handler, @@ -173,7 +173,7 @@ func (r *RegionSplitter) groupKeysByRegion(keys [][]byte) map[uint64]*regionGrou func (r *RegionSplitter) checkRegionValid(region *core.RegionInfo) bool { if !filter.IsRegionReplicated(r.cluster, region) { - r.addSuspectRegions(region.GetID()) + r.addSuspectRegions(false, region.GetID()) return false } if region.GetLeader() == nil { diff --git a/pkg/schedule/type/type.go b/pkg/schedule/type/type.go index 16910c631fd..1f6211a9783 100644 --- a/pkg/schedule/type/type.go +++ b/pkg/schedule/type/type.go @@ -52,8 +52,8 @@ const ( GrantLeaderScheduler CheckerSchedulerType = "grant-leader-scheduler" // GrantHotRegionScheduler is grant hot region scheduler name. GrantHotRegionScheduler CheckerSchedulerType = "grant-hot-region-scheduler" - // HotRegionScheduler is balance hot region scheduler name. - HotRegionScheduler CheckerSchedulerType = "balance-hot-region-scheduler" + // BalanceHotRegionScheduler is balance hot region scheduler name. + BalanceHotRegionScheduler CheckerSchedulerType = "balance-hot-region-scheduler" // RandomMergeScheduler is random merge scheduler name. 
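// Illustrative sketch, not part of this patch: the retryQuota helpers renamed to
// unexported above (getLimit/attenuate/resetLimit/gc) implement a per-store retry
// budget. A self-contained approximation keyed by store ID instead of *core.StoreInfo,
// using the values the updated test asserts (initial 10, halved on attenuate, floor 1,
// untracked stores reset after gc):
package main

import "fmt"

type retryQuota struct {
	initialLimit, minLimit, attenuation int
	limits                              map[uint64]int
}

func newRetryQuota() *retryQuota {
	return &retryQuota{initialLimit: 10, minLimit: 1, attenuation: 2, limits: map[uint64]int{}}
}

func (q *retryQuota) getLimit(storeID uint64) int {
	if limit, ok := q.limits[storeID]; ok {
		return limit
	}
	q.limits[storeID] = q.initialLimit
	return q.initialLimit
}

// attenuate divides the limit by the attenuation factor but never drops below minLimit.
func (q *retryQuota) attenuate(storeID uint64) {
	newLimit := q.getLimit(storeID) / q.attenuation
	if newLimit < q.minLimit {
		newLimit = q.minLimit
	}
	q.limits[storeID] = newLimit
}

// gc drops tracked limits for stores that are no longer kept.
func (q *retryQuota) gc(keep []uint64) {
	set := make(map[uint64]struct{}, len(keep))
	for _, id := range keep {
		set[id] = struct{}{}
	}
	for id := range q.limits {
		if _, ok := set[id]; !ok {
			delete(q.limits, id)
		}
	}
}

func main() {
	q := newRetryQuota()
	q.attenuate(1)
	fmt.Println(q.getLimit(1)) // 5
	q.gc([]uint64{2})
	fmt.Println(q.getLimit(1)) // back to 10 after gc dropped store 1
}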
RandomMergeScheduler CheckerSchedulerType = "random-merge-scheduler" // ScatterRangeScheduler is scatter range scheduler name. @@ -73,8 +73,10 @@ const ( LabelScheduler CheckerSchedulerType = "label-scheduler" ) -// SchedulerTypeCompatibleMap temporarily exists for compatibility. -// TODO: remove it after all components use CheckerSchedulerType. +// SchedulerTypeCompatibleMap exists for compatibility. +// +// It is used in the `PersistOptions` and `PersistConfig`. These two structs +// are persisted in the storage, so we need to keep the compatibility. var SchedulerTypeCompatibleMap = map[CheckerSchedulerType]string{ BalanceLeaderScheduler: "balance-leader", BalanceRegionScheduler: "balance-region", @@ -84,7 +86,7 @@ var SchedulerTypeCompatibleMap = map[CheckerSchedulerType]string{ EvictSlowTrendScheduler: "evict-slow-trend", GrantLeaderScheduler: "grant-leader", GrantHotRegionScheduler: "grant-hot-region", - HotRegionScheduler: "hot-region", + BalanceHotRegionScheduler: "hot-region", RandomMergeScheduler: "random-merge", ScatterRangeScheduler: "scatter-range", ShuffleHotRegionScheduler: "shuffle-hot-region", @@ -105,7 +107,7 @@ var SchedulerStr2Type = map[string]CheckerSchedulerType{ "evict-slow-trend-scheduler": EvictSlowTrendScheduler, "grant-leader-scheduler": GrantLeaderScheduler, "grant-hot-region-scheduler": GrantHotRegionScheduler, - "balance-hot-region-scheduler": HotRegionScheduler, + "balance-hot-region-scheduler": BalanceHotRegionScheduler, "random-merge-scheduler": RandomMergeScheduler, // TODO: update to `scatter-range-scheduler` "scatter-range": ScatterRangeScheduler, diff --git a/pkg/storage/hot_region_storage.go b/pkg/storage/hot_region_storage.go index 50fa7455f44..d323b40d435 100644 --- a/pkg/storage/hot_region_storage.go +++ b/pkg/storage/hot_region_storage.go @@ -37,7 +37,6 @@ import ( "github.com/tikv/pd/pkg/storage/kv" "github.com/tikv/pd/pkg/utils/logutil" "github.com/tikv/pd/pkg/utils/syncutil" - "github.com/tikv/pd/pkg/utils/typeutil" "go.uber.org/zap" ) @@ -267,8 +266,8 @@ func (h *HotRegionStorage) packHistoryHotRegions(historyHotRegions []HistoryHotR if err != nil { return err } - historyHotRegions[i].StartKey = typeutil.BytesToString(region.StartKey) - historyHotRegions[i].EndKey = typeutil.BytesToString(region.EndKey) + historyHotRegions[i].StartKey = string(region.StartKey) + historyHotRegions[i].EndKey = string(region.EndKey) key := HotRegionStorePath(hotRegionType, historyHotRegions[i].UpdateTime, historyHotRegions[i].RegionID) h.batchHotInfo[key] = &historyHotRegions[i] } @@ -386,8 +385,8 @@ func (it *HotRegionStorageIterator) Next() (*HistoryHotRegion, error) { if err := encryption.DecryptRegion(region, it.encryptionKeyManager); err != nil { return nil, err } - message.StartKey = typeutil.BytesToString(region.StartKey) - message.EndKey = typeutil.BytesToString(region.EndKey) + message.StartKey = string(region.StartKey) + message.EndKey = string(region.EndKey) message.EncryptionMeta = nil return &message, nil } diff --git a/pkg/storage/hot_region_storage_test.go b/pkg/storage/hot_region_storage_test.go index 1486fb8271d..4e98f2059d6 100644 --- a/pkg/storage/hot_region_storage_test.go +++ b/pkg/storage/hot_region_storage_test.go @@ -21,6 +21,7 @@ import ( "math/rand" "os" "path/filepath" + "strings" "testing" "time" @@ -287,7 +288,7 @@ func newTestHotRegionStorage(pullInterval time.Duration, packHotRegionInfo *MockPackHotRegionInfo) ( hotRegionStorage *HotRegionStorage, clear func(), err error) { - writePath := "./tmp" + writePath := 
strings.Join([]string{".", "tmp"}, string(filepath.Separator)) ctx := context.Background() packHotRegionInfo.pullInterval = pullInterval packHotRegionInfo.reservedDays = reservedDays diff --git a/pkg/utils/grpcutil/grpcutil_test.go b/pkg/utils/grpcutil/grpcutil_test.go index 99cbeae6cde..fbcfe59f02c 100644 --- a/pkg/utils/grpcutil/grpcutil_test.go +++ b/pkg/utils/grpcutil/grpcutil_test.go @@ -4,7 +4,7 @@ import ( "context" "os" "os/exec" - "path" + "path/filepath" "testing" "github.com/pingcap/errors" @@ -14,8 +14,8 @@ import ( ) var ( - certPath = "../../../tests/integrations/client/" - certScript = "cert_opt.sh" + certPath = filepath.Join("..", "..", "..", "tests", "integrations", "client") + string(filepath.Separator) + certScript = filepath.Join("..", "..", "..", "tests", "integrations", "client", "cert_opt.sh") ) func loadTLSContent(re *require.Assertions, caPath, certPath, keyPath string) (caData, certData, keyData []byte) { @@ -30,20 +30,20 @@ func loadTLSContent(re *require.Assertions, caPath, certPath, keyPath string) (c } func TestToTLSConfig(t *testing.T) { - if err := exec.Command(certPath+certScript, "generate", certPath).Run(); err != nil { + if err := exec.Command(certScript, "generate", certPath).Run(); err != nil { t.Fatal(err) } defer func() { - if err := exec.Command(certPath+certScript, "cleanup", certPath).Run(); err != nil { + if err := exec.Command(certScript, "cleanup", certPath).Run(); err != nil { t.Fatal(err) } }() re := require.New(t) tlsConfig := TLSConfig{ - KeyPath: path.Join(certPath, "pd-server-key.pem"), - CertPath: path.Join(certPath, "pd-server.pem"), - CAPath: path.Join(certPath, "ca.pem"), + KeyPath: filepath.Join(certPath, "pd-server-key.pem"), + CertPath: filepath.Join(certPath, "pd-server.pem"), + CAPath: filepath.Join(certPath, "ca.pem"), } // test without bytes _, err := tlsConfig.ToTLSConfig() diff --git a/pkg/utils/logutil/log.go b/pkg/utils/logutil/log.go index c7a9ac2f3b7..4854fd7ac40 100644 --- a/pkg/utils/logutil/log.go +++ b/pkg/utils/logutil/log.go @@ -23,7 +23,6 @@ import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" - "github.com/tikv/pd/pkg/utils/typeutil" "go.uber.org/zap" "go.uber.org/zap/zapcore" ) @@ -223,7 +222,7 @@ func RedactBytes(arg []byte) []byte { return []byte("?") case RedactInfoLogMarker: // Use unsafe conversion to avoid copy. - return typeutil.StringToBytes(redactInfo(typeutil.BytesToString(arg))) + return []byte(redactInfo(string(arg))) default: } return arg diff --git a/pkg/utils/testutil/testutil.go b/pkg/utils/testutil/testutil.go index cef952353bc..76f7058738b 100644 --- a/pkg/utils/testutil/testutil.go +++ b/pkg/utils/testutil/testutil.go @@ -94,7 +94,7 @@ func CleanServer(dataDir string) { // InitTempFileLogger initializes the logger and redirects the log output to a temporary file. func InitTempFileLogger(level string) (fname string) { cfg := &log.Config{} - f, _ := os.CreateTemp("/tmp", "pd_tests") + f, _ := os.CreateTemp(os.TempDir(), "pd_tests") fname = f.Name() f.Close() cfg.File.Filename = fname diff --git a/pkg/utils/typeutil/conversion.go b/pkg/utils/typeutil/conversion.go index dab12a52d9e..128c7a887a4 100644 --- a/pkg/utils/typeutil/conversion.go +++ b/pkg/utils/typeutil/conversion.go @@ -16,7 +16,6 @@ package typeutil import ( "encoding/binary" - "unsafe" "github.com/tikv/pd/pkg/errs" ) @@ -69,19 +68,3 @@ func JSONToUint64Slice(from any) ([]uint64, bool) { } return to, true } - -// BytesToString converts slice of bytes to string without copy. 
-func BytesToString(b []byte) string { - if len(b) == 0 { - return "" - } - return unsafe.String(unsafe.SliceData(b), len(b)) -} - -// StringToBytes converts string to slice of bytes without copy. -func StringToBytes(s string) []byte { - if len(s) == 0 { - return nil - } - return unsafe.Slice(unsafe.StringData(s), len(s)) -} diff --git a/pkg/utils/typeutil/conversion_test.go b/pkg/utils/typeutil/conversion_test.go index e69eeb57e23..7b17cfcbe2c 100644 --- a/pkg/utils/typeutil/conversion_test.go +++ b/pkg/utils/typeutil/conversion_test.go @@ -73,17 +73,3 @@ func TestJSONToUint64Slice(t *testing.T) { re.False(ok) re.Nil(res) } - -func TestBytesToString(t *testing.T) { - re := require.New(t) - str := "hello" - b := []byte(str) - re.Equal(str, BytesToString(b)) -} - -func TestStringToBytes(t *testing.T) { - re := require.New(t) - str := "hello" - b := StringToBytes(str) - re.Equal([]byte(str), b) -} diff --git a/plugin/scheduler_example/evict_leader.go b/plugin/scheduler_example/evict_leader.go index 9ad797e0ae4..49156abc40c 100644 --- a/plugin/scheduler_example/evict_leader.go +++ b/plugin/scheduler_example/evict_leader.go @@ -30,6 +30,7 @@ import ( "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/plan" "github.com/tikv/pd/pkg/schedule/schedulers" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/pkg/utils/syncutil" @@ -42,6 +43,8 @@ const ( // EvictLeaderType is evict leader scheduler type. EvictLeaderType = "user-evict-leader" noStoreInSchedulerInfo = "No store in user-evict-leader-scheduler-config" + + userEvictLeaderScheduler types.CheckerSchedulerType = "user-evict-leader-scheduler" ) func init() { @@ -127,18 +130,13 @@ func (conf *evictLeaderSchedulerConfig) Clone() *evictLeaderSchedulerConfig { // Persist saves the config. func (conf *evictLeaderSchedulerConfig) Persist() error { - name := conf.getScheduleName() conf.mu.RLock() defer conf.mu.RUnlock() data, err := schedulers.EncodeConfig(conf) if err != nil { return err } - return conf.storage.SaveSchedulerConfig(name, data) -} - -func (*evictLeaderSchedulerConfig) getScheduleName() string { - return EvictLeaderName + return conf.storage.SaveSchedulerConfig(EvictLeaderName, data) } func (conf *evictLeaderSchedulerConfig) getRanges(id uint64) []string { @@ -160,7 +158,7 @@ type evictLeaderScheduler struct { // newEvictLeaderScheduler creates an admin scheduler that transfers all leaders // out of a store. func newEvictLeaderScheduler(opController *operator.Controller, conf *evictLeaderSchedulerConfig) schedulers.Scheduler { - base := schedulers.NewBaseScheduler(opController) + base := schedulers.NewBaseScheduler(opController, userEvictLeaderScheduler) handler := newEvictLeaderHandler(conf) return &evictLeaderScheduler{ BaseScheduler: base, @@ -174,17 +172,7 @@ func (s *evictLeaderScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) s.handler.ServeHTTP(w, r) } -// GetName returns the scheduler name. -func (*evictLeaderScheduler) GetName() string { - return EvictLeaderName -} - -// GetType returns the scheduler type. -func (*evictLeaderScheduler) GetType() string { - return EvictLeaderType -} - -// EncodeConfig serializes the config. +// EncodeConfig implements the Scheduler interface. 
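// Illustrative sketch, not part of this patch: the unsafe zero-copy helpers
// typeutil.BytesToString/StringToBytes removed above are replaced at their call
// sites by the ordinary Go conversions, which copy the data but are safe:
package main

import "fmt"

func main() {
	b := []byte("hello")
	s := string(b)  // replaces typeutil.BytesToString(b); copies the bytes
	b2 := []byte(s) // replaces typeutil.StringToBytes(s); copies the string
	fmt.Println(s, len(b2))
}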
func (s *evictLeaderScheduler) EncodeConfig() ([]byte, error) { s.conf.mu.RLock() defer s.conf.mu.RUnlock() @@ -217,7 +205,7 @@ func (s *evictLeaderScheduler) CleanConfig(cluster sche.SchedulerCluster) { func (s *evictLeaderScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { allowed := s.OpController.OperatorCount(operator.OpLeader) < cluster.GetSchedulerConfig().GetLeaderScheduleLimit() if !allowed { - operator.OperatorLimitCounter.WithLabelValues(s.GetType(), operator.OpLeader.String()).Inc() + operator.IncOperatorLimitCounter(s.GetType(), operator.OpLeader) } return allowed } @@ -257,8 +245,8 @@ type evictLeaderHandler struct { config *evictLeaderSchedulerConfig } -// UpdateConfig updates the config. -func (handler *evictLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.Request) { +// updateConfig updates the config. +func (handler *evictLeaderHandler) updateConfig(w http.ResponseWriter, r *http.Request) { var input map[string]any if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { return @@ -298,14 +286,12 @@ func (handler *evictLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R handler.rd.JSON(w, http.StatusOK, nil) } -// ListConfig lists the config. -func (handler *evictLeaderHandler) ListConfig(w http.ResponseWriter, _ *http.Request) { +func (handler *evictLeaderHandler) listConfig(w http.ResponseWriter, _ *http.Request) { conf := handler.config.Clone() handler.rd.JSON(w, http.StatusOK, conf) } -// DeleteConfig deletes the config. -func (handler *evictLeaderHandler) DeleteConfig(w http.ResponseWriter, r *http.Request) { +func (handler *evictLeaderHandler) deleteConfig(w http.ResponseWriter, r *http.Request) { idStr := mux.Vars(r)["store_id"] id, err := strconv.ParseUint(idStr, 10, 64) if err != nil { @@ -344,9 +330,9 @@ func newEvictLeaderHandler(config *evictLeaderSchedulerConfig) http.Handler { rd: render.New(render.Options{IndentJSON: true}), } router := mux.NewRouter() - router.HandleFunc("/config", h.UpdateConfig).Methods(http.MethodPost) - router.HandleFunc("/list", h.ListConfig).Methods(http.MethodGet) - router.HandleFunc("/delete/{store_id}", h.DeleteConfig).Methods(http.MethodDelete) + router.HandleFunc("/config", h.updateConfig).Methods(http.MethodPost) + router.HandleFunc("/list", h.listConfig).Methods(http.MethodGet) + router.HandleFunc("/delete/{store_id}", h.deleteConfig).Methods(http.MethodDelete) return router } diff --git a/server/api/label_test.go b/server/api/label_test.go index a8599273d5c..b8191a83753 100644 --- a/server/api/label_test.go +++ b/server/api/label_test.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/response" tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server" @@ -277,6 +278,30 @@ func (suite *strictlyLabelsStoreTestSuite) TestStoreMatch() { valid: false, expectError: "key matching the label was not found", }, + { + store: &metapb.Store{ + Id: 3, + Address: "tiflash1", + State: metapb.StoreState_Up, + Labels: []*metapb.StoreLabel{ + { + Key: "zone", + Value: "us-west-1", + }, + { + Key: "disk", + Value: "ssd", + }, + { + Key: core.EngineKey, + Value: core.EngineTiFlash, + }, + }, + Version: "3.0.0", + }, + valid: true, + expectError: "placement rules is disabled", + }, } for _, testCase := range testCases { @@ -284,12 +309,16 @@ func (suite *strictlyLabelsStoreTestSuite) TestStoreMatch() { Header: 
&pdpb.RequestHeader{ClusterId: suite.svr.ClusterID()}, Store: &metapb.Store{ Id: testCase.store.Id, - Address: fmt.Sprintf("tikv%d", testCase.store.Id), + Address: testCase.store.Address, State: testCase.store.State, Labels: testCase.store.Labels, Version: testCase.store.Version, }, }) + if testCase.store.Address == "tiflash1" { + re.Contains(resp.GetHeader().GetError().String(), testCase.expectError) + continue + } if testCase.valid { re.NoError(err) re.Nil(resp.GetHeader().GetError()) @@ -309,7 +338,7 @@ func (suite *strictlyLabelsStoreTestSuite) TestStoreMatch() { Header: &pdpb.RequestHeader{ClusterId: suite.svr.ClusterID()}, Store: &metapb.Store{ Id: testCase.store.Id, - Address: fmt.Sprintf("tikv%d", testCase.store.Id), + Address: testCase.store.Address, State: testCase.store.State, Labels: testCase.store.Labels, Version: testCase.store.Version, diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index d1f89ca2128..c8013c63e2d 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -343,7 +343,7 @@ func (c *RaftCluster) Start(s Server) error { log.Error("load external timestamp meets error", zap.Error(err)) } - if s.IsAPIServiceMode() { + if c.isAPIServiceMode { // bootstrap keyspace group manager after starting other parts successfully. // This order avoids a stuck goroutine in keyspaceGroupManager when it fails to create raftcluster. err = c.keyspaceGroupManager.Bootstrap(c.ctx) @@ -1307,6 +1307,9 @@ func (c *RaftCluster) checkStoreLabels(s *core.StoreInfo) error { } for _, label := range s.GetLabels() { key := label.GetKey() + if key == core.EngineKey { + continue + } if _, ok := keysSet[key]; !ok { log.Warn("not found the key match with the store label", zap.Stringer("store", s.GetMeta()), diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index cd7f94e001a..f33354b9668 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -44,6 +44,7 @@ import ( "github.com/tikv/pd/pkg/mock/mockid" "github.com/tikv/pd/pkg/progress" "github.com/tikv/pd/pkg/schedule" + "github.com/tikv/pd/pkg/schedule/checker" sc "github.com/tikv/pd/pkg/schedule/config" sche "github.com/tikv/pd/pkg/schedule/core" "github.com/tikv/pd/pkg/schedule/filter" @@ -1849,7 +1850,7 @@ func Test(t *testing.T) { for i := uint64(0); i < n; i++ { region := regions[i] - regionKey := []byte{byte(i)} + regionKey := []byte(fmt.Sprintf("a%20d", i+1)) re.Nil(cache.GetRegion(i)) re.Nil(cache.GetRegionByKey(regionKey)) @@ -2183,7 +2184,7 @@ func newTestRegions(n, m, np uint64) []*core.RegionInfo { peers := make([]*metapb.Peer, 0, np) for j := uint64(0); j < np; j++ { peer := &metapb.Peer{ - Id: i*np + j, + Id: 100000000 + i*np + j, } peer.StoreId = (i + j) % m peers = append(peers, peer) @@ -2191,8 +2192,8 @@ func newTestRegions(n, m, np uint64) []*core.RegionInfo { region := &metapb.Region{ Id: i, Peers: peers, - StartKey: []byte{byte(i)}, - EndKey: []byte{byte(i + 1)}, + StartKey: []byte(fmt.Sprintf("a%20d", i+1)), + EndKey: []byte(fmt.Sprintf("a%20d", i+2)), RegionEpoch: &metapb.RegionEpoch{ConfVer: 2, Version: 2}, } regions = append(regions, core.NewRegionInfo(region, peers[0], core.SetApproximateSize(100), core.SetApproximateKeys(1000))) @@ -2880,6 +2881,55 @@ func TestCheckCache(t *testing.T) { re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/checker/breakPatrol")) } +func TestScanLimit(t *testing.T) { + re := require.New(t) + + checkScanLimit(re, 1000, checker.MinPatrolRegionScanLimit) + checkScanLimit(re, 10000) + 
checkScanLimit(re, 100000) + checkScanLimit(re, 1000000) + checkScanLimit(re, 10000000, checker.MaxPatrolScanRegionLimit) +} + +func checkScanLimit(re *require.Assertions, regionCount int, expectScanLimit ...int) { + tc, co, cleanup := prepare(nil, nil, nil, re) + defer cleanup() + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/checker/breakPatrol", `return`)) + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/checker/regionCount", fmt.Sprintf("return(\"%d\")", regionCount))) + defer func() { + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/checker/breakPatrol")) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/checker/regionCount")) + }() + + re.NoError(tc.addRegionStore(1, 0)) + re.NoError(tc.addRegionStore(2, 0)) + re.NoError(tc.addRegionStore(3, 0)) + regions := newTestRegions(10, 3, 3) + for i, region := range regions { + if i == 0 { + region.GetMeta().StartKey = []byte("") + } + if i == len(regions)-1 { + region.GetMeta().EndKey = []byte("") + } + re.NoError(tc.putRegion(region)) + } + + co.GetWaitGroup().Add(1) + co.PatrolRegions() + defer func() { + co.GetSchedulersController().Wait() + co.GetWaitGroup().Wait() + }() + + limit := co.GetCheckerController().GetPatrolRegionScanLimit() + re.LessOrEqual(checker.MinPatrolRegionScanLimit, limit) + re.GreaterOrEqual(checker.MaxPatrolScanRegionLimit, limit) + if len(expectScanLimit) > 0 { + re.Equal(expectScanLimit[0], limit) + } +} + func TestPeerState(t *testing.T) { re := require.New(t) diff --git a/server/cluster/cluster_worker.go b/server/cluster/cluster_worker.go index 2d9bb411995..941282c4aca 100644 --- a/server/cluster/cluster_worker.go +++ b/server/cluster/cluster_worker.go @@ -165,7 +165,7 @@ func (c *RaftCluster) HandleAskBatchSplit(request *pdpb.AskBatchSplitRequest) (* // If region splits during the scheduling process, regions with abnormal // status may be left, and these regions need to be checked with higher // priority. - c.AddPendingProcessedRegions(recordRegions...) + c.AddPendingProcessedRegions(false, recordRegions...) resp := &pdpb.AskBatchSplitResponse{Ids: splitIDs} diff --git a/server/cluster/scheduling_controller.go b/server/cluster/scheduling_controller.go index bd515669670..b4c29ceed46 100644 --- a/server/cluster/scheduling_controller.go +++ b/server/cluster/scheduling_controller.go @@ -404,10 +404,10 @@ func (sc *schedulingController) PauseOrResumeChecker(name string, t int64) error } // AddPendingProcessedRegions adds regions to suspect list. -func (sc *schedulingController) AddPendingProcessedRegions(regionIDs ...uint64) { +func (sc *schedulingController) AddPendingProcessedRegions(needCheckLen bool, regionIDs ...uint64) { sc.mu.RLock() defer sc.mu.RUnlock() - sc.coordinator.GetCheckerController().AddPendingProcessedRegions(regionIDs...) + sc.coordinator.GetCheckerController().AddPendingProcessedRegions(needCheckLen, regionIDs...) } // GetPendingProcessedRegions gets all suspect regions. 
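// Illustrative sketch, not part of this patch: AddPendingProcessedRegions now takes a
// leading bool (needCheckLen, assumed here to gate a length check on the pending list)
// before the variadic region IDs, so every caller in this patch changes from
// f(ids...) to f(false, ids...). The call-shape change in miniature:
package main

import "fmt"

// addPendingProcessedRegions is a stand-in for the real checker method.
func addPendingProcessedRegions(needCheckLen bool, regionIDs ...uint64) {
	fmt.Println(needCheckLen, regionIDs)
}

func main() {
	recordRegions := []uint64{1, 2, 3}
	// Old call shape: addPendingProcessedRegions(recordRegions...)
	addPendingProcessedRegions(false, recordRegions...) // new call shape
}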
diff --git a/server/config/config_test.go b/server/config/config_test.go index df23241b787..78d6d25b73e 100644 --- a/server/config/config_test.go +++ b/server/config/config_test.go @@ -19,7 +19,7 @@ import ( "fmt" "math" "os" - "path" + "path/filepath" "testing" "time" @@ -123,7 +123,7 @@ func TestValidation(t *testing.T) { cfg := NewConfig() re.NoError(cfg.Adjust(nil, false)) - cfg.Log.File.Filename = path.Join(cfg.DataDir, "test") + cfg.Log.File.Filename = filepath.Join(cfg.DataDir, "test") re.Error(cfg.Validate()) // check schedule config diff --git a/server/config/persist_options.go b/server/config/persist_options.go index d8a7d69f783..b6963a6645a 100644 --- a/server/config/persist_options.go +++ b/server/config/persist_options.go @@ -33,6 +33,7 @@ import ( "github.com/tikv/pd/pkg/core/constant" "github.com/tikv/pd/pkg/core/storelimit" sc "github.com/tikv/pd/pkg/schedule/config" + types "github.com/tikv/pd/pkg/schedule/type" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/storage/endpoint" "github.com/tikv/pd/pkg/utils/etcdutil" @@ -669,10 +670,11 @@ func (o *PersistOptions) GetSchedulers() sc.SchedulerConfigs { } // IsSchedulerDisabled returns if the scheduler is disabled. -func (o *PersistOptions) IsSchedulerDisabled(t string) bool { +func (o *PersistOptions) IsSchedulerDisabled(tp types.CheckerSchedulerType) bool { + oldType := types.SchedulerTypeCompatibleMap[tp] schedulers := o.GetScheduleConfig().Schedulers for _, s := range schedulers { - if t == s.Type { + if oldType == s.Type { return s.Disable } } @@ -690,33 +692,35 @@ func (o *PersistOptions) GetHotRegionsReservedDays() uint64 { } // AddSchedulerCfg adds the scheduler configurations. -func (o *PersistOptions) AddSchedulerCfg(tp string, args []string) { +func (o *PersistOptions) AddSchedulerCfg(tp types.CheckerSchedulerType, args []string) { + oldType := types.SchedulerTypeCompatibleMap[tp] v := o.GetScheduleConfig().Clone() for i, schedulerCfg := range v.Schedulers { // comparing args is to cover the case that there are schedulers in same type but not with same name // such as two schedulers of type "evict-leader", // one name is "evict-leader-scheduler-1" and the other is "evict-leader-scheduler-2" - if reflect.DeepEqual(schedulerCfg, sc.SchedulerConfig{Type: tp, Args: args, Disable: false}) { + if reflect.DeepEqual(schedulerCfg, sc.SchedulerConfig{Type: oldType, Args: args, Disable: false}) { return } - if reflect.DeepEqual(schedulerCfg, sc.SchedulerConfig{Type: tp, Args: args, Disable: true}) { + if reflect.DeepEqual(schedulerCfg, sc.SchedulerConfig{Type: oldType, Args: args, Disable: true}) { schedulerCfg.Disable = false v.Schedulers[i] = schedulerCfg o.SetScheduleConfig(v) return } } - v.Schedulers = append(v.Schedulers, sc.SchedulerConfig{Type: tp, Args: args, Disable: false}) + v.Schedulers = append(v.Schedulers, sc.SchedulerConfig{Type: oldType, Args: args, Disable: false}) o.SetScheduleConfig(v) } // RemoveSchedulerCfg removes the scheduler configurations. 
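// Illustrative sketch, not part of this patch: the persist-options APIs above now accept
// the new CheckerSchedulerType and translate it to the legacy string that is actually
// persisted, via SchedulerTypeCompatibleMap. The lookup pattern in miniature, with the
// two example entries taken from the type.go hunk earlier in this section:
package main

import "fmt"

type checkerSchedulerType string

// compatibleMap mirrors the idea of types.SchedulerTypeCompatibleMap: typed name on the
// left, persisted legacy string on the right.
var compatibleMap = map[checkerSchedulerType]string{
	"balance-hot-region-scheduler": "hot-region",
	"grant-leader-scheduler":       "grant-leader",
}

// isSchedulerDisabled checks the persisted (legacy-keyed) config using the new type.
func isSchedulerDisabled(tp checkerSchedulerType, persisted map[string]bool) bool {
	oldType := compatibleMap[tp]
	return persisted[oldType]
}

func main() {
	persisted := map[string]bool{"hot-region": true}
	fmt.Println(isSchedulerDisabled("balance-hot-region-scheduler", persisted)) // true
}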
-func (o *PersistOptions) RemoveSchedulerCfg(tp string) { +func (o *PersistOptions) RemoveSchedulerCfg(tp types.CheckerSchedulerType) { + oldType := types.SchedulerTypeCompatibleMap[tp] v := o.GetScheduleConfig().Clone() for i, schedulerCfg := range v.Schedulers { - if tp == schedulerCfg.Type { - if sc.IsDefaultScheduler(tp) { + if oldType == schedulerCfg.Type { + if sc.IsDefaultScheduler(oldType) { schedulerCfg.Disable = true v.Schedulers[i] = schedulerCfg } else { diff --git a/server/forward.go b/server/forward.go index 650833e1fc1..5c49b871020 100644 --- a/server/forward.go +++ b/server/forward.go @@ -122,7 +122,7 @@ func (s *GrpcServer) forwardTSO(stream pdpb.PD_TsoServer) error { default: } - request, err := server.Recv(s.GetTSOProxyRecvFromClientTimeout()) + request, err := server.recv(s.GetTSOProxyRecvFromClientTimeout()) if err == io.EOF { return nil } @@ -189,7 +189,7 @@ func (s *GrpcServer) forwardTSO(stream pdpb.PD_TsoServer) error { Count: tsopbResp.GetCount(), Timestamp: tsopbResp.GetTimestamp(), } - if err := server.Send(response); err != nil { + if err := server.send(response); err != nil { return errors.WithStack(err) } } diff --git a/server/gc_service.go b/server/gc_service.go index db3879a74fb..8c967d542ee 100644 --- a/server/gc_service.go +++ b/server/gc_service.go @@ -205,7 +205,7 @@ func (s *GrpcServer) GetAllGCSafePointV2(ctx context.Context, request *pdpb.GetA startkey := endpoint.GCSafePointV2Prefix() endkey := clientv3.GetPrefixRangeEnd(startkey) - _, values, revision, err := s.loadRangeFromETCD(startkey, endkey) + _, values, revision, err := s.loadRangeFromEtcd(startkey, endkey) gcSafePoints := make([]*pdpb.GCSafePointV2, 0, len(values)) for _, value := range values { @@ -236,7 +236,7 @@ func (s *GrpcServer) GetAllGCSafePointV2(ctx context.Context, request *pdpb.GetA }, nil } -func (s *GrpcServer) loadRangeFromETCD(startKey, endKey string) ([]string, []string, int64, error) { +func (s *GrpcServer) loadRangeFromEtcd(startKey, endKey string) ([]string, []string, int64, error) { startKey = strings.Join([]string{s.rootPath, startKey}, "/") var opOption []clientv3.OpOption if endKey == "\x00" { diff --git a/server/grpc_service.go b/server/grpc_service.go index 7b18be47fde..fa9156e884e 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -112,7 +112,7 @@ type pdpbTSORequest struct { err error } -func (s *tsoServer) Send(m *pdpb.TsoResponse) error { +func (s *tsoServer) send(m *pdpb.TsoResponse) error { if atomic.LoadInt32(&s.closed) == 1 { return io.EOF } @@ -139,7 +139,7 @@ func (s *tsoServer) Send(m *pdpb.TsoResponse) error { } } -func (s *tsoServer) Recv(timeout time.Duration) (*pdpb.TsoRequest, error) { +func (s *tsoServer) recv(timeout time.Duration) (*pdpb.TsoRequest, error) { if atomic.LoadInt32(&s.closed) == 1 { return nil, io.EOF } @@ -176,6 +176,7 @@ type heartbeatServer struct { closed int32 } +// Send wraps Send() of PD_RegionHeartbeatServer. func (s *heartbeatServer) Send(m core.RegionHeartbeatResponse) error { if atomic.LoadInt32(&s.closed) == 1 { return io.EOF @@ -199,6 +200,7 @@ func (s *heartbeatServer) Send(m core.RegionHeartbeatResponse) error { } } +// Recv wraps Recv() of PD_RegionHeartbeatServer. 
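// Illustrative sketch, not part of this patch: tsoServer's Send/Recv above can be
// unexported (send/recv) because, as far as this diff shows, they are only called from
// within package server (forward.go), while heartbeatServer keeps exported Send/Recv and
// gains doc comments, presumably because those methods are reached through an interface
// from outside the package. The naming trade-off in miniature, with hypothetical types:
package main

import "fmt"

// stream stands in for a generated gRPC stream interface that must be satisfied.
type stream interface {
	Send(msg string) error
}

type heartbeatServer struct{}

// Send must keep its exported name to satisfy the stream interface.
func (heartbeatServer) Send(msg string) error { fmt.Println("sent:", msg); return nil }

type tsoServer struct{}

// send is package-internal, so an unexported name is enough.
func (tsoServer) send(msg string) error { fmt.Println("tso sent:", msg); return nil }

func main() {
	var s stream = heartbeatServer{}
	_ = s.Send("region heartbeat")
	ts := tsoServer{}
	_ = ts.send("tso response")
}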
func (s *heartbeatServer) Recv() (*pdpb.RegionHeartbeatRequest, error) { if atomic.LoadInt32(&s.closed) == 1 { return nil, io.EOF diff --git a/server/handler.go b/server/handler.go index cc924cf9a0b..34a78a93c3c 100644 --- a/server/handler.go +++ b/server/handler.go @@ -53,6 +53,7 @@ type server struct { *Server } +// GetCoordinator returns the coordinator. func (s *server) GetCoordinator() *schedule.Coordinator { c := s.GetRaftCluster() if c == nil { @@ -61,6 +62,7 @@ func (s *server) GetCoordinator() *schedule.Coordinator { return c.GetCoordinator() } +// GetCluster returns RaftCluster. func (s *server) GetCluster() sche.SchedulerCluster { return s.GetRaftCluster() } @@ -186,6 +188,7 @@ func (h *Handler) GetAllRequestHistoryHotRegion(request *HistoryHotRegionsReques // AddScheduler adds a scheduler. func (h *Handler) AddScheduler(tp types.CheckerSchedulerType, args ...string) error { + // TODO: remove this map in subsequent PRs, because we need use new type in the `CreateScheduler`. name := types.SchedulerTypeCompatibleMap[tp] c, err := h.GetRaftCluster() if err != nil { @@ -208,19 +211,19 @@ func (h *Handler) AddScheduler(tp types.CheckerSchedulerType, args ...string) er log.Error("can not add scheduler handler", zap.String("scheduler-name", s.GetName()), zap.Strings("scheduler-args", args), errs.ZapError(err)) return err } - log.Info("add scheduler handler successfully", zap.String("scheduler-name", name), zap.Strings("scheduler-args", args)) + log.Info("add scheduler handler successfully", zap.String("scheduler-name", s.GetName()), zap.Strings("scheduler-args", args)) } else { if err = c.AddScheduler(s, args...); err != nil { log.Error("can not add scheduler", zap.String("scheduler-name", s.GetName()), zap.Strings("scheduler-args", args), errs.ZapError(err)) return err } - log.Info("add scheduler successfully", zap.String("scheduler-name", name), zap.Strings("scheduler-args", args)) + log.Info("add scheduler successfully", zap.String("scheduler-name", s.GetName()), zap.Strings("scheduler-args", args)) } if err = h.opt.Persist(c.GetStorage()); err != nil { log.Error("can not persist scheduler config", errs.ZapError(err)) return err } - log.Info("persist scheduler config successfully", zap.String("scheduler-name", name), zap.Strings("scheduler-args", args)) + log.Info("persist scheduler config successfully", zap.String("scheduler-name", s.GetName()), zap.Strings("scheduler-args", args)) return nil } diff --git a/server/join/join.go b/server/join/join.go index 6bf111b9b45..8da90b7201b 100644 --- a/server/join/join.go +++ b/server/join/join.go @@ -17,7 +17,7 @@ package join import ( "fmt" "os" - "path" + "path/filepath" "strings" "time" @@ -90,7 +90,7 @@ func PrepareJoinCluster(cfg *config.Config) error { return errors.New("join self is forbidden") } - filePath := path.Join(cfg.DataDir, "join") + filePath := filepath.Join(cfg.DataDir, "join") // Read the persist join config if _, err := os.Stat(filePath); !os.IsNotExist(err) { s, err := os.ReadFile(filePath) @@ -104,7 +104,7 @@ func PrepareJoinCluster(cfg *config.Config) error { initialCluster := "" // Cases with data directory. 
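// Illustrative sketch, not part of this patch: many hunks in this change swap the
// slash-only "path" package and hard-coded "/tmp" literals for filepath.Join and
// os.TempDir(), so the same code builds OS-appropriate paths on Windows as well:
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	// Before: path.Join(cfg.DataDir, "join") with "/tmp"-style literals.
	dataDir := filepath.Join(os.TempDir(), "test_pd")
	joinFile := filepath.Join(dataDir, "join")
	fmt.Println(joinFile) // e.g. /tmp/test_pd/join on Linux, C:\...\test_pd\join on Windows
}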
- if isDataExist(path.Join(cfg.DataDir, "member")) { + if isDataExist(filepath.Join(cfg.DataDir, "member")) { cfg.InitialCluster = initialCluster cfg.InitialClusterState = embed.ClusterStateFlagExisting return nil diff --git a/server/server.go b/server/server.go index ce3c657ef46..7e0ee36278f 100644 --- a/server/server.go +++ b/server/server.go @@ -476,18 +476,16 @@ func (s *Server) startServer(ctx context.Context) error { s.tsoDispatcher = tsoutil.NewTSODispatcher(tsoProxyHandleDuration, tsoProxyBatchSize) s.tsoProtoFactory = &tsoutil.TSOProtoFactory{} s.pdProtoFactory = &tsoutil.PDProtoFactory{} - if !s.IsAPIServiceMode() { - s.tsoAllocatorManager = tso.NewAllocatorManager(s.ctx, mcs.DefaultKeyspaceGroupID, s.member, s.rootPath, s.storage, s, false) - // When disabled the Local TSO, we should clean up the Local TSO Allocator's meta info written in etcd if it exists. - if !s.cfg.EnableLocalTSO { - if err = s.tsoAllocatorManager.CleanUpDCLocation(); err != nil { - return err - } + s.tsoAllocatorManager = tso.NewAllocatorManager(s.ctx, mcs.DefaultKeyspaceGroupID, s.member, s.rootPath, s.storage, s, false) + // When disabled the Local TSO, we should clean up the Local TSO Allocator's meta info written in etcd if it exists. + if !s.cfg.EnableLocalTSO { + if err = s.tsoAllocatorManager.CleanUpDCLocation(); err != nil { + return err } - if zone, exist := s.cfg.Labels[config.ZoneLabel]; exist && zone != "" && s.cfg.EnableLocalTSO { - if err = s.tsoAllocatorManager.SetLocalTSOConfig(zone); err != nil { - return err - } + } + if zone, exist := s.cfg.Labels[config.ZoneLabel]; exist && zone != "" && s.cfg.EnableLocalTSO { + if err = s.tsoAllocatorManager.SetLocalTSOConfig(zone); err != nil { + return err } } diff --git a/server/server_test.go b/server/server_test.go index b2b15962fdc..410afda448d 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -280,7 +280,7 @@ func TestIsPathInDirectory(t *testing.T) { path := filepath.Join(directory, fileName) re.True(isPathInDirectory(path, directory)) - fileName = "../../test" + fileName = filepath.Join("..", "..", "test") path = filepath.Join(directory, fileName) re.False(isPathInDirectory(path, directory)) } diff --git a/server/testutil.go b/server/testutil.go index 5f817d47016..be6b2bbebb0 100644 --- a/server/testutil.go +++ b/server/testutil.go @@ -78,7 +78,7 @@ func NewTestSingleConfig(c *assertutil.Checker) *config.Config { cfg.AdvertiseClientUrls = cfg.ClientUrls cfg.AdvertisePeerUrls = cfg.PeerUrls - cfg.DataDir, _ = os.MkdirTemp("/tmp", "test_pd") + cfg.DataDir, _ = os.MkdirTemp(os.TempDir(), "test_pd") cfg.InitialCluster = fmt.Sprintf("pd=%s", cfg.PeerUrls) cfg.DisableStrictReconfigCheck = true cfg.TickInterval = typeutil.NewDuration(100 * time.Millisecond) diff --git a/tests/config.go b/tests/config.go index 4f653a3dc3c..a162a02009c 100644 --- a/tests/config.go +++ b/tests/config.go @@ -36,7 +36,7 @@ type serverConfig struct { } func newServerConfig(name string, cc *clusterConfig, join bool) *serverConfig { - tempDir, _ := os.MkdirTemp("/tmp", "pd-tests") + tempDir, _ := os.MkdirTemp(os.TempDir(), "pd-tests") return &serverConfig{ Name: name, DataDir: tempDir, diff --git a/tests/integrations/client/client_tls_test.go b/tests/integrations/client/client_tls_test.go index a5f0f5b200d..091fea2a4c8 100644 --- a/tests/integrations/client/client_tls_test.go +++ b/tests/integrations/client/client_tls_test.go @@ -37,25 +37,25 @@ import ( ) var ( - certPath = "./cert" - certExpiredPath = "./cert-expired" - certScript = "./cert_opt.sh" + 
certPath = strings.Join([]string{".", "cert"}, string(filepath.Separator)) + certExpiredPath = strings.Join([]string{".", "cert-expired"}, string(filepath.Separator)) + certScript = strings.Join([]string{".", "cert_opt.sh"}, string(filepath.Separator)) testTLSInfo = transport.TLSInfo{ - KeyFile: "./cert/pd-server-key.pem", - CertFile: "./cert/pd-server.pem", - TrustedCAFile: "./cert/ca.pem", + KeyFile: strings.Join([]string{".", "cert", "pd-server-key.pem"}, string(filepath.Separator)), + CertFile: strings.Join([]string{".", "cert", "pd-server.pem"}, string(filepath.Separator)), + TrustedCAFile: strings.Join([]string{".", "cert", "ca.pem"}, string(filepath.Separator)), } testClientTLSInfo = transport.TLSInfo{ - KeyFile: "./cert/client-key.pem", - CertFile: "./cert/client.pem", - TrustedCAFile: "./cert/ca.pem", + KeyFile: strings.Join([]string{".", "cert", "client-key.pem"}, string(filepath.Separator)), + CertFile: strings.Join([]string{".", "cert", "client.pem"}, string(filepath.Separator)), + TrustedCAFile: strings.Join([]string{".", "cert", "ca.pem"}, string(filepath.Separator)), } testTLSInfoExpired = transport.TLSInfo{ - KeyFile: "./cert-expired/pd-server-key.pem", - CertFile: "./cert-expired/pd-server.pem", - TrustedCAFile: "./cert-expired/ca.pem", + KeyFile: strings.Join([]string{".", "cert-expired", "pd-server-key.pem"}, string(filepath.Separator)), + CertFile: strings.Join([]string{".", "cert-expired", "pd-server.pem"}, string(filepath.Separator)), + TrustedCAFile: strings.Join([]string{".", "cert-expired", "ca.pem"}, string(filepath.Separator)), } ) diff --git a/tests/integrations/client/http_client_test.go b/tests/integrations/client/http_client_test.go index 7b51d9917ad..fd8b65f01ba 100644 --- a/tests/integrations/client/http_client_test.go +++ b/tests/integrations/client/http_client_test.go @@ -440,6 +440,10 @@ func (suite *httpClientTestSuite) TestRegionLabel() { re.Equal(labelRule.ID, allLabelRules[1].ID) re.Equal(labelRule.Labels, allLabelRules[1].Labels) re.Equal(labelRule.RuleType, allLabelRules[1].RuleType) + labelRules, err = client.GetRegionLabelRulesByIDs(ctx, []string{"rule2"}) + re.NoError(err) + re.Len(labelRules, 1) + re.Equal(labelRule, labelRules[0]) labelRules, err = client.GetRegionLabelRulesByIDs(ctx, []string{"keyspaces/0", "rule2"}) re.NoError(err) sort.Slice(labelRules, func(i, j int) bool { diff --git a/tests/server/cluster/cluster_test.go b/tests/server/cluster/cluster_test.go index 5af750a3c2c..c95aa50cb3d 100644 --- a/tests/server/cluster/cluster_test.go +++ b/tests/server/cluster/cluster_test.go @@ -18,7 +18,9 @@ import ( "context" "fmt" "math" + "os" "strconv" + "strings" "sync" "testing" "time" @@ -1819,3 +1821,55 @@ func TestExternalTimestamp(t *testing.T) { re.Equal(ts, resp4.GetTimestamp()) } } + +func TestPatrolRegionConfigChange(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + tc, err := tests.NewTestCluster(ctx, 1) + defer tc.Destroy() + re.NoError(err) + err = tc.RunInitialServers() + re.NoError(err) + tc.WaitLeader() + leaderServer := tc.GetLeaderServer() + re.NoError(leaderServer.BootstrapCluster()) + for i := 1; i <= 3; i++ { + store := &metapb.Store{ + Id: uint64(i), + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + LastHeartbeat: time.Now().UnixNano(), + } + tests.MustPutStore(re, tc, store) + } + for i := 1; i <= 200; i++ { + startKey := []byte(fmt.Sprintf("%d", i*2-1)) + endKey := []byte(fmt.Sprintf("%d", i*2)) + tests.MustPutRegion(re, tc, 
uint64(i), uint64(i%3+1), startKey, endKey) + } + fname := testutil.InitTempFileLogger("debug") + defer os.RemoveAll(fname) + checkLog(re, fname, "coordinator starts patrol regions") + + // test change patrol region interval + schedule := leaderServer.GetConfig().Schedule + schedule.PatrolRegionInterval = typeutil.NewDuration(99 * time.Millisecond) + leaderServer.GetServer().SetScheduleConfig(schedule) + checkLog(re, fname, "starts patrol regions with new interval") + + // test change schedule halt + schedule = leaderServer.GetConfig().Schedule + schedule.HaltScheduling = true + leaderServer.GetServer().SetScheduleConfig(schedule) + checkLog(re, fname, "skip patrol regions due to scheduling is halted") +} + +func checkLog(re *require.Assertions, fname, expect string) { + testutil.Eventually(re, func() bool { + b, _ := os.ReadFile(fname) + l := string(b) + return strings.Contains(l, expect) + }) + os.Truncate(fname, 0) +} diff --git a/tests/server/join/join_test.go b/tests/server/join/join_test.go index a9d2a69c5e4..1eaa59a4e2a 100644 --- a/tests/server/join/join_test.go +++ b/tests/server/join/join_test.go @@ -17,7 +17,7 @@ package join_test import ( "context" "os" - "path" + "path/filepath" "testing" "time" @@ -56,7 +56,7 @@ func TestSimpleJoin(t *testing.T) { re.NoError(err) err = pd2.Run() re.NoError(err) - _, err = os.Stat(path.Join(pd2.GetConfig().DataDir, "join")) + _, err = os.Stat(filepath.Join(pd2.GetConfig().DataDir, "join")) re.False(os.IsNotExist(err)) members, err = etcdutil.ListEtcdMembers(ctx, client) re.NoError(err) @@ -71,7 +71,7 @@ func TestSimpleJoin(t *testing.T) { re.NoError(err) err = pd3.Run() re.NoError(err) - _, err = os.Stat(path.Join(pd3.GetConfig().DataDir, "join")) + _, err = os.Stat(filepath.Join(pd3.GetConfig().DataDir, "join")) re.False(os.IsNotExist(err)) members, err = etcdutil.ListEtcdMembers(ctx, client) re.NoError(err) diff --git a/tests/testutil.go b/tests/testutil.go index 2fc87298d07..c895d206c05 100644 --- a/tests/testutil.go +++ b/tests/testutil.go @@ -110,6 +110,7 @@ func StartSingleResourceManagerTestServer(ctx context.Context, re *require.Asser cfg := rm.NewConfig() cfg.BackendEndpoints = backendEndpoints cfg.ListenAddr = listenAddrs + cfg.Name = cfg.ListenAddr cfg, err := rm.GenerateConfig(cfg) re.NoError(err) @@ -127,6 +128,7 @@ func StartSingleTSOTestServerWithoutCheck(ctx context.Context, re *require.Asser cfg := tso.NewConfig() cfg.BackendEndpoints = backendEndpoints cfg.ListenAddr = listenAddrs + cfg.Name = cfg.ListenAddr cfg, err := tso.GenerateConfig(cfg) re.NoError(err) // Setup the logger. @@ -164,6 +166,7 @@ func StartSingleSchedulingTestServer(ctx context.Context, re *require.Assertions cfg := sc.NewConfig() cfg.BackendEndpoints = backendEndpoints cfg.ListenAddr = listenAddrs + cfg.Name = cfg.ListenAddr cfg, err := scheduling.GenerateConfig(cfg) re.NoError(err) diff --git a/tools/pd-api-bench/cases/cases.go b/tools/pd-api-bench/cases/cases.go index 18d5c8732e6..6ffa429fdb7 100644 --- a/tools/pd-api-bench/cases/cases.go +++ b/tools/pd-api-bench/cases/cases.go @@ -117,18 +117,18 @@ func (c *baseCase) GetConfig() *Config { return c.cfg.Clone() } -// ETCDCase is the interface for all etcd api cases. -type ETCDCase interface { +// EtcdCase is the interface for all etcd api cases. +type EtcdCase interface { Case Init(context.Context, *clientv3.Client) error Unary(context.Context, *clientv3.Client) error } -// ETCDCreateFn is function type to create ETCDCase. 
-type ETCDCreateFn func() ETCDCase +// EtcdCreateFn is function type to create EtcdCase. +type EtcdCreateFn func() EtcdCase -// ETCDCaseFnMap is the map for all ETCD case creation function. -var ETCDCaseFnMap = map[string]ETCDCreateFn{ +// EtcdCaseFnMap is the map for all etcd case creation function. +var EtcdCaseFnMap = map[string]EtcdCreateFn{ "Get": newGetKV(), "Put": newPutKV(), "Delete": newDeleteKV(), @@ -440,8 +440,8 @@ type getKV struct { *baseCase } -func newGetKV() func() ETCDCase { - return func() ETCDCase { +func newGetKV() func() EtcdCase { + return func() EtcdCase { return &getKV{ baseCase: &baseCase{ name: "Get", @@ -470,8 +470,8 @@ type putKV struct { *baseCase } -func newPutKV() func() ETCDCase { - return func() ETCDCase { +func newPutKV() func() EtcdCase { + return func() EtcdCase { return &putKV{ baseCase: &baseCase{ name: "Put", @@ -492,8 +492,8 @@ type deleteKV struct { *baseCase } -func newDeleteKV() func() ETCDCase { - return func() ETCDCase { +func newDeleteKV() func() EtcdCase { + return func() EtcdCase { return &deleteKV{ baseCase: &baseCase{ name: "Put", @@ -514,8 +514,8 @@ type txnKV struct { *baseCase } -func newTxnKV() func() ETCDCase { - return func() ETCDCase { +func newTxnKV() func() EtcdCase { + return func() EtcdCase { return &txnKV{ baseCase: &baseCase{ name: "Put", diff --git a/tools/pd-api-bench/cases/controller.go b/tools/pd-api-bench/cases/controller.go index 42eeafe4597..dc48b5280cf 100644 --- a/tools/pd-api-bench/cases/controller.go +++ b/tools/pd-api-bench/cases/controller.go @@ -77,8 +77,8 @@ func (c *Coordinator) GetGRPCCase(name string) (*Config, error) { return nil, errors.Errorf("case %v does not exist", name) } -// GetETCDCase returns the etcd case config. -func (c *Coordinator) GetETCDCase(name string) (*Config, error) { +// GetEtcdCase returns the etcd case config. +func (c *Coordinator) GetEtcdCase(name string) (*Config, error) { c.mu.RLock() defer c.mu.RUnlock() if controller, ok := c.etcd[name]; ok { @@ -109,8 +109,8 @@ func (c *Coordinator) GetAllGRPCCases() map[string]*Config { return ret } -// GetAllETCDCases returns the all etcd case configs. -func (c *Coordinator) GetAllETCDCases() map[string]*Config { +// GetAllEtcdCases returns the all etcd case configs. +func (c *Coordinator) GetAllEtcdCases() map[string]*Config { c.mu.RLock() defer c.mu.RUnlock() ret := make(map[string]*Config) @@ -164,11 +164,11 @@ func (c *Coordinator) SetGRPCCase(name string, cfg *Config) error { return nil } -// SetETCDCase sets the config for the specific case. -func (c *Coordinator) SetETCDCase(name string, cfg *Config) error { +// SetEtcdCase sets the config for the specific case. 
+func (c *Coordinator) SetEtcdCase(name string, cfg *Config) error { c.mu.Lock() defer c.mu.Unlock() - if fn, ok := ETCDCaseFnMap[name]; ok { + if fn, ok := EtcdCaseFnMap[name]; ok { var controller *etcdController if controller, ok = c.etcd[name]; !ok { controller = newEtcdController(c.ctx, c.etcdClients, fn) @@ -324,7 +324,7 @@ func (c *gRPCController) stop() { } type etcdController struct { - ETCDCase + EtcdCase clients []*clientv3.Client pctx context.Context @@ -334,11 +334,11 @@ type etcdController struct { wg sync.WaitGroup } -func newEtcdController(ctx context.Context, clis []*clientv3.Client, fn ETCDCreateFn) *etcdController { +func newEtcdController(ctx context.Context, clis []*clientv3.Client, fn EtcdCreateFn) *etcdController { c := &etcdController{ pctx: ctx, clients: clis, - ETCDCase: fn(), + EtcdCase: fn(), } return c } diff --git a/tools/pd-api-bench/config/config.go b/tools/pd-api-bench/config/config.go index 35377c12f33..0320665c29b 100644 --- a/tools/pd-api-bench/config/config.go +++ b/tools/pd-api-bench/config/config.go @@ -44,7 +44,7 @@ type Config struct { // only for init HTTP map[string]cases.Config `toml:"http" json:"http"` GRPC map[string]cases.Config `toml:"grpc" json:"grpc"` - ETCD map[string]cases.Config `toml:"etcd" json:"etcd"` + Etcd map[string]cases.Config `toml:"etcd" json:"etcd"` } // NewConfig return a set of settings. @@ -109,9 +109,9 @@ func (c *Config) InitCoordinator(co *cases.Coordinator) { log.Error("create gRPC case failed", zap.Error(err)) } } - for name, cfg := range c.ETCD { + for name, cfg := range c.Etcd { cfg := cfg - err := co.SetETCDCase(name, &cfg) + err := co.SetEtcdCase(name, &cfg) if err != nil { log.Error("create etcd case failed", zap.Error(err)) } diff --git a/tools/pd-api-bench/main.go b/tools/pd-api-bench/main.go index 747fbaa10c1..78bec1e1d01 100644 --- a/tools/pd-api-bench/main.go +++ b/tools/pd-api-bench/main.go @@ -292,14 +292,14 @@ func runHTTPServer(cfg *config.Config, co *cases.Coordinator) { } for name, cfg := range input { cfg := cfg - co.SetETCDCase(name, &cfg) + co.SetEtcdCase(name, &cfg) } c.String(http.StatusOK, "") }) engine.POST("config/etcd/:name", func(c *gin.Context) { name := c.Param("name") cfg := getCfg(c) - co.SetETCDCase(name, cfg) + co.SetEtcdCase(name, cfg) c.String(http.StatusOK, "") }) @@ -330,12 +330,12 @@ func runHTTPServer(cfg *config.Config, co *cases.Coordinator) { c.IndentedJSON(http.StatusOK, cfg) }) engine.GET("config/etcd/all", func(c *gin.Context) { - all := co.GetAllETCDCases() + all := co.GetAllEtcdCases() c.IndentedJSON(http.StatusOK, all) }) engine.GET("config/etcd/:name", func(c *gin.Context) { name := c.Param("name") - cfg, err := co.GetETCDCase(name) + cfg, err := co.GetEtcdCase(name) if err != nil { c.String(http.StatusBadRequest, err.Error()) return diff --git a/tools/pd-backup/pdbackup/backup_test.go b/tools/pd-backup/pdbackup/backup_test.go index 0ab9116ddbe..b67873baf8e 100644 --- a/tools/pd-backup/pdbackup/backup_test.go +++ b/tools/pd-backup/pdbackup/backup_test.go @@ -10,6 +10,7 @@ import ( "net/http/httptest" "os" "path" + "path/filepath" "strconv" "testing" "time" @@ -68,7 +69,7 @@ func setupServer() (*httptest.Server, *config.Config) { AdvertiseClientUrls: "example.com:2380", AdvertisePeerUrls: "example.com:2380", Name: "test-svc", - DataDir: "/data", + DataDir: string(filepath.Separator) + "data", ForceNewCluster: true, EnableGRPCGateway: true, InitialCluster: "pd1=http://127.0.0.1:10208", diff --git a/tools/pd-ctl/pdctl/command/global_test.go 
b/tools/pd-ctl/pdctl/command/global_test.go index 86eb4366d04..0d1cf74ac74 100644 --- a/tools/pd-ctl/pdctl/command/global_test.go +++ b/tools/pd-ctl/pdctl/command/global_test.go @@ -16,6 +16,7 @@ package command import ( "os" "os/exec" + "path/filepath" "testing" "github.com/spf13/cobra" @@ -30,16 +31,16 @@ func TestParseTLSConfig(t *testing.T) { Short: "Placement Driver control", SilenceErrors: true, } - certPath := "../../tests/cert" - rootCmd.Flags().String("cacert", certPath+"/ca.pem", "path of file that contains list of trusted SSL CAs") - rootCmd.Flags().String("cert", certPath+"/client.pem", "path of file that contains X509 certificate in PEM format") - rootCmd.Flags().String("key", certPath+"/client-key.pem", "path of file that contains X509 key in PEM format") + certPath := filepath.Join("..", "..", "tests", "cert") + rootCmd.Flags().String("cacert", filepath.Join(certPath, "ca.pem"), "path of file that contains list of trusted SSL CAs") + rootCmd.Flags().String("cert", filepath.Join(certPath, "client.pem"), "path of file that contains X509 certificate in PEM format") + rootCmd.Flags().String("key", filepath.Join(certPath, "client-key.pem"), "path of file that contains X509 key in PEM format") // generate certs if err := os.Mkdir(certPath, 0755); err != nil { t.Fatal(err) } - certScript := "../../tests/cert_opt.sh" + certScript := filepath.Join("..", "..", "tests", "cert_opt.sh") if err := exec.Command(certScript, "generate", certPath).Run(); err != nil { t.Fatal(err) } diff --git a/tools/pd-ctl/tests/config/config_test.go b/tools/pd-ctl/tests/config/config_test.go index cd77104f01f..f3c261e1f49 100644 --- a/tools/pd-ctl/tests/config/config_test.go +++ b/tools/pd-ctl/tests/config/config_test.go @@ -181,9 +181,11 @@ func (suite *configTestSuite) checkConfig(cluster *pdTests.TestCluster) { scheduleConfig.MaxMergeRegionKeys = scheduleConfig.GetMaxMergeRegionKeys() re.Equal(scheduleConfig, &scheduleCfg) - re.Equal(20, int(svr.GetScheduleConfig().MaxMergeRegionSize)) + // After https://github.com/tikv/tikv/issues/17309, the default value is enlarged from 20 to 54, + // to make it compatible with the default value of region size of tikv. 
+ re.Equal(54, int(svr.GetScheduleConfig().MaxMergeRegionSize)) re.Equal(0, int(svr.GetScheduleConfig().MaxMergeRegionKeys)) - re.Equal(20*10000, int(svr.GetScheduleConfig().GetMaxMergeRegionKeys())) + re.Equal(54*10000, int(svr.GetScheduleConfig().GetMaxMergeRegionKeys())) // set max-merge-region-size to 40MB args = []string{"-u", pdAddr, "config", "set", "max-merge-region-size", "40"} @@ -357,7 +359,7 @@ func (suite *configTestSuite) checkConfigForwardControl(cluster *pdTests.TestClu leaderServer := cluster.GetLeaderServer() pdAddr := leaderServer.GetAddr() - f, _ := os.CreateTemp("/tmp", "pd_tests") + f, _ := os.CreateTemp(os.TempDir(), "pd_tests") fname := f.Name() f.Close() defer os.RemoveAll(fname) @@ -570,7 +572,7 @@ func (suite *configTestSuite) checkPlacementRules(cluster *pdTests.TestCluster) // test show checkShowRuleKey(re, pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}) - f, _ := os.CreateTemp("/tmp", "pd_tests") + f, _ := os.CreateTemp(os.TempDir(), "pd_tests") fname := f.Name() f.Close() defer os.RemoveAll(fname) @@ -717,7 +719,7 @@ func (suite *configTestSuite) checkPlacementRuleBundle(cluster *pdTests.TestClus re.NoError(json.Unmarshal(output, &bundle)) re.Equal(placement.GroupBundle{ID: placement.DefaultGroupID, Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, bundle) - f, err := os.CreateTemp("/tmp", "pd_tests") + f, err := os.CreateTemp(os.TempDir(), "pd_tests") re.NoError(err) fname := f.Name() f.Close() diff --git a/tools/pd-ctl/tests/health/health_test.go b/tools/pd-ctl/tests/health/health_test.go index f1d3c7cfbf1..1d6cf884ccf 100644 --- a/tools/pd-ctl/tests/health/health_test.go +++ b/tools/pd-ctl/tests/health/health_test.go @@ -80,8 +80,8 @@ func TestHealthTLS(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - certPath := "../cert" - certScript := "../cert_opt.sh" + certPath := filepath.Join("..", "cert") + certScript := filepath.Join("..", "cert_opt.sh") // generate certs if err := os.Mkdir(certPath, 0755); err != nil { t.Fatal(err) @@ -143,9 +143,9 @@ func TestHealthTLS(t *testing.T) { pdAddr := tc.GetConfig().GetClientURL() pdAddr = strings.ReplaceAll(pdAddr, "http", "https") args := []string{"-u", pdAddr, "health", - "--cacert=../cert/ca.pem", - "--cert=../cert/client.pem", - "--key=../cert/client-key.pem"} + "--cacert=" + filepath.Join("..", "cert", "ca.pem"), + "--cert=" + filepath.Join("..", "cert", "client.pem"), + "--key=" + filepath.Join("..", "cert", "client-key.pem")} output, err := tests.ExecuteCommand(cmd, args...) 
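// Illustrative sketch, not part of this patch: the config assertions earlier in this
// section rely on the rule (as the updated test implies) that when max-merge-region-keys
// is left at 0 it is derived from max-merge-region-size as size*10000, so the new
// default of 54 yields 540000 keys:
package main

import "fmt"

// maxMergeRegionKeys mirrors the derivation the test asserts; it is a simplified
// stand-in, not the real ScheduleConfig method.
func maxMergeRegionKeys(sizeMB, keys uint64) uint64 {
	if keys == 0 {
		return sizeMB * 10000 // 54 -> 540000, matching re.Equal(54*10000, ...)
	}
	return keys
}

func main() {
	fmt.Println(maxMergeRegionKeys(54, 0)) // 540000
}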
re.NoError(err) h := make([]api.Health, len(healths)) diff --git a/tools/pd-ctl/tests/region/region_test.go b/tools/pd-ctl/tests/region/region_test.go index afffba411bc..49f1eaa0a58 100644 --- a/tools/pd-ctl/tests/region/region_test.go +++ b/tools/pd-ctl/tests/region/region_test.go @@ -142,7 +142,7 @@ func TestRegion(t *testing.T) { // region check empty-region command {[]string{"region", "check", "empty-region"}, []*core.RegionInfo{r1}}, // region check undersized-region command - {[]string{"region", "check", "undersized-region"}, []*core.RegionInfo{r1, r4}}, + {[]string{"region", "check", "undersized-region"}, []*core.RegionInfo{r1, r3, r4}}, // region check oversized-region command {[]string{"region", "check", "oversized-region"}, []*core.RegionInfo{r2}}, // region keys --format=raw command diff --git a/tools/pd-ctl/tests/store/store_test.go b/tools/pd-ctl/tests/store/store_test.go index 2e1e7ac9444..ae35839837e 100644 --- a/tools/pd-ctl/tests/store/store_test.go +++ b/tools/pd-ctl/tests/store/store_test.go @@ -595,8 +595,8 @@ func TestStoreTLS(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() - certPath := "../cert" - certScript := "../cert_opt.sh" + certPath := filepath.Join("..", "cert") + certScript := filepath.Join("..", "cert_opt.sh") // generate certs if err := os.Mkdir(certPath, 0755); err != nil { t.Fatal(err) @@ -674,9 +674,9 @@ func TestStoreTLS(t *testing.T) { pdAddr = strings.ReplaceAll(pdAddr, "http", "https") // store command args := []string{"-u", pdAddr, "store", - "--cacert=../cert/ca.pem", - "--cert=../cert/client.pem", - "--key=../cert/client-key.pem"} + "--cacert=" + filepath.Join("..", "cert", "ca.pem"), + "--cert=" + filepath.Join("..", "cert", "client.pem"), + "--key=" + filepath.Join("..", "cert", "client-key.pem")} output, err := tests.ExecuteCommand(cmd, args...) 
re.NoError(err) storesInfo := new(response.StoresInfo) diff --git a/tools/pd-simulator/simulator/config/config.go b/tools/pd-simulator/simulator/config/config.go index 030655bd3dc..4d182a2a03f 100644 --- a/tools/pd-simulator/simulator/config/config.go +++ b/tools/pd-simulator/simulator/config/config.go @@ -98,7 +98,7 @@ func NewSimConfig(serverLogLevel string) *SimConfig { cfg.AdvertiseClientUrls = cfg.ClientUrls cfg.AdvertisePeerUrls = cfg.PeerUrls - cfg.DataDir, _ = os.MkdirTemp("/tmp", "test_pd") + cfg.DataDir, _ = os.MkdirTemp(os.TempDir(), "test_pd") cfg.InitialCluster = fmt.Sprintf("pd=%s", cfg.PeerUrls) cfg.Log.Level = serverLogLevel return &SimConfig{ServerConfig: cfg} diff --git a/tools/pd-ut/coverProfile.go b/tools/pd-ut/coverProfile.go index 0ed1c3f3c61..75410f4b707 100644 --- a/tools/pd-ut/coverProfile.go +++ b/tools/pd-ut/coverProfile.go @@ -18,7 +18,7 @@ import ( "bufio" "fmt" "os" - "path" + "path/filepath" "sort" "golang.org/x/tools/cover" @@ -70,7 +70,7 @@ func collectCoverProfileFile() { } func collectOneCoverProfileFile(result map[string]*cover.Profile, file os.DirEntry) { - f, err := os.Open(path.Join(coverFileTempDir, file.Name())) + f, err := os.Open(filepath.Join(coverFileTempDir, file.Name())) if err != nil { fmt.Println("open temp cover file error:", err) os.Exit(-1) diff --git a/tools/pd-ut/ut.go b/tools/pd-ut/ut.go index dcf0c17c686..6b366a137a2 100644 --- a/tools/pd-ut/ut.go +++ b/tools/pd-ut/ut.go @@ -25,7 +25,7 @@ import ( "math/rand" "os" "os/exec" - "path" + "path/filepath" "regexp" "runtime" "strconv" @@ -92,8 +92,8 @@ go tool cover --func=xxx` } var ( - modulePath = "github.com/tikv/pd" - integrationsTestPath = "tests/integrations" + modulePath = filepath.Join("github.com", "tikv", "pd") + integrationsTestPath = filepath.Join("tests", "integrations") ) var ( @@ -171,8 +171,8 @@ func main() { case "it": // run integration tests if len(os.Args) >= 3 { - modulePath = path.Join(modulePath, integrationsTestPath) - workDir = path.Join(workDir, integrationsTestPath) + modulePath = filepath.Join(modulePath, integrationsTestPath) + workDir = filepath.Join(workDir, integrationsTestPath) switch os.Args[2] { case "run": isSucceed = cmdRun(os.Args[3:]...) @@ -414,7 +414,7 @@ func runExistingTestCases(pkgs []string) (tasks []task, err error) { wg := &sync.WaitGroup{} tasksChannel := make(chan []task, len(pkgs)) for _, pkg := range pkgs { - _, ok := existPkgs[fmt.Sprintf("%s/%s", modulePath, pkg)] + _, ok := existPkgs[filepath.Join(modulePath, pkg)] if !ok { fmt.Println("no test case in ", pkg) continue @@ -528,7 +528,8 @@ func filterTestCases(tasks []task, arg1 string) ([]task, error) { } func listPackages() ([]string, error) { - cmd := exec.Command("go", "list", "./...") + listPath := strings.Join([]string{".", "..."}, string(filepath.Separator)) + cmd := exec.Command("go", "list", listPath) cmd.Dir = workDir ss, err := cmdToLines(cmd) if err != nil { @@ -576,7 +577,7 @@ type testResult struct { func (n *numa) runTestCase(pkg string, fn string) testResult { res := testResult{ JUnitTestCase: JUnitTestCase{ - ClassName: path.Join(modulePath, pkg), + ClassName: filepath.Join(modulePath, pkg), Name: fn, }, } @@ -586,7 +587,7 @@ func (n *numa) runTestCase(pkg string, fn string) testResult { var start time.Time for i := 0; i < 3; i++ { cmd := n.testCommand(pkg, fn) - cmd.Dir = path.Join(workDir, pkg) + cmd.Dir = filepath.Join(workDir, pkg) // Combine the test case output, so the run result for failed cases can be displayed. 
cmd.Stdout = &buf cmd.Stderr = &buf @@ -675,10 +676,10 @@ func (*numa) testCommand(pkg string, fn string) *exec.Cmd { args := make([]string, 0, 10) // let the test run in the verbose mode. args = append(args, "-test.v") - exe := "./" + testFileName(pkg) + exe := strings.Join([]string{".", testFileName(pkg)}, string(filepath.Separator)) if coverProfile != "" { - fileName := strings.ReplaceAll(pkg, "/", "_") + "." + fn - tmpFile := path.Join(coverFileTempDir, fileName) + fileName := strings.ReplaceAll(pkg, string(filepath.Separator), "_") + "." + fn + tmpFile := filepath.Join(coverFileTempDir, fileName) args = append(args, "-test.coverprofile", tmpFile) } if strings.Contains(fn, "Suite") { @@ -720,12 +721,12 @@ func generateBuildCache() error { fmt.Println("generate build cache") // cd cmd/pd-server && go test -tags=tso_function_test,deadlock -exec-=true -vet=off -toolexec=go-compile-without-link cmd := exec.Command("go", "test", "-exec=true", "-vet", "off", "--tags=tso_function_test,deadlock") - goCompileWithoutLink := fmt.Sprintf("-toolexec=%s/tools/pd-ut/go-compile-without-link.sh", workDir) - cmd.Dir = fmt.Sprintf("%s/cmd/pd-server", workDir) + goCompileWithoutLink := fmt.Sprintf("-toolexec=%s", filepath.Join(workDir, "tools", "pd-ut", "go-compile-without-link.sh")) + cmd.Dir = filepath.Join(workDir, "cmd", "pd-server") if strings.Contains(workDir, integrationsTestPath) { - cmd.Dir = fmt.Sprintf("%s/cmd/pd-server", workDir[:strings.LastIndex(workDir, integrationsTestPath)]) - goCompileWithoutLink = fmt.Sprintf("-toolexec=%s/tools/pd-ut/go-compile-without-link.sh", - workDir[:strings.LastIndex(workDir, integrationsTestPath)]) + cmd.Dir = filepath.Join(workDir[:strings.LastIndex(workDir, integrationsTestPath)], "cmd", "pd-server") + goCompileWithoutLink = fmt.Sprintf("-toolexec=%s", filepath.Join(workDir[:strings.LastIndex(workDir, integrationsTestPath)], + "tools", "pd-ut", "go-compile-without-link.sh")) } cmd.Args = append(cmd.Args, goCompileWithoutLink) cmd.Stdout = os.Stdout @@ -746,24 +747,24 @@ func buildTestBinaryMulti(pkgs []string) ([]byte, error) { // go test --exec=xprog --tags=tso_function_test,deadlock -vet=off --count=0 $(pkgs) // workPath just like `/pd/tests/integrations` - xprogPath := path.Join(workDir, "bin/xprog") + xprogPath := filepath.Join(workDir, "bin", "xprog") if strings.Contains(workDir, integrationsTestPath) { - xprogPath = path.Join(workDir[:strings.LastIndex(workDir, integrationsTestPath)], "bin/xprog") + xprogPath = filepath.Join(workDir[:strings.LastIndex(workDir, integrationsTestPath)], "bin", "xprog") } packages := make([]string, 0, len(pkgs)) for _, pkg := range pkgs { - packages = append(packages, path.Join(modulePath, pkg)) + packages = append(packages, filepath.Join(modulePath, pkg)) } // We use 2 * parallel for `go build` to make it faster. p := strconv.Itoa(parallel * 2) cmd := exec.Command("go", "test", "-p", p, "--exec", xprogPath, "-vet", "off", "--tags=tso_function_test,deadlock") if coverProfile != "" { - coverpkg := "./..." + coverPkg := strings.Join([]string{".", "..."}, string(filepath.Separator)) if strings.Contains(workDir, integrationsTestPath) { - coverpkg = "../../..." + coverPkg = filepath.Join("..", "..", "...") } - cmd.Args = append(cmd.Args, "-cover", fmt.Sprintf("-coverpkg=%s", coverpkg)) + cmd.Args = append(cmd.Args, "-cover", fmt.Sprintf("-coverpkg=%s", coverPkg)) } cmd.Args = append(cmd.Args, packages...) 
if race { @@ -794,12 +795,13 @@ func buildTestBinary(pkg string) error { //nolint:gosec cmd := exec.Command("go", "test", "-c", "-vet", "off", "--tags=tso_function_test,deadlock", "-o", testFileName(pkg), "-v") if coverProfile != "" { - cmd.Args = append(cmd.Args, "-cover", "-coverpkg=./...") + coverPkg := strings.Join([]string{".", "..."}, string(filepath.Separator)) + cmd.Args = append(cmd.Args, "-cover", fmt.Sprintf("-coverpkg=%s", coverPkg)) } if race { cmd.Args = append(cmd.Args, "-race") } - cmd.Dir = path.Join(workDir, pkg) + cmd.Dir = filepath.Join(workDir, pkg) cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr if err := cmd.Run(); err != nil { @@ -820,20 +822,19 @@ func testBinaryExist(pkg string) (bool, error) { } func testFileName(pkg string) string { - _, file := path.Split(pkg) + _, file := filepath.Split(pkg) return file + ".test.bin" } func testFileFullPath(pkg string) string { - return path.Join(workDir, pkg, testFileName(pkg)) + return filepath.Join(workDir, pkg, testFileName(pkg)) } func listNewTestCases(pkg string) []string { - exe := "./" + testFileName(pkg) - + exe := strings.Join([]string{".", testFileName(pkg)}, string(filepath.Separator)) // core.test -test.list Test cmd := exec.Command(exe, "-test.list", "Test") - cmd.Dir = path.Join(workDir, pkg) + cmd.Dir = filepath.Join(workDir, pkg) var buf bytes.Buffer cmd.Stdout = &buf err := cmd.Run() diff --git a/tools/pd-ut/xprog.go b/tools/pd-ut/xprog.go index cf3e9b295e2..4a593be8f31 100644 --- a/tools/pd-ut/xprog.go +++ b/tools/pd-ut/xprog.go @@ -34,7 +34,7 @@ func main() { // Extract the current work directory cwd := os.Args[0] - cwd = cwd[:len(cwd)-len("bin/xprog")] + cwd = cwd[:len(cwd)-len(filepath.Join("bin", "xprog"))] testBinaryPath := os.Args[1] dir, _ := filepath.Split(testBinaryPath) @@ -42,7 +42,7 @@ func main() { // Extract the package info from /tmp/go-build2662369829/b1382/importcfg.link pkg := getPackageInfo(dir) - const prefix = "github.com/tikv/pd/" + var prefix = filepath.Join("github.com", "tikv", "pd") if !strings.HasPrefix(pkg, prefix) { os.Exit(-3) }
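For reference, a minimal standalone sketch of the portable path handling these test and tooling changes converge on: os.TempDir() in place of a hard-coded "/tmp", and filepath.Join in place of hand-assembled "/"-separated paths. This sketch is not part of the diff; the "pd_tests" prefix and cert file names are simply borrowed from the tests above for illustration.

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	// Create a scratch directory under the platform's temp root
	// rather than assuming "/tmp" exists (it does not on Windows).
	dir, err := os.MkdirTemp(os.TempDir(), "pd_tests")
	if err != nil {
		fmt.Println("mkdir temp:", err)
		os.Exit(1)
	}
	defer os.RemoveAll(dir)

	// Build nested paths with filepath.Join so the OS-specific
	// separator is inserted automatically.
	certPath := filepath.Join("..", "..", "tests", "cert")
	caFile := filepath.Join(certPath, "ca.pem")
	fmt.Println(dir, caFile)
}

Note that os.MkdirTemp already falls back to the platform temp root when its first argument is empty, so passing os.TempDir() explicitly simply mirrors the call sites above.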