diff --git a/pkg/schedule/schedulers/evict_leader.go b/pkg/schedule/schedulers/evict_leader.go new file mode 100644 index 00000000000..d3c9e1ebff2 --- /dev/null +++ b/pkg/schedule/schedulers/evict_leader.go @@ -0,0 +1,487 @@ +// Copyright 2017 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package schedulers + +import ( + "net/http" + "strconv" + + "github.com/gorilla/mux" + "github.com/pingcap/errors" + "github.com/pingcap/log" + "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/core/constant" + "github.com/tikv/pd/pkg/errs" + sche "github.com/tikv/pd/pkg/schedule/core" + "github.com/tikv/pd/pkg/schedule/filter" + "github.com/tikv/pd/pkg/schedule/operator" + "github.com/tikv/pd/pkg/schedule/plan" + "github.com/tikv/pd/pkg/schedule/types" + "github.com/tikv/pd/pkg/utils/apiutil" + "github.com/tikv/pd/pkg/utils/syncutil" + "github.com/unrolled/render" + "go.uber.org/zap" +) + +const ( + // EvictLeaderBatchSize is the number of operators to transfer + // leaders by one scheduling + EvictLeaderBatchSize = 3 + lastStoreDeleteInfo = "The last store has been deleted" +) + +type evictLeaderSchedulerConfig struct { + syncutil.RWMutex + schedulerConfig + + StoreIDWithRanges map[uint64][]core.KeyRange `json:"store-id-ranges"` + // Batch is used to generate multiple operators by one scheduling + Batch int `json:"batch"` + cluster *core.BasicCluster + removeSchedulerCb func(string) error +} + +func (conf *evictLeaderSchedulerConfig) getStores() []uint64 { + conf.RLock() + defer conf.RUnlock() + stores := make([]uint64, 0, len(conf.StoreIDWithRanges)) + for storeID := range conf.StoreIDWithRanges { + stores = append(stores, storeID) + } + return stores +} + +func (conf *evictLeaderSchedulerConfig) getBatch() int { + conf.RLock() + defer conf.RUnlock() + return conf.Batch +} + +func (conf *evictLeaderSchedulerConfig) clone() *evictLeaderSchedulerConfig { + conf.RLock() + defer conf.RUnlock() + storeIDWithRanges := make(map[uint64][]core.KeyRange) + for id, ranges := range conf.StoreIDWithRanges { + storeIDWithRanges[id] = append(storeIDWithRanges[id], ranges...) 
+ } + return &evictLeaderSchedulerConfig{ + StoreIDWithRanges: storeIDWithRanges, + Batch: conf.Batch, + } +} + +func (conf *evictLeaderSchedulerConfig) getRanges(id uint64) []string { + conf.RLock() + defer conf.RUnlock() + ranges := conf.StoreIDWithRanges[id] + res := make([]string, 0, len(ranges)*2) + for index := range ranges { + res = append(res, (string)(ranges[index].StartKey), (string)(ranges[index].EndKey)) + } + return res +} + +func (conf *evictLeaderSchedulerConfig) removeStoreLocked(id uint64) (bool, error) { + _, exists := conf.StoreIDWithRanges[id] + if exists { + delete(conf.StoreIDWithRanges, id) + conf.cluster.ResumeLeaderTransfer(id) + return len(conf.StoreIDWithRanges) == 0, nil + } + return false, errs.ErrScheduleConfigNotExist.FastGenByArgs() +} + +func (conf *evictLeaderSchedulerConfig) resetStoreLocked(id uint64, keyRange []core.KeyRange) { + if err := conf.cluster.PauseLeaderTransfer(id); err != nil { + log.Error("pause leader transfer failed", zap.Uint64("store-id", id), errs.ZapError(err)) + } + conf.StoreIDWithRanges[id] = keyRange +} + +func (conf *evictLeaderSchedulerConfig) resetStore(id uint64, keyRange []core.KeyRange) { + conf.Lock() + defer conf.Unlock() + conf.resetStoreLocked(id, keyRange) +} + +func (conf *evictLeaderSchedulerConfig) getKeyRangesByID(id uint64) []core.KeyRange { + conf.RLock() + defer conf.RUnlock() + if ranges, exist := conf.StoreIDWithRanges[id]; exist { + return ranges + } + return nil +} + +func (conf *evictLeaderSchedulerConfig) encodeConfig() ([]byte, error) { + conf.RLock() + defer conf.RUnlock() + return EncodeConfig(conf) +} + +func (conf *evictLeaderSchedulerConfig) reloadConfig() error { + conf.Lock() + defer conf.Unlock() + newCfg := &evictLeaderSchedulerConfig{} + if err := conf.load(newCfg); err != nil { + return err + } + pauseAndResumeLeaderTransfer(conf.cluster, conf.StoreIDWithRanges, newCfg.StoreIDWithRanges) + conf.StoreIDWithRanges = newCfg.StoreIDWithRanges + conf.Batch = newCfg.Batch + return nil +} + +func (conf *evictLeaderSchedulerConfig) pauseLeaderTransfer(cluster sche.SchedulerCluster) error { + conf.RLock() + defer conf.RUnlock() + var res error + for id := range conf.StoreIDWithRanges { + if err := cluster.PauseLeaderTransfer(id); err != nil { + res = err + } + } + return res +} + +func (conf *evictLeaderSchedulerConfig) resumeLeaderTransfer(cluster sche.SchedulerCluster) { + conf.RLock() + defer conf.RUnlock() + for id := range conf.StoreIDWithRanges { + cluster.ResumeLeaderTransfer(id) + } +} + +func (conf *evictLeaderSchedulerConfig) pauseLeaderTransferIfStoreNotExist(id uint64) (bool, error) { + conf.RLock() + defer conf.RUnlock() + if _, exist := conf.StoreIDWithRanges[id]; !exist { + if err := conf.cluster.PauseLeaderTransfer(id); err != nil { + return exist, err + } + } + return true, nil +} + +func (conf *evictLeaderSchedulerConfig) resumeLeaderTransferIfExist(id uint64) { + conf.RLock() + defer conf.RUnlock() + conf.cluster.ResumeLeaderTransfer(id) +} + +func (conf *evictLeaderSchedulerConfig) update(id uint64, newRanges []core.KeyRange, batch int) error { + conf.Lock() + defer conf.Unlock() + if id != 0 { + conf.StoreIDWithRanges[id] = newRanges + } + conf.Batch = batch + err := conf.save() + if err != nil && id != 0 { + _, _ = conf.removeStoreLocked(id) + } + return err +} + +func (conf *evictLeaderSchedulerConfig) delete(id uint64) (any, error) { + conf.Lock() + var resp any + last, err := conf.removeStoreLocked(id) + if err != nil { + conf.Unlock() + return resp, err + } + + keyRanges := 
conf.StoreIDWithRanges[id] + err = conf.save() + if err != nil { + conf.resetStoreLocked(id, keyRanges) + conf.Unlock() + return resp, err + } + if !last { + conf.Unlock() + return resp, nil + } + conf.Unlock() + if err := conf.removeSchedulerCb(types.EvictLeaderScheduler.String()); err != nil { + if !errors.ErrorEqual(err, errs.ErrSchedulerNotFound.FastGenByArgs()) { + conf.resetStore(id, keyRanges) + } + return resp, err + } + resp = lastStoreDeleteInfo + return resp, nil +} + +type evictLeaderScheduler struct { + *BaseScheduler + conf *evictLeaderSchedulerConfig + handler http.Handler +} + +// newEvictLeaderScheduler creates an admin scheduler that transfers all leaders +// out of a store. +func newEvictLeaderScheduler(opController *operator.Controller, conf *evictLeaderSchedulerConfig) Scheduler { + handler := newEvictLeaderHandler(conf) + return &evictLeaderScheduler{ + BaseScheduler: NewBaseScheduler(opController, types.EvictLeaderScheduler, conf), + conf: conf, + handler: handler, + } +} + +// EvictStoreIDs returns the IDs of the evict-stores. +func (s *evictLeaderScheduler) EvictStoreIDs() []uint64 { + return s.conf.getStores() +} + +// ServeHTTP implements the http.Handler interface. +func (s *evictLeaderScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + s.handler.ServeHTTP(w, r) +} + +// GetName implements the Scheduler interface. +func (s *evictLeaderScheduler) EncodeConfig() ([]byte, error) { + return s.conf.encodeConfig() +} + +// ReloadConfig reloads the config from the storage. +func (s *evictLeaderScheduler) ReloadConfig() error { + return s.conf.reloadConfig() +} + +// PrepareConfig implements the Scheduler interface. +func (s *evictLeaderScheduler) PrepareConfig(cluster sche.SchedulerCluster) error { + return s.conf.pauseLeaderTransfer(cluster) +} + +// CleanConfig implements the Scheduler interface. +func (s *evictLeaderScheduler) CleanConfig(cluster sche.SchedulerCluster) { + s.conf.resumeLeaderTransfer(cluster) +} + +// IsScheduleAllowed implements the Scheduler interface. +func (s *evictLeaderScheduler) IsScheduleAllowed(cluster sche.SchedulerCluster) bool { + allowed := s.OpController.OperatorCount(operator.OpLeader) < cluster.GetSchedulerConfig().GetLeaderScheduleLimit() + if !allowed { + operator.IncOperatorLimitCounter(s.GetType(), operator.OpLeader) + } + return allowed +} + +// Schedule implements the Scheduler interface. +func (s *evictLeaderScheduler) Schedule(cluster sche.SchedulerCluster, _ bool) ([]*operator.Operator, []plan.Plan) { + evictLeaderCounter.Inc() + return scheduleEvictLeaderBatch(s.GetName(), cluster, s.conf), nil +} + +func uniqueAppendOperator(dst []*operator.Operator, src ...*operator.Operator) []*operator.Operator { + regionIDs := make(map[uint64]struct{}) + for i := range dst { + regionIDs[dst[i].RegionID()] = struct{}{} + } + for i := range src { + if _, ok := regionIDs[src[i].RegionID()]; ok { + continue + } + regionIDs[src[i].RegionID()] = struct{}{} + dst = append(dst, src[i]) + } + return dst +} + +type evictLeaderStoresConf interface { + getStores() []uint64 + getKeyRangesByID(id uint64) []core.KeyRange + getBatch() int +} + +func scheduleEvictLeaderBatch(name string, cluster sche.SchedulerCluster, conf evictLeaderStoresConf) []*operator.Operator { + var ops []*operator.Operator + batchSize := conf.getBatch() + for i := 0; i < batchSize; i++ { + once := scheduleEvictLeaderOnce(name, cluster, conf) + // no more regions + if len(once) == 0 { + break + } + ops = uniqueAppendOperator(ops, once...) 
+ // the batch has been fulfilled + if len(ops) > batchSize { + break + } + } + return ops +} + +func scheduleEvictLeaderOnce(name string, cluster sche.SchedulerCluster, conf evictLeaderStoresConf) []*operator.Operator { + stores := conf.getStores() + ops := make([]*operator.Operator, 0, len(stores)) + for _, storeID := range stores { + ranges := conf.getKeyRangesByID(storeID) + if len(ranges) == 0 { + continue + } + var filters []filter.Filter + pendingFilter := filter.NewRegionPendingFilter() + downFilter := filter.NewRegionDownFilter() + region := filter.SelectOneRegion(cluster.RandLeaderRegions(storeID, ranges), nil, pendingFilter, downFilter) + if region == nil { + // try to pick unhealthy region + region = filter.SelectOneRegion(cluster.RandLeaderRegions(storeID, ranges), nil) + if region == nil { + evictLeaderNoLeaderCounter.Inc() + continue + } + evictLeaderPickUnhealthyCounter.Inc() + unhealthyPeerStores := make(map[uint64]struct{}) + for _, peer := range region.GetDownPeers() { + unhealthyPeerStores[peer.GetPeer().GetStoreId()] = struct{}{} + } + for _, peer := range region.GetPendingPeers() { + unhealthyPeerStores[peer.GetStoreId()] = struct{}{} + } + filters = append(filters, filter.NewExcludedFilter(name, nil, unhealthyPeerStores)) + } + + filters = append(filters, &filter.StoreStateFilter{ActionScope: name, TransferLeader: true, OperatorLevel: constant.Urgent}) + candidates := filter.NewCandidates(cluster.GetFollowerStores(region)). + FilterTarget(cluster.GetSchedulerConfig(), nil, nil, filters...) + // Compatible with old TiKV transfer leader logic. + target := candidates.RandomPick() + targets := candidates.PickAll() + // `targets` MUST contains `target`, so only needs to check if `target` is nil here. + if target == nil { + evictLeaderNoTargetStoreCounter.Inc() + continue + } + targetIDs := make([]uint64, 0, len(targets)) + for _, t := range targets { + targetIDs = append(targetIDs, t.GetID()) + } + op, err := operator.CreateTransferLeaderOperator(name, cluster, region, target.GetID(), targetIDs, operator.OpLeader) + if err != nil { + log.Debug("fail to create evict leader operator", errs.ZapError(err)) + continue + } + op.SetPriorityLevel(constant.Urgent) + op.Counters = append(op.Counters, evictLeaderNewOperatorCounter) + ops = append(ops, op) + } + return ops +} + +type evictLeaderHandler struct { + rd *render.Render + config *evictLeaderSchedulerConfig +} + +func (handler *evictLeaderHandler) updateConfig(w http.ResponseWriter, r *http.Request) { + var input map[string]any + if err := apiutil.ReadJSONRespondError(handler.rd, w, r.Body, &input); err != nil { + return + } + var ( + exist bool + err error + id uint64 + newRanges []core.KeyRange + ) + idFloat, inputHasStoreID := input["store_id"].(float64) + if inputHasStoreID { + id = (uint64)(idFloat) + exist, err = handler.config.pauseLeaderTransferIfStoreNotExist(id) + if err != nil { + handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + } + + batch := handler.config.getBatch() + batchFloat, ok := input["batch"].(float64) + if ok { + if batchFloat < 1 || batchFloat > 10 { + handler.config.resumeLeaderTransferIfExist(id) + handler.rd.JSON(w, http.StatusBadRequest, "batch is invalid, it should be in [1, 10]") + return + } + batch = (int)(batchFloat) + } + + ranges, ok := (input["ranges"]).([]string) + if ok { + if !inputHasStoreID { + handler.config.resumeLeaderTransferIfExist(id) + handler.rd.JSON(w, http.StatusInternalServerError, errs.ErrSchedulerConfig.FastGenByArgs("id")) + return + } + } 
else if exist { + ranges = handler.config.getRanges(id) + } + + newRanges, err = getKeyRanges(ranges) + if err != nil { + handler.config.resumeLeaderTransferIfExist(id) + handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + + // StoreIDWithRanges is only changed in update function. + err = handler.config.update(id, newRanges, batch) + if err != nil { + handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + handler.rd.JSON(w, http.StatusOK, "The scheduler has been applied to the store.") +} + +func (handler *evictLeaderHandler) listConfig(w http.ResponseWriter, _ *http.Request) { + conf := handler.config.clone() + handler.rd.JSON(w, http.StatusOK, conf) +} + +func (handler *evictLeaderHandler) deleteConfig(w http.ResponseWriter, r *http.Request) { + idStr := mux.Vars(r)["store_id"] + id, err := strconv.ParseUint(idStr, 10, 64) + if err != nil { + handler.rd.JSON(w, http.StatusBadRequest, err.Error()) + return + } + + resp, err := handler.config.delete(id) + if err != nil { + if errors.ErrorEqual(err, errs.ErrSchedulerNotFound.FastGenByArgs()) || errors.ErrorEqual(err, errs.ErrScheduleConfigNotExist.FastGenByArgs()) { + handler.rd.JSON(w, http.StatusNotFound, err.Error()) + } else { + handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) + } + return + } + + handler.rd.JSON(w, http.StatusOK, resp) +} + +func newEvictLeaderHandler(config *evictLeaderSchedulerConfig) http.Handler { + h := &evictLeaderHandler{ + config: config, + rd: render.New(render.Options{IndentJSON: true}), + } + router := mux.NewRouter() + router.HandleFunc("/config", h.updateConfig).Methods(http.MethodPost) + router.HandleFunc("/list", h.listConfig).Methods(http.MethodGet) + router.HandleFunc("/delete/{store_id}", h.deleteConfig).Methods(http.MethodDelete) + return router +} diff --git a/server/schedulers/grant_leader.go b/server/schedulers/grant_leader.go index 18be38f16a2..743ccb6413c 100644 --- a/server/schedulers/grant_leader.go +++ b/server/schedulers/grant_leader.go @@ -297,7 +297,19 @@ func (handler *grantLeaderHandler) UpdateConfig(w http.ResponseWriter, r *http.R handler.config.BuildWithArgs(args) err := handler.config.Persist() if err != nil { +<<<<<<< HEAD:server/schedulers/grant_leader.go handler.config.removeStore(id) +======= + handler.config.Lock() + handler.config.cluster.ResumeLeaderTransfer(id) + handler.config.Unlock() + handler.rd.JSON(w, http.StatusBadRequest, err.Error()) + return + } + err = handler.config.persist() + if err != nil { + _, _ = handler.config.removeStore(id) +>>>>>>> f3e9d9ad0 (*: let TestEvictLeaderScheduler run in two modes (#8663)):pkg/schedule/schedulers/grant_leader.go handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } diff --git a/tests/server/api/region_test.go b/tests/server/api/region_test.go new file mode 100644 index 00000000000..3a187629830 --- /dev/null +++ b/tests/server/api/region_test.go @@ -0,0 +1,437 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package api + +import ( + "encoding/hex" + "encoding/json" + "fmt" + "net/http" + "strconv" + "testing" + + "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/schedule/placement" + tu "github.com/tikv/pd/pkg/utils/testutil" + "github.com/tikv/pd/tests" +) + +type regionTestSuite struct { + suite.Suite + env *tests.SchedulingTestEnvironment +} + +func TestRegionTestSuite(t *testing.T) { + suite.Run(t, new(regionTestSuite)) +} + +func (suite *regionTestSuite) SetupSuite() { + suite.env = tests.NewSchedulingTestEnvironment(suite.T()) +} + +func (suite *regionTestSuite) TearDownSuite() { + suite.env.Cleanup() +} + +func (suite *regionTestSuite) TearDownTest() { + cleanFunc := func(cluster *tests.TestCluster) { + // clean region cache + leader := cluster.GetLeaderServer() + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + for _, region := range leader.GetRegions() { + url := fmt.Sprintf("%s/pd/api/v1/admin/cache/region/%d", pdAddr, region.GetID()) + err := tu.CheckDelete(tests.TestDialClient, url, tu.StatusOK(re)) + re.NoError(err) + } + re.Empty(leader.GetRegions()) + // clean rules + def := placement.GroupBundle{ + ID: "pd", + Rules: []*placement.Rule{ + {GroupID: "pd", ID: "default", Role: "voter", Count: 3}, + }, + } + data, err := json.Marshal([]placement.GroupBundle{def}) + re.NoError(err) + urlPrefix := cluster.GetLeaderServer().GetAddr() + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/pd/api/v1/config/placement-rule", data, tu.StatusOK(re)) + re.NoError(err) + // clean stores + for _, store := range leader.GetStores() { + re.NoError(cluster.GetLeaderServer().GetRaftCluster().RemoveStore(store.GetId(), true)) + re.NoError(cluster.GetLeaderServer().GetRaftCluster().BuryStore(store.GetId(), true)) + } + re.NoError(cluster.GetLeaderServer().GetRaftCluster().RemoveTombStoneRecords()) + re.Empty(leader.GetStores()) + tu.Eventually(re, func() bool { + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + for _, s := range sche.GetBasicCluster().GetStores() { + if s.GetState() != metapb.StoreState_Tombstone { + return false + } + } + } + return true + }) + } + suite.env.RunTestBasedOnMode(cleanFunc) +} + +func (suite *regionTestSuite) TestSplitRegions() { + // use a new environment to avoid affecting other tests + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestBasedOnMode(suite.checkSplitRegions) + env.Cleanup() +} + +func (suite *regionTestSuite) checkSplitRegions(cluster *tests.TestCluster) { + leader := cluster.GetLeaderServer() + urlPrefix := leader.GetAddr() + "/pd/api/v1" + re := suite.Require() + s1 := &metapb.Store{ + Id: 13, + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + } + tests.MustPutStore(re, cluster, s1) + r1 := core.NewTestRegionInfo(601, 13, []byte("aaa"), []byte("ggg")) + r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 5, StoreId: 14}, &metapb.Peer{Id: 6, StoreId: 15}) + tests.MustPutRegionInfo(re, cluster, r1) + checkRegionCount(re, cluster, 1) + + newRegionID := uint64(11) + body := fmt.Sprintf(`{"retry_limit":%v, "split_keys": ["%s","%s","%s"]}`, 3, + hex.EncodeToString([]byte("bbb")), + hex.EncodeToString([]byte("ccc")), + hex.EncodeToString([]byte("ddd"))) + checkOpt := func(res []byte, _ int, _ 
http.Header) { + s := &struct { + ProcessedPercentage int `json:"processed-percentage"` + NewRegionsID []uint64 `json:"regions-id"` + }{} + err := json.Unmarshal(res, s) + re.NoError(err) + re.Equal(100, s.ProcessedPercentage) + re.Equal([]uint64{newRegionID}, s.NewRegionsID) + } + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/handler/splitResponses", fmt.Sprintf("return(%v)", newRegionID))) + err := tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/regions/split", urlPrefix), []byte(body), checkOpt) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/handler/splitResponses")) + re.NoError(err) +} + +func (suite *regionTestSuite) TestAccelerateRegionsScheduleInRange() { + re := suite.Require() + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/checker/skipCheckSuspectRanges", "return(true)")) + suite.env.RunTestBasedOnMode(suite.checkAccelerateRegionsScheduleInRange) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/checker/skipCheckSuspectRanges")) +} + +func (suite *regionTestSuite) checkAccelerateRegionsScheduleInRange(cluster *tests.TestCluster) { + leader := cluster.GetLeaderServer() + urlPrefix := leader.GetAddr() + "/pd/api/v1" + re := suite.Require() + for i := 1; i <= 3; i++ { + s1 := &metapb.Store{ + Id: uint64(i), + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + } + tests.MustPutStore(re, cluster, s1) + } + regionCount := uint64(3) + for i := uint64(1); i <= regionCount; i++ { + r1 := core.NewTestRegionInfo(550+i, 1, []byte("a"+strconv.FormatUint(i, 10)), []byte("a"+strconv.FormatUint(i+1, 10))) + r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 100 + i, StoreId: (i + 1) % regionCount}, &metapb.Peer{Id: 200 + i, StoreId: (i + 2) % regionCount}) + tests.MustPutRegionInfo(re, cluster, r1) + } + checkRegionCount(re, cluster, regionCount) + + body := fmt.Sprintf(`{"start_key":"%s", "end_key": "%s"}`, hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a3"))) + err := tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/regions/accelerate-schedule", urlPrefix), []byte(body), + tu.StatusOK(re)) + re.NoError(err) + idList := leader.GetRaftCluster().GetPendingProcessedRegions() + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + idList = sche.GetCluster().GetCoordinator().GetCheckerController().GetPendingProcessedRegions() + } + re.Len(idList, 2, len(idList)) +} + +func (suite *regionTestSuite) TestAccelerateRegionsScheduleInRanges() { + re := suite.Require() + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/checker/skipCheckSuspectRanges", "return(true)")) + suite.env.RunTestBasedOnMode(suite.checkAccelerateRegionsScheduleInRanges) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/checker/skipCheckSuspectRanges")) +} + +func (suite *regionTestSuite) checkAccelerateRegionsScheduleInRanges(cluster *tests.TestCluster) { + leader := cluster.GetLeaderServer() + urlPrefix := leader.GetAddr() + "/pd/api/v1" + re := suite.Require() + for i := 1; i <= 6; i++ { + s1 := &metapb.Store{ + Id: uint64(i), + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + } + tests.MustPutStore(re, cluster, s1) + } + regionCount := uint64(6) + for i := uint64(1); i <= regionCount; i++ { + r1 := core.NewTestRegionInfo(550+i, 1, []byte("a"+strconv.FormatUint(i, 10)), []byte("a"+strconv.FormatUint(i+1, 10))) + r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 100 + i, StoreId: (i + 1) % regionCount}, &metapb.Peer{Id: 200 + i, StoreId: (i 
+ 2) % regionCount}) + tests.MustPutRegionInfo(re, cluster, r1) + } + checkRegionCount(re, cluster, regionCount) + + body := fmt.Sprintf(`[{"start_key":"%s", "end_key": "%s"}, {"start_key":"%s", "end_key": "%s"}]`, + hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a3")), hex.EncodeToString([]byte("a4")), hex.EncodeToString([]byte("a6"))) + err := tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/regions/accelerate-schedule/batch", urlPrefix), []byte(body), + tu.StatusOK(re)) + re.NoError(err) + idList := leader.GetRaftCluster().GetPendingProcessedRegions() + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + idList = sche.GetCluster().GetCoordinator().GetCheckerController().GetPendingProcessedRegions() + } + re.Len(idList, 4) +} + +func (suite *regionTestSuite) TestScatterRegions() { + // use a new environment to avoid affecting other tests + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestBasedOnMode(suite.checkScatterRegions) + env.Cleanup() +} + +func (suite *regionTestSuite) checkScatterRegions(cluster *tests.TestCluster) { + leader := cluster.GetLeaderServer() + urlPrefix := leader.GetAddr() + "/pd/api/v1" + re := suite.Require() + for i := 13; i <= 16; i++ { + s1 := &metapb.Store{ + Id: uint64(i), + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + } + tests.MustPutStore(re, cluster, s1) + } + r1 := core.NewTestRegionInfo(701, 13, []byte("b1"), []byte("b2")) + r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 5, StoreId: 14}, &metapb.Peer{Id: 6, StoreId: 15}) + r2 := core.NewTestRegionInfo(702, 13, []byte("b2"), []byte("b3")) + r2.GetMeta().Peers = append(r2.GetMeta().Peers, &metapb.Peer{Id: 7, StoreId: 14}, &metapb.Peer{Id: 8, StoreId: 15}) + r3 := core.NewTestRegionInfo(703, 13, []byte("b4"), []byte("b4")) + r3.GetMeta().Peers = append(r3.GetMeta().Peers, &metapb.Peer{Id: 9, StoreId: 14}, &metapb.Peer{Id: 10, StoreId: 15}) + tests.MustPutRegionInfo(re, cluster, r1) + tests.MustPutRegionInfo(re, cluster, r2) + tests.MustPutRegionInfo(re, cluster, r3) + checkRegionCount(re, cluster, 3) + + body := fmt.Sprintf(`{"start_key":"%s", "end_key": "%s"}`, hex.EncodeToString([]byte("b1")), hex.EncodeToString([]byte("b3"))) + err := tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/regions/scatter", urlPrefix), []byte(body), tu.StatusOK(re)) + re.NoError(err) + oc := leader.GetRaftCluster().GetOperatorController() + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + oc = sche.GetCoordinator().GetOperatorController() + } + + op1 := oc.GetOperator(701) + op2 := oc.GetOperator(702) + op3 := oc.GetOperator(703) + // At least one operator used to scatter region + re.True(op1 != nil || op2 != nil || op3 != nil) + + body = `{"regions_id": [701, 702, 703]}` + err = tu.CheckPostJSON(tests.TestDialClient, fmt.Sprintf("%s/regions/scatter", urlPrefix), []byte(body), tu.StatusOK(re)) + re.NoError(err) +} + +func (suite *regionTestSuite) TestCheckRegionsReplicated() { + suite.env.RunTestBasedOnMode(suite.checkRegionsReplicated) +} + +func (suite *regionTestSuite) checkRegionsReplicated(cluster *tests.TestCluster) { + re := suite.Require() + pauseAllCheckers(re, cluster) + leader := cluster.GetLeaderServer() + urlPrefix := leader.GetAddr() + "/pd/api/v1" + + // add test region + s1 := &metapb.Store{ + Id: 1, + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + } + tests.MustPutStore(re, cluster, s1) + r1 := core.NewTestRegionInfo(2, 1, []byte("a"), []byte("b")) + 
tests.MustPutRegionInfo(re, cluster, r1) + checkRegionCount(re, cluster, 1) + + // set the bundle + bundle := []placement.GroupBundle{ + { + ID: "5", + Index: 5, + Rules: []*placement.Rule{ + { + ID: "foo", Index: 1, Role: placement.Voter, Count: 1, + }, + }, + }, + } + + status := "" + + // invalid url + url := fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, urlPrefix, "_", "t") + err := tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.Status(re, http.StatusBadRequest)) + re.NoError(err) + + url = fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, urlPrefix, hex.EncodeToString(r1.GetStartKey()), "_") + err = tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.Status(re, http.StatusBadRequest)) + re.NoError(err) + + // correct test + url = fmt.Sprintf(`%s/regions/replicated?startKey=%s&endKey=%s`, urlPrefix, hex.EncodeToString(r1.GetStartKey()), hex.EncodeToString(r1.GetEndKey())) + err = tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.StatusOK(re)) + re.NoError(err) + + // test one rule + data, err := json.Marshal(bundle) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + re.NoError(err) + + tu.Eventually(re, func() bool { + respBundle := make([]placement.GroupBundle, 0) + err = tu.CheckGetJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", nil, + tu.StatusOK(re), tu.ExtractJSON(re, &respBundle)) + re.NoError(err) + return len(respBundle) == 1 && respBundle[0].ID == "5" + }) + + tu.Eventually(re, func() bool { + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &status) + re.NoError(err) + return status == "REPLICATED" + }) + + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/handler/mockPending", "return(true)")) + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &status) + re.NoError(err) + re.Equal("PENDING", status) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/handler/mockPending")) + // test multiple rules + r1 = core.NewTestRegionInfo(2, 1, []byte("a"), []byte("b")) + r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 5, StoreId: 1}) + tests.MustPutRegionInfo(re, cluster, r1) + + bundle[0].Rules = append(bundle[0].Rules, &placement.Rule{ + ID: "bar", Index: 1, Role: placement.Voter, Count: 1, + }) + data, err = json.Marshal(bundle) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + re.NoError(err) + + tu.Eventually(re, func() bool { + respBundle := make([]placement.GroupBundle, 0) + err = tu.CheckGetJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", nil, + tu.StatusOK(re), tu.ExtractJSON(re, &respBundle)) + re.NoError(err) + return len(respBundle) == 1 && len(respBundle[0].Rules) == 2 + }) + + tu.Eventually(re, func() bool { + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &status) + re.NoError(err) + return status == "REPLICATED" + }) + + // test multiple bundles + bundle = append(bundle, placement.GroupBundle{ + ID: "6", + Index: 6, + Rules: []*placement.Rule{ + { + ID: "foo", Index: 1, Role: placement.Voter, Count: 2, + }, + }, + }) + data, err = json.Marshal(bundle) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + re.NoError(err) + + tu.Eventually(re, func() bool { + respBundle := make([]placement.GroupBundle, 0) + err = tu.CheckGetJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", nil, + tu.StatusOK(re), tu.ExtractJSON(re, &respBundle)) + 
re.NoError(err) + if len(respBundle) != 2 { + return false + } + s1 := respBundle[0].ID == "5" && respBundle[1].ID == "6" + s2 := respBundle[0].ID == "6" && respBundle[1].ID == "5" + return s1 || s2 + }) + + tu.Eventually(re, func() bool { + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &status) + re.NoError(err) + return status == "INPROGRESS" + }) + + r1 = core.NewTestRegionInfo(2, 1, []byte("a"), []byte("b")) + r1.GetMeta().Peers = append(r1.GetMeta().Peers, &metapb.Peer{Id: 5, StoreId: 1}, &metapb.Peer{Id: 6, StoreId: 1}, &metapb.Peer{Id: 7, StoreId: 1}) + tests.MustPutRegionInfo(re, cluster, r1) + + tu.Eventually(re, func() bool { + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &status) + re.NoError(err) + return status == "REPLICATED" + }) +} + +func checkRegionCount(re *require.Assertions, cluster *tests.TestCluster, count uint64) { + leader := cluster.GetLeaderServer() + tu.Eventually(re, func() bool { + return leader.GetRaftCluster().GetRegionCount([]byte{}, []byte{}) == int(count) + }) + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + tu.Eventually(re, func() bool { + return sche.GetCluster().GetRegionCount([]byte{}, []byte{}) == int(count) + }) + } +} + +func pauseAllCheckers(re *require.Assertions, cluster *tests.TestCluster) { + checkerNames := []string{"learner", "replica", "rule", "split", "merge", "joint-state"} + addr := cluster.GetLeaderServer().GetAddr() + for _, checkerName := range checkerNames { + resp := make(map[string]any) + url := fmt.Sprintf("%s/pd/api/v1/checker/%s", addr, checkerName) + err := tu.CheckPostJSON(tests.TestDialClient, url, []byte(`{"delay":1000}`), tu.StatusOK(re)) + re.NoError(err) + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &resp) + re.NoError(err) + re.True(resp["paused"].(bool)) + } +} diff --git a/tests/server/api/rule_test.go b/tests/server/api/rule_test.go new file mode 100644 index 00000000000..303264d7057 --- /dev/null +++ b/tests/server/api/rule_test.go @@ -0,0 +1,1435 @@ +// Copyright 2020 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package api + +import ( + "encoding/hex" + "encoding/json" + "fmt" + "net/http" + "net/url" + "sort" + "strconv" + "sync" + "testing" + + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/schedule/labeler" + "github.com/tikv/pd/pkg/schedule/placement" + "github.com/tikv/pd/pkg/utils/etcdutil" + "github.com/tikv/pd/pkg/utils/syncutil" + tu "github.com/tikv/pd/pkg/utils/testutil" + "github.com/tikv/pd/server/config" + "github.com/tikv/pd/tests" +) + +type ruleTestSuite struct { + suite.Suite + env *tests.SchedulingTestEnvironment +} + +func TestRuleTestSuite(t *testing.T) { + suite.Run(t, new(ruleTestSuite)) +} + +func (suite *ruleTestSuite) SetupSuite() { + suite.env = tests.NewSchedulingTestEnvironment(suite.T(), func(conf *config.Config, _ string) { + conf.PDServerCfg.KeyType = "raw" + conf.Replication.EnablePlacementRules = true + }) +} + +func (suite *ruleTestSuite) TearDownSuite() { + suite.env.Cleanup() +} + +func (suite *ruleTestSuite) TearDownTest() { + re := suite.Require() + cleanFunc := func(cluster *tests.TestCluster) { + def := placement.GroupBundle{ + ID: "pd", + Rules: []*placement.Rule{ + {GroupID: "pd", ID: "default", Role: "voter", Count: 3}, + }, + } + data, err := json.Marshal([]placement.GroupBundle{def}) + re.NoError(err) + urlPrefix := cluster.GetLeaderServer().GetAddr() + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/pd/api/v1/config/placement-rule", data, tu.StatusOK(re)) + re.NoError(err) + } + suite.env.RunTestBasedOnMode(cleanFunc) +} + +func (suite *ruleTestSuite) TestSet() { + suite.env.RunTestBasedOnMode(suite.checkSet) +} + +func (suite *ruleTestSuite) checkSet(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + successData, err := json.Marshal(rule) + re.NoError(err) + oldStartKey, err := hex.DecodeString(rule.StartKeyHex) + re.NoError(err) + oldEndKey, err := hex.DecodeString(rule.EndKeyHex) + re.NoError(err) + parseErrData := []byte("foo") + rule1 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + checkErrData, err := json.Marshal(rule1) + re.NoError(err) + rule2 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: -1} + setErrData, err := json.Marshal(rule2) + re.NoError(err) + rule3 := placement.Rule{GroupID: "a", ID: "10", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Follower, Count: 3} + updateData, err := json.Marshal(rule3) + re.NoError(err) + newStartKey, err := hex.DecodeString(rule.StartKeyHex) + re.NoError(err) + newEndKey, err := hex.DecodeString(rule.EndKeyHex) + re.NoError(err) + + testCases := []struct { + name string + rawData []byte + success bool + response string + popKeyRange map[string]struct{} + }{ + { + name: "Set a new rule success", + rawData: successData, + success: true, + response: "", + popKeyRange: map[string]struct{}{ + hex.EncodeToString(oldStartKey): {}, + hex.EncodeToString(oldEndKey): {}, + }, + }, + { + name: "Update an existed rule success", + rawData: updateData, + success: true, + response: "", + popKeyRange: map[string]struct{}{ + 
hex.EncodeToString(oldStartKey): {}, + hex.EncodeToString(oldEndKey): {}, + hex.EncodeToString(newStartKey): {}, + hex.EncodeToString(newEndKey): {}, + }, + }, + { + name: "Parse Json failed", + rawData: parseErrData, + success: false, + response: `{ + "code": "input", + "msg": "invalid character 'o' in literal false (expecting 'a')", + "data": { + "Offset": 2 + } +} +`, + }, + { + name: "Check rule failed", + rawData: checkErrData, + success: false, + response: `"[PD:hex:ErrHexDecodingString]decode string XXXX error" +`, + }, + { + name: "Set Rule Failed", + rawData: setErrData, + success: false, + response: `"[PD:placement:ErrRuleContent]invalid rule content, invalid count -1" +`, + }, + } + for _, testCase := range testCases { + suite.T().Log(testCase.name) + // clear suspect keyRanges to prevent test case from others + leaderServer.GetRaftCluster().ClearSuspectKeyRanges() + if testCase.success { + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", testCase.rawData, tu.StatusOK(re)) + popKeyRangeMap := map[string]struct{}{} + for i := 0; i < len(testCase.popKeyRange)/2; i++ { + v, got := leaderServer.GetRaftCluster().PopOneSuspectKeyRange() + re.True(got) + popKeyRangeMap[hex.EncodeToString(v[0])] = struct{}{} + popKeyRangeMap[hex.EncodeToString(v[1])] = struct{}{} + } + re.Len(popKeyRangeMap, len(testCase.popKeyRange)) + for k := range popKeyRangeMap { + _, ok := testCase.popKeyRange[k] + re.True(ok) + } + } else { + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", testCase.rawData, + tu.StatusNotOK(re), + tu.StringEqual(re, testCase.response)) + } + re.NoError(err) + } +} + +func (suite *ruleTestSuite) TestGet() { + suite.env.RunTestBasedOnMode(suite.checkGet) +} + +func (suite *ruleTestSuite) checkGet(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule := placement.Rule{GroupID: "a", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + data, err := json.Marshal(rule) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) + re.NoError(err) + + testCases := []struct { + name string + rule placement.Rule + found bool + code int + }{ + { + name: "found", + rule: rule, + found: true, + code: http.StatusOK, + }, + { + name: "not found", + rule: placement.Rule{GroupID: "a", ID: "30", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, + found: false, + code: http.StatusNotFound, + }, + } + for i, testCase := range testCases { + suite.T().Log(testCase.name) + var resp placement.Rule + url := fmt.Sprintf("%s/rule/%s/%s", urlPrefix, testCase.rule.GroupID, testCase.rule.ID) + if testCase.found { + tu.Eventually(re, func() bool { + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &resp) + return compareRule(&resp, &testCases[i].rule) + }) + } else { + err = tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.Status(re, testCase.code)) + } + re.NoError(err) + } +} + +func (suite *ruleTestSuite) TestGetAll() { + suite.env.RunTestBasedOnMode(suite.checkGetAll) +} + +func (suite *ruleTestSuite) checkGetAll(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule := placement.Rule{GroupID: "b", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: 
placement.Voter, Count: 1} + data, err := json.Marshal(rule) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) + re.NoError(err) + + var resp2 []*placement.Rule + err = tu.ReadGetJSON(re, tests.TestDialClient, urlPrefix+"/rules", &resp2) + re.NoError(err) + re.NotEmpty(resp2) +} + +func (suite *ruleTestSuite) TestSetAll() { + suite.env.RunTestBasedOnMode(suite.checkSetAll) +} + +func (suite *ruleTestSuite) checkSetAll(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule1 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + rule2 := placement.Rule{GroupID: "b", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + rule3 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + rule4 := placement.Rule{GroupID: "a", ID: "12", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: -1} + rule5 := placement.Rule{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, StartKeyHex: "", EndKeyHex: "", Role: placement.Voter, Count: 1, + LocationLabels: []string{"host"}} + rule6 := placement.Rule{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, StartKeyHex: "", EndKeyHex: "", Role: placement.Voter, Count: 3} + + leaderServer.GetPersistOptions().GetReplicationConfig().LocationLabels = []string{"host"} + defaultRule := leaderServer.GetRaftCluster().GetRuleManager().GetRule(placement.DefaultGroupID, placement.DefaultRuleID) + defaultRule.LocationLabels = []string{"host"} + leaderServer.GetRaftCluster().GetRuleManager().SetRule(defaultRule) + + successData, err := json.Marshal([]*placement.Rule{&rule1, &rule2}) + re.NoError(err) + + checkErrData, err := json.Marshal([]*placement.Rule{&rule1, &rule3}) + re.NoError(err) + + setErrData, err := json.Marshal([]*placement.Rule{&rule1, &rule4}) + re.NoError(err) + + defaultData, err := json.Marshal([]*placement.Rule{&rule1, &rule5}) + re.NoError(err) + + recoverData, err := json.Marshal([]*placement.Rule{&rule1, &rule6}) + re.NoError(err) + + testCases := []struct { + name string + rawData []byte + success bool + response string + isDefaultRule bool + count int + }{ + { + name: "Set rules successfully, with oldRules full of nil", + rawData: successData, + success: true, + response: "", + isDefaultRule: false, + }, + { + name: "Parse Json failed", + rawData: []byte("foo"), + success: false, + isDefaultRule: false, + response: `{ + "code": "input", + "msg": "invalid character 'o' in literal false (expecting 'a')", + "data": { + "Offset": 2 + } +} +`, + }, + { + name: "Check rule failed", + rawData: checkErrData, + success: false, + isDefaultRule: false, + response: `"[PD:hex:ErrHexDecodingString]decode string XXXX error" +`, + }, + { + name: "Set Rule Failed", + rawData: setErrData, + success: false, + isDefaultRule: false, + response: `"[PD:placement:ErrRuleContent]invalid rule content, invalid count -1" +`, + }, + { + name: "set default rule", + rawData: defaultData, + success: true, + response: "", + isDefaultRule: true, + count: 1, + }, + { + name: "recover default rule", + rawData: recoverData, + success: true, + response: "", + isDefaultRule: true, + count: 3, + }, + } + for _, testCase := range testCases { + suite.T().Log(testCase.name) + if testCase.success 
{ + err := tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rules", testCase.rawData, tu.StatusOK(re)) + re.NoError(err) + if testCase.isDefaultRule { + re.Equal(int(leaderServer.GetPersistOptions().GetReplicationConfig().MaxReplicas), testCase.count) + } + } else { + err := tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rules", testCase.rawData, + tu.StringEqual(re, testCase.response)) + re.NoError(err) + } + } +} + +func (suite *ruleTestSuite) TestGetAllByGroup() { + suite.env.RunTestBasedOnMode(suite.checkGetAllByGroup) +} + +func (suite *ruleTestSuite) checkGetAllByGroup(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule := placement.Rule{GroupID: "c", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + data, err := json.Marshal(rule) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) + re.NoError(err) + + rule1 := placement.Rule{GroupID: "c", ID: "30", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + data, err = json.Marshal(rule1) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) + re.NoError(err) + + testCases := []struct { + name string + groupID string + count int + }{ + { + name: "found group c", + groupID: "c", + count: 2, + }, + { + name: "not found d", + groupID: "d", + count: 0, + }, + } + + for _, testCase := range testCases { + suite.T().Log(testCase.name) + var resp []*placement.Rule + url := fmt.Sprintf("%s/rules/group/%s", urlPrefix, testCase.groupID) + tu.Eventually(re, func() bool { + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &resp) + re.NoError(err) + if len(resp) != testCase.count { + return false + } + if testCase.count == 2 { + return compareRule(resp[0], &rule) && compareRule(resp[1], &rule1) + } + return true + }) + } +} + +func (suite *ruleTestSuite) TestGetAllByRegion() { + suite.env.RunTestBasedOnMode(suite.checkGetAllByRegion) +} + +func (suite *ruleTestSuite) checkGetAllByRegion(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule := placement.Rule{GroupID: "e", ID: "20", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1} + data, err := json.Marshal(rule) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) + re.NoError(err) + + r := core.NewTestRegionInfo(4, 1, []byte{0x22, 0x22}, []byte{0x33, 0x33}) + tests.MustPutRegionInfo(re, cluster, r) + + testCases := []struct { + name string + regionID string + success bool + code int + }{ + { + name: "found region", + regionID: "4", + success: true, + }, + { + name: "parse regionId failed", + regionID: "abc", + success: false, + code: 400, + }, + { + name: "region not found", + regionID: "5", + success: false, + code: 404, + }, + } + for _, testCase := range testCases { + suite.T().Log(testCase.name) + var resp []*placement.Rule + url := fmt.Sprintf("%s/rules/region/%s", urlPrefix, testCase.regionID) + + if testCase.success { + tu.Eventually(re, func() bool { + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &resp) + for _, r := range resp { + if r.GroupID == "e" { + return compareRule(r, &rule) + } + } + return true + }) + } else { + err = 
tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.Status(re, testCase.code)) + } + re.NoError(err) + } +} + +func (suite *ruleTestSuite) TestGetAllByKey() { + suite.env.RunTestBasedOnMode(suite.checkGetAllByKey) +} + +func (suite *ruleTestSuite) checkGetAllByKey(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule := placement.Rule{GroupID: "f", ID: "40", StartKeyHex: "8888", EndKeyHex: "9111", Role: placement.Voter, Count: 1} + data, err := json.Marshal(rule) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) + re.NoError(err) + + testCases := []struct { + name string + key string + success bool + respSize int + code int + }{ + { + name: "key in range", + key: "8899", + success: true, + respSize: 2, + }, + { + name: "parse key failed", + key: "abc", + success: false, + code: 400, + respSize: 0, + }, + { + name: "key out of range", + key: "9999", + success: true, + respSize: 1, + }, + } + for _, testCase := range testCases { + suite.T().Log(testCase.name) + var resp []*placement.Rule + url := fmt.Sprintf("%s/rules/key/%s", urlPrefix, testCase.key) + if testCase.success { + tu.Eventually(re, func() bool { + err = tu.ReadGetJSON(re, tests.TestDialClient, url, &resp) + return len(resp) == testCase.respSize + }) + } else { + err = tu.CheckGetJSON(tests.TestDialClient, url, nil, tu.Status(re, testCase.code)) + } + re.NoError(err) + } +} + +func (suite *ruleTestSuite) TestDelete() { + suite.env.RunTestBasedOnMode(suite.checkDelete) +} + +func (suite *ruleTestSuite) checkDelete(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + rule := placement.Rule{GroupID: "g", ID: "10", StartKeyHex: "8888", EndKeyHex: "9111", Role: placement.Voter, Count: 1} + data, err := json.Marshal(rule) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rule", data, tu.StatusOK(re)) + re.NoError(err) + oldStartKey, err := hex.DecodeString(rule.StartKeyHex) + re.NoError(err) + oldEndKey, err := hex.DecodeString(rule.EndKeyHex) + re.NoError(err) + + testCases := []struct { + name string + groupID string + id string + popKeyRange map[string]struct{} + }{ + { + name: "delete existed rule", + groupID: "g", + id: "10", + popKeyRange: map[string]struct{}{ + hex.EncodeToString(oldStartKey): {}, + hex.EncodeToString(oldEndKey): {}, + }, + }, + { + name: "delete non-existed rule", + groupID: "g", + id: "15", + popKeyRange: map[string]struct{}{}, + }, + } + for _, testCase := range testCases { + suite.T().Log(testCase.name) + url := fmt.Sprintf("%s/rule/%s/%s", urlPrefix, testCase.groupID, testCase.id) + // clear suspect keyRanges to prevent test case from others + leaderServer.GetRaftCluster().ClearSuspectKeyRanges() + err = tu.CheckDelete(tests.TestDialClient, url, tu.StatusOK(re)) + re.NoError(err) + if len(testCase.popKeyRange) > 0 { + popKeyRangeMap := map[string]struct{}{} + for i := 0; i < len(testCase.popKeyRange)/2; i++ { + v, got := leaderServer.GetRaftCluster().PopOneSuspectKeyRange() + re.True(got) + popKeyRangeMap[hex.EncodeToString(v[0])] = struct{}{} + popKeyRangeMap[hex.EncodeToString(v[1])] = struct{}{} + } + re.Len(popKeyRangeMap, len(testCase.popKeyRange)) + for k := range popKeyRangeMap { + _, ok := testCase.popKeyRange[k] + 
re.True(ok) + } + } + } +} + +func (suite *ruleTestSuite) TestBatch() { + suite.env.RunTestBasedOnMode(suite.checkBatch) +} + +func (suite *ruleTestSuite) checkBatch(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + opt1 := placement.RuleOp{ + Action: placement.RuleOpAdd, + Rule: &placement.Rule{GroupID: "a", ID: "13", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, + } + opt2 := placement.RuleOp{ + Action: placement.RuleOpAdd, + Rule: &placement.Rule{GroupID: "b", ID: "13", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, + } + opt3 := placement.RuleOp{ + Action: placement.RuleOpAdd, + Rule: &placement.Rule{GroupID: "a", ID: "14", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, + } + opt4 := placement.RuleOp{ + Action: placement.RuleOpAdd, + Rule: &placement.Rule{GroupID: "a", ID: "15", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, + } + opt5 := placement.RuleOp{ + Action: placement.RuleOpDel, + Rule: &placement.Rule{GroupID: "a", ID: "14"}, + } + opt6 := placement.RuleOp{ + Action: placement.RuleOpDel, + Rule: &placement.Rule{GroupID: "b", ID: "1"}, + DeleteByIDPrefix: true, + } + opt7 := placement.RuleOp{ + Action: placement.RuleOpDel, + Rule: &placement.Rule{GroupID: "a", ID: "1"}, + } + opt8 := placement.RuleOp{ + Action: placement.RuleOpAdd, + Rule: &placement.Rule{GroupID: "a", ID: "16", StartKeyHex: "XXXX", EndKeyHex: "3333", Role: placement.Voter, Count: 1}, + } + opt9 := placement.RuleOp{ + Action: placement.RuleOpAdd, + Rule: &placement.Rule{GroupID: "a", ID: "17", StartKeyHex: "1111", EndKeyHex: "3333", Role: placement.Voter, Count: -1}, + } + + successData1, err := json.Marshal([]placement.RuleOp{opt1, opt2, opt3}) + re.NoError(err) + + successData2, err := json.Marshal([]placement.RuleOp{opt5, opt7}) + re.NoError(err) + + successData3, err := json.Marshal([]placement.RuleOp{opt4, opt6}) + re.NoError(err) + + checkErrData, err := json.Marshal([]placement.RuleOp{opt8}) + re.NoError(err) + + setErrData, err := json.Marshal([]placement.RuleOp{opt9}) + re.NoError(err) + + testCases := []struct { + name string + rawData []byte + success bool + response string + }{ + { + name: "Batch adds successfully", + rawData: successData1, + success: true, + response: "", + }, + { + name: "Batch removes successfully", + rawData: successData2, + success: true, + response: "", + }, + { + name: "Batch add and remove successfully", + rawData: successData3, + success: true, + response: "", + }, + { + name: "Parse Json failed", + rawData: []byte("foo"), + success: false, + response: `{ + "code": "input", + "msg": "invalid character 'o' in literal false (expecting 'a')", + "data": { + "Offset": 2 + } +} +`, + }, + { + name: "Check rule failed", + rawData: checkErrData, + success: false, + response: `"[PD:hex:ErrHexDecodingString]decode string XXXX error" +`, + }, + { + name: "Set Rule Failed", + rawData: setErrData, + success: false, + response: `"[PD:placement:ErrRuleContent]invalid rule content, invalid count -1" +`, + }, + } + for _, testCase := range testCases { + suite.T().Log(testCase.name) + if testCase.success { + err := tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rules/batch", testCase.rawData, tu.StatusOK(re)) + re.NoError(err) + } else { + err := tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/rules/batch", testCase.rawData, 
+ tu.StatusNotOK(re), + tu.StringEqual(re, testCase.response)) + re.NoError(err) + } + } +} + +func (suite *ruleTestSuite) TestBundle() { + suite.env.RunTestBasedOnMode(suite.checkBundle) +} + +func (suite *ruleTestSuite) checkBundle(cluster *tests.TestCluster) { + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + re := suite.Require() + // GetAll + b1 := placement.GroupBundle{ + ID: placement.DefaultGroupID, + Rules: []*placement.Rule{ + { + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, + Role: placement.Voter, + Count: 3, + }, + }, + } + assertBundlesEqual(re, urlPrefix+"/placement-rule", []placement.GroupBundle{b1}, 1) + + // Set + b2 := placement.GroupBundle{ + ID: "foo", + Index: 42, + Override: true, + Rules: []*placement.Rule{ + {GroupID: "foo", ID: "bar", Index: 1, Override: true, Role: placement.Voter, Count: 1}, + }, + } + data, err := json.Marshal(b2) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/placement-rule/foo", data, tu.StatusOK(re)) + re.NoError(err) + + // Get + assertBundleEqual(re, urlPrefix+"/placement-rule/foo", b2) + + // GetAll again + assertBundlesEqual(re, urlPrefix+"/placement-rule", []placement.GroupBundle{b1, b2}, 2) + + // Delete + err = tu.CheckDelete(tests.TestDialClient, urlPrefix+"/placement-rule/pd", tu.StatusOK(re)) + re.NoError(err) + + // GetAll again + assertBundlesEqual(re, urlPrefix+"/placement-rule", []placement.GroupBundle{b2}, 1) + + // SetAll + b2.Rules = append(b2.Rules, &placement.Rule{GroupID: "foo", ID: "baz", Index: 2, Role: placement.Follower, Count: 1}) + b2.Index, b2.Override = 0, false + b3 := placement.GroupBundle{ID: "foobar", Index: 100} + data, err = json.Marshal([]placement.GroupBundle{b1, b2, b3}) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/placement-rule", data, tu.StatusOK(re)) + re.NoError(err) + + // GetAll again + assertBundlesEqual(re, urlPrefix+"/placement-rule", []placement.GroupBundle{b1, b2, b3}, 3) + + // Delete using regexp + err = tu.CheckDelete(tests.TestDialClient, urlPrefix+"/placement-rule/"+url.PathEscape("foo.*")+"?regexp", tu.StatusOK(re)) + re.NoError(err) + + // GetAll again + assertBundlesEqual(re, urlPrefix+"/placement-rule", []placement.GroupBundle{b1}, 1) + + // Set + id := "rule-without-group-id" + b4 := placement.GroupBundle{ + Index: 4, + Rules: []*placement.Rule{ + {ID: "bar", Index: 1, Override: true, Role: placement.Voter, Count: 1}, + }, + } + data, err = json.Marshal(b4) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/placement-rule/"+id, data, tu.StatusOK(re)) + re.NoError(err) + + b4.ID = id + b4.Rules[0].GroupID = b4.ID + // Get + assertBundleEqual(re, urlPrefix+"/placement-rule/"+id, b4) + + // GetAll again + assertBundlesEqual(re, urlPrefix+"/placement-rule", []placement.GroupBundle{b1, b4}, 2) + + // SetAll + b5 := placement.GroupBundle{ + ID: "rule-without-group-id-2", + Index: 5, + Rules: []*placement.Rule{ + {ID: "bar", Index: 1, Override: true, Role: placement.Voter, Count: 1}, + }, + } + data, err = json.Marshal([]placement.GroupBundle{b1, b4, b5}) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/placement-rule", data, tu.StatusOK(re)) + re.NoError(err) + + b5.Rules[0].GroupID = b5.ID + + // GetAll again + assertBundlesEqual(re, urlPrefix+"/placement-rule", []placement.GroupBundle{b1, b4, b5}, 3) +} + +func (suite *ruleTestSuite) 
TestBundleBadRequest() { + suite.env.RunTestBasedOnMode(suite.checkBundleBadRequest) +} + +func (suite *ruleTestSuite) checkBundleBadRequest(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1/config", pdAddr, apiPrefix) + + testCases := []struct { + uri string + data string + ok bool + }{ + {"/placement-rule/foo", `{"group_id":"foo"}`, true}, + {"/placement-rule/foo", `{"group_id":"bar"}`, false}, + {"/placement-rule/foo", `{"group_id":"foo", "rules": [{"group_id":"foo", "id":"baz", "role":"voter", "count":1}]}`, true}, + {"/placement-rule/foo", `{"group_id":"foo", "rules": [{"group_id":"bar", "id":"baz", "role":"voter", "count":1}]}`, false}, + {"/placement-rule", `[{"group_id":"foo", "rules": [{"group_id":"foo", "id":"baz", "role":"voter", "count":1}]}]`, true}, + {"/placement-rule", `[{"group_id":"foo", "rules": [{"group_id":"bar", "id":"baz", "role":"voter", "count":1}]}]`, false}, + } + for _, testCase := range testCases { + err := tu.CheckPostJSON(tests.TestDialClient, urlPrefix+testCase.uri, []byte(testCase.data), + func(_ []byte, code int, _ http.Header) { + re.Equal(testCase.ok, code == http.StatusOK) + }) + re.NoError(err) + } +} + +func (suite *ruleTestSuite) TestLeaderAndVoter() { + suite.env.RunTestBasedOnMode(suite.checkLeaderAndVoter) +} + +func (suite *ruleTestSuite) checkLeaderAndVoter(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1", pdAddr, apiPrefix) + + stores := []*metapb.Store{ + { + Id: 1, + Address: "tikv1", + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + Version: "7.5.0", + Labels: []*metapb.StoreLabel{{Key: "zone", Value: "z1"}}, + }, + { + Id: 2, + Address: "tikv2", + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + Version: "7.5.0", + Labels: []*metapb.StoreLabel{{Key: "zone", Value: "z2"}}, + }, + } + + for _, store := range stores { + tests.MustPutStore(re, cluster, store) + } + + bundles := [][]placement.GroupBundle{ + { + { + ID: "1", + Index: 1, + Rules: []*placement.Rule{ + { + ID: "rule_1", Index: 1, Role: placement.Voter, Count: 1, GroupID: "1", + LabelConstraints: []placement.LabelConstraint{ + {Key: "zone", Op: "in", Values: []string{"z1"}}, + }, + }, + { + ID: "rule_2", Index: 2, Role: placement.Leader, Count: 1, GroupID: "1", + LabelConstraints: []placement.LabelConstraint{ + {Key: "zone", Op: "in", Values: []string{"z2"}}, + }, + }, + }, + }, + }, + { + { + ID: "1", + Index: 1, + Rules: []*placement.Rule{ + { + ID: "rule_1", Index: 1, Role: placement.Leader, Count: 1, GroupID: "1", + LabelConstraints: []placement.LabelConstraint{ + {Key: "zone", Op: "in", Values: []string{"z2"}}, + }, + }, + { + ID: "rule_2", Index: 2, Role: placement.Voter, Count: 1, GroupID: "1", + LabelConstraints: []placement.LabelConstraint{ + {Key: "zone", Op: "in", Values: []string{"z1"}}, + }, + }, + }, + }, + }} + for _, bundle := range bundles { + data, err := json.Marshal(bundle) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + re.NoError(err) + + tu.Eventually(re, func() bool { + respBundle := make([]placement.GroupBundle, 0) + err := tu.CheckGetJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", nil, + tu.StatusOK(re), tu.ExtractJSON(re, &respBundle)) + re.NoError(err) + re.Len(respBundle, 1) + 
return compareBundle(respBundle[0], bundle[0]) + }) + } +} + +func (suite *ruleTestSuite) TestDeleteAndUpdate() { + suite.env.RunTestBasedOnMode(suite.checkDeleteAndUpdate) +} + +func (suite *ruleTestSuite) checkDeleteAndUpdate(cluster *tests.TestCluster) { + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1", pdAddr, apiPrefix) + + bundles := [][]placement.GroupBundle{ + // 1 rule group with 1 rule + {{ + ID: "1", + Index: 1, + Rules: []*placement.Rule{ + { + ID: "foo", Index: 1, Role: placement.Voter, Count: 1, GroupID: "1", + }, + }, + }}, + // 2 rule groups with different range rules + {{ + ID: "1", + Index: 1, + Rules: []*placement.Rule{ + { + ID: "foo", Index: 1, Role: placement.Voter, Count: 1, GroupID: "1", + StartKey: []byte("a"), EndKey: []byte("b"), + }, + }, + }, { + ID: "2", + Index: 2, + Rules: []*placement.Rule{ + { + ID: "foo", Index: 2, Role: placement.Voter, Count: 1, GroupID: "2", + StartKey: []byte("b"), EndKey: []byte("c"), + }, + }, + }}, + // 2 rule groups with 1 rule and 2 rules + {{ + ID: "3", + Index: 3, + Rules: []*placement.Rule{ + { + ID: "foo", Index: 3, Role: placement.Voter, Count: 1, GroupID: "3", + }, + }, + }, { + ID: "4", + Index: 4, + Rules: []*placement.Rule{ + { + ID: "foo", Index: 4, Role: placement.Voter, Count: 1, GroupID: "4", + }, + { + ID: "bar", Index: 6, Role: placement.Voter, Count: 1, GroupID: "4", + }, + }, + }}, + // 1 rule group with 2 rules + {{ + ID: "5", + Index: 5, + Rules: []*placement.Rule{ + { + ID: "foo", Index: 5, Role: placement.Voter, Count: 1, GroupID: "5", + }, + { + ID: "bar", Index: 6, Role: placement.Voter, Count: 1, GroupID: "5", + }, + }, + }}, + } + + for _, bundle := range bundles { + suite.postAndCheckRuleBundle(urlPrefix, bundle) + } +} + +func (suite *ruleTestSuite) TestConcurrency() { + suite.env.RunTestBasedOnMode(suite.checkConcurrency) +} + +func (suite *ruleTestSuite) checkConcurrency(cluster *tests.TestCluster) { + // test concurrency of set rule group with different group id + suite.checkConcurrencyWith(cluster, + func(i int) []placement.GroupBundle { + return []placement.GroupBundle{ + { + ID: strconv.Itoa(i), + Index: i, + Rules: []*placement.Rule{ + { + ID: "foo", Index: i, Role: placement.Voter, Count: 1, GroupID: strconv.Itoa(i), + }, + }, + }, + } + }, + func(resp []placement.GroupBundle, i int) bool { + return len(resp) == 1 && resp[0].ID == strconv.Itoa(i) + }, + ) + // test concurrency of set rule with different id + suite.checkConcurrencyWith(cluster, + func(i int) []placement.GroupBundle { + return []placement.GroupBundle{ + { + ID: "pd", + Index: 1, + Rules: []*placement.Rule{ + { + ID: strconv.Itoa(i), Index: i, Role: placement.Voter, Count: 1, GroupID: "pd", + }, + }, + }, + } + }, + func(resp []placement.GroupBundle, i int) bool { + return len(resp) == 1 && resp[0].ID == "pd" && resp[0].Rules[0].ID == strconv.Itoa(i) + }, + ) +} + +func (suite *ruleTestSuite) checkConcurrencyWith(cluster *tests.TestCluster, + genBundle func(int) []placement.GroupBundle, + checkBundle func([]placement.GroupBundle, int) bool) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1", pdAddr, apiPrefix) + expectResult := struct { + syncutil.RWMutex + val int + }{} + wg := sync.WaitGroup{} + + for i := 1; i <= 10; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + bundle := genBundle(i) + data, err := json.Marshal(bundle) + re.NoError(err) + for j := 0; j < 
10; j++ { + expectResult.Lock() + err = tu.CheckPostJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + re.NoError(err) + expectResult.val = i + expectResult.Unlock() + } + }(i) + } + + wg.Wait() + expectResult.RLock() + defer expectResult.RUnlock() + re.NotZero(expectResult.val) + tu.Eventually(re, func() bool { + respBundle := make([]placement.GroupBundle, 0) + err := tu.CheckGetJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", nil, + tu.StatusOK(re), tu.ExtractJSON(re, &respBundle)) + re.NoError(err) + re.Len(respBundle, 1) + return checkBundle(respBundle, expectResult.val) + }) +} + +func (suite *ruleTestSuite) TestLargeRules() { + suite.env.RunTestBasedOnMode(suite.checkLargeRules) +} + +func (suite *ruleTestSuite) checkLargeRules(cluster *tests.TestCluster) { + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1", pdAddr, apiPrefix) + genBundlesWithRulesNum := func(num int) []placement.GroupBundle { + bundle := []placement.GroupBundle{ + { + ID: "1", + Index: 1, + Rules: make([]*placement.Rule, 0), + }, + } + for i := 0; i < num; i++ { + bundle[0].Rules = append(bundle[0].Rules, &placement.Rule{ + ID: strconv.Itoa(i), Index: i, Role: placement.Voter, Count: 1, GroupID: "1", + StartKey: []byte(strconv.Itoa(i)), EndKey: []byte(strconv.Itoa(i + 1)), + }) + } + return bundle + } + suite.postAndCheckRuleBundle(urlPrefix, genBundlesWithRulesNum(etcdutil.MaxEtcdTxnOps/2)) + suite.postAndCheckRuleBundle(urlPrefix, genBundlesWithRulesNum(etcdutil.MaxEtcdTxnOps*2)) +} + +func assertBundleEqual(re *require.Assertions, url string, expectedBundle placement.GroupBundle) { + var bundle placement.GroupBundle + tu.Eventually(re, func() bool { + err := tu.ReadGetJSON(re, tests.TestDialClient, url, &bundle) + if err != nil { + return false + } + return compareBundle(bundle, expectedBundle) + }) +} + +func assertBundlesEqual(re *require.Assertions, url string, expectedBundles []placement.GroupBundle, expectedLen int) { + var bundles []placement.GroupBundle + tu.Eventually(re, func() bool { + err := tu.ReadGetJSON(re, tests.TestDialClient, url, &bundles) + if err != nil { + return false + } + if len(bundles) != expectedLen { + return false + } + sort.Slice(bundles, func(i, j int) bool { return bundles[i].ID < bundles[j].ID }) + sort.Slice(expectedBundles, func(i, j int) bool { return expectedBundles[i].ID < expectedBundles[j].ID }) + for i := range bundles { + if !compareBundle(bundles[i], expectedBundles[i]) { + return false + } + } + return true + }) +} + +func compareBundle(b1, b2 placement.GroupBundle) bool { + if b2.ID != b1.ID || b2.Index != b1.Index || b2.Override != b1.Override || len(b2.Rules) != len(b1.Rules) { + return false + } + sort.Slice(b1.Rules, func(i, j int) bool { return b1.Rules[i].ID < b1.Rules[j].ID }) + sort.Slice(b2.Rules, func(i, j int) bool { return b2.Rules[i].ID < b2.Rules[j].ID }) + for i := range b1.Rules { + if !compareRule(b1.Rules[i], b2.Rules[i]) { + return false + } + } + return true +} + +func compareRule(r1 *placement.Rule, r2 *placement.Rule) bool { + return r2.GroupID == r1.GroupID && + r2.ID == r1.ID && + r2.StartKeyHex == r1.StartKeyHex && + r2.EndKeyHex == r1.EndKeyHex && + r2.Role == r1.Role && + r2.Count == r1.Count +} + +func (suite *ruleTestSuite) postAndCheckRuleBundle(urlPrefix string, bundle []placement.GroupBundle) { + re := suite.Require() + data, err := json.Marshal(bundle) + re.NoError(err) + err = tu.CheckPostJSON(tests.TestDialClient, 
urlPrefix+"/config/placement-rule", data, tu.StatusOK(re)) + re.NoError(err) + + tu.Eventually(re, func() bool { + respBundle := make([]placement.GroupBundle, 0) + err = tu.CheckGetJSON(tests.TestDialClient, urlPrefix+"/config/placement-rule", nil, + tu.StatusOK(re), tu.ExtractJSON(re, &respBundle)) + re.NoError(err) + if len(respBundle) != len(bundle) { + return false + } + sort.Slice(respBundle, func(i, j int) bool { return respBundle[i].ID < respBundle[j].ID }) + sort.Slice(bundle, func(i, j int) bool { return bundle[i].ID < bundle[j].ID }) + for i := range respBundle { + if !compareBundle(respBundle[i], bundle[i]) { + return false + } + } + return true + }) +} + +type regionRuleTestSuite struct { + suite.Suite + env *tests.SchedulingTestEnvironment +} + +func TestRegionRuleTestSuite(t *testing.T) { + suite.Run(t, new(regionRuleTestSuite)) +} + +func (suite *regionRuleTestSuite) SetupSuite() { + suite.env = tests.NewSchedulingTestEnvironment(suite.T(), func(conf *config.Config, _ string) { + conf.Replication.EnablePlacementRules = true + conf.Replication.MaxReplicas = 1 + }) +} + +func (suite *regionRuleTestSuite) TearDownSuite() { + suite.env.Cleanup() +} + +func (suite *regionRuleTestSuite) TestRegionPlacementRule() { + suite.env.RunTestBasedOnMode(suite.checkRegionPlacementRule) +} + +func (suite *regionRuleTestSuite) checkRegionPlacementRule(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + urlPrefix := fmt.Sprintf("%s%s/api/v1", pdAddr, apiPrefix) + + stores := []*metapb.Store{ + { + Id: 1, + Address: "tikv1", + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + Version: "2.0.0", + }, + { + Id: 2, + Address: "tikv2", + State: metapb.StoreState_Up, + NodeState: metapb.NodeState_Serving, + Version: "2.0.0", + }, + } + for _, store := range stores { + tests.MustPutStore(re, cluster, store) + } + regions := make([]*core.RegionInfo, 0) + peers1 := []*metapb.Peer{ + {Id: 102, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 103, StoreId: 2, Role: metapb.PeerRole_Voter}} + regions = append(regions, core.NewRegionInfo(&metapb.Region{Id: 1, Peers: peers1, RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}}, peers1[0], + core.WithStartKey([]byte("abc")), core.WithEndKey([]byte("def")))) + peers2 := []*metapb.Peer{ + {Id: 104, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 105, StoreId: 2, Role: metapb.PeerRole_Learner}} + regions = append(regions, core.NewRegionInfo(&metapb.Region{Id: 2, Peers: peers2, RegionEpoch: &metapb.RegionEpoch{ConfVer: 2, Version: 2}}, peers2[0], + core.WithStartKey([]byte("ghi")), core.WithEndKey([]byte("jkl")))) + peers3 := []*metapb.Peer{ + {Id: 106, StoreId: 1, Role: metapb.PeerRole_Voter}, + {Id: 107, StoreId: 2, Role: metapb.PeerRole_Learner}} + regions = append(regions, core.NewRegionInfo(&metapb.Region{Id: 3, Peers: peers3, RegionEpoch: &metapb.RegionEpoch{ConfVer: 3, Version: 3}}, peers3[0], + core.WithStartKey([]byte("mno")), core.WithEndKey([]byte("pqr")))) + for _, rg := range regions { + tests.MustPutRegionInfo(re, cluster, rg) + } + + ruleManager := leaderServer.GetRaftCluster().GetRuleManager() + ruleManager.SetRule(&placement.Rule{ + GroupID: "test", + ID: "test2", + StartKeyHex: hex.EncodeToString([]byte("ghi")), + EndKeyHex: hex.EncodeToString([]byte("jkl")), + Role: placement.Learner, + Count: 1, + }) + ruleManager.SetRule(&placement.Rule{ + GroupID: "test", + ID: "test3", + StartKeyHex: hex.EncodeToString([]byte("ooo")), + EndKeyHex: 
hex.EncodeToString([]byte("ppp")), + Role: placement.Learner, + Count: 1, + }) + fit := &placement.RegionFit{} + + u := fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 1) + err := tu.ReadGetJSON(re, tests.TestDialClient, u, fit) + re.NoError(err) + re.Len(fit.RuleFits, 1) + re.Len(fit.OrphanPeers, 1) + u = fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 2) + fit = &placement.RegionFit{} + err = tu.ReadGetJSON(re, tests.TestDialClient, u, fit) + re.NoError(err) + re.Len(fit.RuleFits, 2) + re.Empty(fit.OrphanPeers) + u = fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 3) + fit = &placement.RegionFit{} + err = tu.ReadGetJSON(re, tests.TestDialClient, u, fit) + re.NoError(err) + re.Empty(fit.RuleFits) + re.Len(fit.OrphanPeers, 2) + + var label labeler.LabelRule + escapedID := url.PathEscape("keyspaces/0") + u = fmt.Sprintf("%s/config/region-label/rule/%s", urlPrefix, escapedID) + err = tu.ReadGetJSON(re, tests.TestDialClient, u, &label) + re.NoError(err) + re.Equal("keyspaces/0", label.ID) + + var labels []labeler.LabelRule + u = fmt.Sprintf("%s/config/region-label/rules", urlPrefix) + err = tu.ReadGetJSON(re, tests.TestDialClient, u, &labels) + re.NoError(err) + re.Len(labels, 1) + re.Equal("keyspaces/0", labels[0].ID) + + u = fmt.Sprintf("%s/config/region-label/rules/ids", urlPrefix) + err = tu.CheckGetJSON(tests.TestDialClient, u, []byte(`["rule1", "rule3"]`), func(resp []byte, _ int, _ http.Header) { + err := json.Unmarshal(resp, &labels) + re.NoError(err) + re.Empty(labels) + }) + re.NoError(err) + + err = tu.CheckGetJSON(tests.TestDialClient, u, []byte(`["keyspaces/0"]`), func(resp []byte, _ int, _ http.Header) { + err := json.Unmarshal(resp, &labels) + re.NoError(err) + re.Len(labels, 1) + re.Equal("keyspaces/0", labels[0].ID) + }) + re.NoError(err) + + u = fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 4) + err = tu.CheckGetJSON(tests.TestDialClient, u, nil, tu.Status(re, http.StatusNotFound), tu.StringContain( + re, "region 4 not found")) + re.NoError(err) + + u = fmt.Sprintf("%s/config/rules/region/%s/detail", urlPrefix, "id") + err = tu.CheckGetJSON(tests.TestDialClient, u, nil, tu.Status(re, http.StatusBadRequest), tu.StringContain( + re, errs.ErrRegionInvalidID.Error())) + re.NoError(err) + + data := make(map[string]any) + data["enable-placement-rules"] = "false" + reqData, e := json.Marshal(data) + re.NoError(e) + u = fmt.Sprintf("%s/config", urlPrefix) + err = tu.CheckPostJSON(tests.TestDialClient, u, reqData, tu.StatusOK(re)) + re.NoError(err) + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + // wait for the scheduling server to update the config + tu.Eventually(re, func() bool { + return !sche.GetCluster().GetCheckerConfig().IsPlacementRulesEnabled() + }) + } + u = fmt.Sprintf("%s/config/rules/region/%d/detail", urlPrefix, 1) + err = tu.CheckGetJSON(tests.TestDialClient, u, nil, tu.Status(re, http.StatusPreconditionFailed), tu.StringContain( + re, "placement rules feature is disabled")) + re.NoError(err) +} diff --git a/tests/testutil.go b/tests/testutil.go new file mode 100644 index 00000000000..22a5ab40a7e --- /dev/null +++ b/tests/testutil.go @@ -0,0 +1,467 @@ +// Copyright 2023 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tests + +import ( + "context" + "fmt" + "math/rand" + "net" + "net/http" + "os" + "runtime" + "strconv" + "strings" + "sync" + "testing" + "time" + + "github.com/docker/go-units" + "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/pingcap/log" + "github.com/stretchr/testify/require" + bs "github.com/tikv/pd/pkg/basicserver" + "github.com/tikv/pd/pkg/core" + rm "github.com/tikv/pd/pkg/mcs/resourcemanager/server" + scheduling "github.com/tikv/pd/pkg/mcs/scheduling/server" + sc "github.com/tikv/pd/pkg/mcs/scheduling/server/config" + tso "github.com/tikv/pd/pkg/mcs/tso/server" + "github.com/tikv/pd/pkg/mcs/utils/constant" + "github.com/tikv/pd/pkg/mock/mockid" + "github.com/tikv/pd/pkg/utils/logutil" + "github.com/tikv/pd/pkg/utils/testutil" + "github.com/tikv/pd/pkg/versioninfo" + "github.com/tikv/pd/server" + "go.uber.org/zap" +) + +var ( + // TestDialClient is a http client for test. + TestDialClient = &http.Client{ + Transport: &http.Transport{ + DisableKeepAlives: true, + }, + } + + testPortMutex sync.Mutex + testPortMap = make(map[string]struct{}) +) + +// SetRangePort sets the range of ports for test. +func SetRangePort(start, end int) { + portRange := []int{start, end} + dialContext := func(ctx context.Context, network, addr string) (net.Conn, error) { + dialer := &net.Dialer{} + randomPort := strconv.Itoa(rand.Intn(portRange[1]-portRange[0]) + portRange[0]) + testPortMutex.Lock() + for i := 0; i < 10; i++ { + if _, ok := testPortMap[randomPort]; !ok { + break + } + randomPort = strconv.Itoa(rand.Intn(portRange[1]-portRange[0]) + portRange[0]) + } + testPortMutex.Unlock() + localAddr, err := net.ResolveTCPAddr(network, "0.0.0.0:"+randomPort) + if err != nil { + return nil, err + } + dialer.LocalAddr = localAddr + return dialer.DialContext(ctx, network, addr) + } + + TestDialClient.Transport = &http.Transport{ + DisableKeepAlives: true, + DialContext: dialContext, + } +} + +var once sync.Once + +// InitLogger initializes the logger for test. +func InitLogger(logConfig log.Config, logger *zap.Logger, logProps *log.ZapProperties, redactInfoLog logutil.RedactInfoLogType) (err error) { + once.Do(func() { + // Setup the logger. + err = logutil.SetupLogger(logConfig, &logger, &logProps, redactInfoLog) + if err != nil { + return + } + log.ReplaceGlobals(logger, logProps) + // Flushing any buffered log entries. + log.Sync() + }) + return err +} + +// StartSingleResourceManagerTestServer creates and starts a resource manager server with default config for testing. 
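+// The listen address also serves as the server name, and the call blocks for up
+// to five seconds until the server reports it is no longer closed. The returned
+// cleanup function is expected to shut the server down after the test.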
+func StartSingleResourceManagerTestServer(ctx context.Context, re *require.Assertions, backendEndpoints, listenAddrs string) (*rm.Server, func()) { + cfg := rm.NewConfig() + cfg.BackendEndpoints = backendEndpoints + cfg.ListenAddr = listenAddrs + cfg.Name = cfg.ListenAddr + cfg, err := rm.GenerateConfig(cfg) + re.NoError(err) + + s, cleanup, err := rm.NewTestServer(ctx, re, cfg) + re.NoError(err) + testutil.Eventually(re, func() bool { + return !s.IsClosed() + }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) + + return s, cleanup +} + +// StartSingleTSOTestServerWithoutCheck creates and starts a tso server with default config for testing. +func StartSingleTSOTestServerWithoutCheck(ctx context.Context, re *require.Assertions, backendEndpoints, listenAddrs string) (*tso.Server, func(), error) { + cfg := tso.NewConfig() + cfg.BackendEndpoints = backendEndpoints + cfg.ListenAddr = listenAddrs + cfg.Name = cfg.ListenAddr + cfg, err := tso.GenerateConfig(cfg) + re.NoError(err) + // Setup the logger. + err = InitLogger(cfg.Log, cfg.Logger, cfg.LogProps, cfg.Security.RedactInfoLog) + re.NoError(err) + return NewTSOTestServer(ctx, cfg) +} + +// StartSingleTSOTestServer creates and starts a tso server with default config for testing. +func StartSingleTSOTestServer(ctx context.Context, re *require.Assertions, backendEndpoints, listenAddrs string) (*tso.Server, func()) { + s, cleanup, err := StartSingleTSOTestServerWithoutCheck(ctx, re, backendEndpoints, listenAddrs) + re.NoError(err) + testutil.Eventually(re, func() bool { + return !s.IsClosed() + }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) + + return s, cleanup +} + +// NewTSOTestServer creates a tso server with given config for testing. +func NewTSOTestServer(ctx context.Context, cfg *tso.Config) (*tso.Server, testutil.CleanupFunc, error) { + s := tso.CreateServer(ctx, cfg) + if err := s.Run(); err != nil { + return nil, nil, err + } + cleanup := func() { + s.Close() + os.RemoveAll(cfg.DataDir) + } + return s, cleanup, nil +} + +// StartSingleSchedulingTestServer creates and starts a scheduling server with default config for testing. +func StartSingleSchedulingTestServer(ctx context.Context, re *require.Assertions, backendEndpoints, listenAddrs string) (*scheduling.Server, func()) { + cfg := sc.NewConfig() + cfg.BackendEndpoints = backendEndpoints + cfg.ListenAddr = listenAddrs + cfg.Name = cfg.ListenAddr + cfg, err := scheduling.GenerateConfig(cfg) + re.NoError(err) + + s, cleanup, err := scheduling.NewTestServer(ctx, re, cfg) + re.NoError(err) + testutil.Eventually(re, func() bool { + return !s.IsClosed() + }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) + + return s, cleanup +} + +// NewSchedulingTestServer creates a scheduling server with given config for testing. 
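+// Unlike StartSingleSchedulingTestServer, it does not wait for the server to
+// become ready, so callers should poll readiness themselves. The returned cleanup
+// closes the server and removes its data directory.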
+func NewSchedulingTestServer(ctx context.Context, cfg *sc.Config) (*scheduling.Server, testutil.CleanupFunc, error) { + s := scheduling.CreateServer(ctx, cfg) + if err := s.Run(); err != nil { + return nil, nil, err + } + cleanup := func() { + s.Close() + os.RemoveAll(cfg.DataDir) + } + return s, cleanup, nil +} + +// WaitForPrimaryServing waits for one of servers being elected to be the primary/leader +func WaitForPrimaryServing(re *require.Assertions, serverMap map[string]bs.Server) string { + var primary string + testutil.Eventually(re, func() bool { + for name, s := range serverMap { + if s.IsServing() { + primary = name + return true + } + } + return false + }, testutil.WithWaitFor(10*time.Second), testutil.WithTickInterval(50*time.Millisecond)) + + return primary +} + +// MustPutStore is used for test purpose. +func MustPutStore(re *require.Assertions, cluster *TestCluster, store *metapb.Store) { + store.Address = fmt.Sprintf("tikv%d", store.GetId()) + if len(store.Version) == 0 { + store.Version = versioninfo.MinSupportedVersion(versioninfo.Version2_0).String() + } + svr := cluster.GetLeaderServer().GetServer() + grpcServer := &server.GrpcServer{Server: svr} + _, err := grpcServer.PutStore(context.Background(), &pdpb.PutStoreRequest{ + Header: &pdpb.RequestHeader{ClusterId: svr.ClusterID()}, + Store: store, + }) + re.NoError(err) + + ts := store.GetLastHeartbeat() + if ts == 0 { + ts = time.Now().UnixNano() + } + storeInfo := grpcServer.GetRaftCluster().GetStore(store.GetId()) + newStore := storeInfo.Clone( + core.SetStoreStats(&pdpb.StoreStats{ + Capacity: uint64(10 * units.GiB), + UsedSize: uint64(9 * units.GiB), + Available: uint64(1 * units.GiB), + }), + core.SetLastHeartbeatTS(time.Unix(ts/1e9, ts%1e9)), + ) + grpcServer.GetRaftCluster().GetBasicCluster().PutStore(newStore) + if cluster.GetSchedulingPrimaryServer() != nil { + cluster.GetSchedulingPrimaryServer().GetCluster().PutStore(newStore) + } +} + +// MustPutRegion is used for test purpose. +func MustPutRegion(re *require.Assertions, cluster *TestCluster, regionID, storeID uint64, start, end []byte, opts ...core.RegionCreateOption) *core.RegionInfo { + leader := &metapb.Peer{ + Id: regionID, + StoreId: storeID, + } + metaRegion := &metapb.Region{ + Id: regionID, + StartKey: start, + EndKey: end, + Peers: []*metapb.Peer{leader}, + RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, + } + opts = append(opts, core.SetSource(core.Heartbeat)) + r := core.NewRegionInfo(metaRegion, leader, opts...) + MustPutRegionInfo(re, cluster, r) + return r +} + +// MustPutRegionInfo is used for test purpose. +func MustPutRegionInfo(re *require.Assertions, cluster *TestCluster, regionInfo *core.RegionInfo) { + err := cluster.HandleRegionHeartbeat(regionInfo) + re.NoError(err) + if cluster.GetSchedulingPrimaryServer() != nil { + err = cluster.GetSchedulingPrimaryServer().GetCluster().HandleRegionHeartbeat(regionInfo) + re.NoError(err) + } +} + +// MustReportBuckets is used for test purpose. +func MustReportBuckets(re *require.Assertions, cluster *TestCluster, regionID uint64, start, end []byte, stats *metapb.BucketStats) *metapb.Buckets { + buckets := &metapb.Buckets{ + RegionId: regionID, + Version: 1, + Keys: [][]byte{start, end}, + Stats: stats, + // report buckets interval is 10s + PeriodInMs: 10000, + } + err := cluster.HandleReportBuckets(buckets) + re.NoError(err) + // TODO: forwards to scheduling server after it supports buckets + return buckets +} + +// SchedulerMode is used for test purpose. 
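+// It selects whether a test runs against a plain PD cluster (PDMode), an API
+// cluster backed by a separate scheduling microservice (APIMode), or both.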
+type SchedulerMode int + +const ( + // Both represents both PD mode and API mode. + Both SchedulerMode = iota + // PDMode represents PD mode. + PDMode + // APIMode represents API mode. + APIMode +) + +// SchedulingTestEnvironment is used for test purpose. +type SchedulingTestEnvironment struct { + t *testing.T + opts []ConfigOption + clusters map[SchedulerMode]*TestCluster + cancels []context.CancelFunc + RunMode SchedulerMode +} + +// NewSchedulingTestEnvironment is to create a new SchedulingTestEnvironment. +func NewSchedulingTestEnvironment(t *testing.T, opts ...ConfigOption) *SchedulingTestEnvironment { + return &SchedulingTestEnvironment{ + t: t, + opts: opts, + clusters: make(map[SchedulerMode]*TestCluster), + cancels: make([]context.CancelFunc, 0), + } +} + +// RunTestBasedOnMode runs test based on mode. +// If mode not set, it will run test in both PD mode and API mode. +func (s *SchedulingTestEnvironment) RunTestBasedOnMode(test func(*TestCluster)) { + switch s.RunMode { + case PDMode: + s.RunTestInPDMode(test) + case APIMode: + s.RunTestInAPIMode(test) + default: + s.RunTestInPDMode(test) + s.RunTestInAPIMode(test) + } +} + +// RunTestInPDMode is to run test in pd mode. +func (s *SchedulingTestEnvironment) RunTestInPDMode(test func(*TestCluster)) { + s.t.Logf("start test %s in pd mode", getTestName()) + if _, ok := s.clusters[PDMode]; !ok { + s.startCluster(PDMode) + } + test(s.clusters[PDMode]) +} + +func getTestName() string { + pc, _, _, _ := runtime.Caller(2) + caller := runtime.FuncForPC(pc) + if caller == nil || strings.Contains(caller.Name(), "RunTestBasedOnMode") { + pc, _, _, _ = runtime.Caller(3) + caller = runtime.FuncForPC(pc) + } + if caller != nil { + elements := strings.Split(caller.Name(), ".") + return elements[len(elements)-1] + } + return "" +} + +// RunTestInAPIMode is to run test in api mode. +func (s *SchedulingTestEnvironment) RunTestInAPIMode(test func(*TestCluster)) { + re := require.New(s.t) + re.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs", `return(true)`)) + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/mcs/scheduling/server/fastUpdateMember", `return(true)`)) + defer func() { + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/mcs/scheduling/server/fastUpdateMember")) + re.NoError(failpoint.Disable("github.com/tikv/pd/server/cluster/highFrequencyClusterJobs")) + }() + s.t.Logf("start test %s in api mode", getTestName()) + if _, ok := s.clusters[APIMode]; !ok { + s.startCluster(APIMode) + } + test(s.clusters[APIMode]) +} + +// Cleanup is to cleanup the environment. +func (s *SchedulingTestEnvironment) Cleanup() { + for _, cluster := range s.clusters { + cluster.Destroy() + } + for _, cancel := range s.cancels { + cancel() + } +} + +func (s *SchedulingTestEnvironment) startCluster(m SchedulerMode) { + re := require.New(s.t) + ctx, cancel := context.WithCancel(context.Background()) + s.cancels = append(s.cancels, cancel) + switch m { + case PDMode: + cluster, err := NewTestCluster(ctx, 1, s.opts...) + re.NoError(err) + err = cluster.RunInitialServers() + re.NoError(err) + re.NotEmpty(cluster.WaitLeader()) + leaderServer := cluster.GetServer(cluster.GetLeader()) + re.NoError(leaderServer.BootstrapCluster()) + s.clusters[PDMode] = cluster + case APIMode: + cluster, err := NewTestAPICluster(ctx, 1, s.opts...) 
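+ // API mode: besides the API cluster itself, a standalone scheduling cluster is
+ // started below, and the test waits until PD reports the scheduling service as
+ // independent before handing the cluster to the test body.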
+ re.NoError(err) + err = cluster.RunInitialServers() + re.NoError(err) + re.NotEmpty(cluster.WaitLeader()) + leaderServer := cluster.GetServer(cluster.GetLeader()) + re.NoError(leaderServer.BootstrapCluster()) + leaderServer.GetRaftCluster().SetPrepared() + // start scheduling cluster + tc, err := NewTestSchedulingCluster(ctx, 1, leaderServer.GetAddr()) + re.NoError(err) + tc.WaitForPrimaryServing(re) + tc.GetPrimaryServer().GetCluster().SetPrepared() + cluster.SetSchedulingCluster(tc) + time.Sleep(200 * time.Millisecond) // wait for scheduling cluster to update member + testutil.Eventually(re, func() bool { + return cluster.GetLeaderServer().GetServer().GetRaftCluster().IsServiceIndependent(constant.SchedulingServiceName) + }) + s.clusters[APIMode] = cluster + } +} + +type idAllocator struct { + allocator *mockid.IDAllocator +} + +func (i *idAllocator) alloc() uint64 { + v, _ := i.allocator.Alloc() + return v +} + +// InitRegions is used for test purpose. +func InitRegions(regionLen int) []*core.RegionInfo { + allocator := &idAllocator{allocator: mockid.NewIDAllocator()} + regions := make([]*core.RegionInfo, 0, regionLen) + for i := 0; i < regionLen; i++ { + r := &metapb.Region{ + Id: allocator.alloc(), + RegionEpoch: &metapb.RegionEpoch{ + ConfVer: 1, + Version: 1, + }, + StartKey: []byte{byte(i)}, + EndKey: []byte{byte(i + 1)}, + Peers: []*metapb.Peer{ + {Id: allocator.alloc(), StoreId: uint64(1)}, + {Id: allocator.alloc(), StoreId: uint64(2)}, + {Id: allocator.alloc(), StoreId: uint64(3)}, + }, + } + if i == 0 { + r.StartKey = []byte{} + } else if i == regionLen-1 { + r.EndKey = []byte{} + } + region := core.NewRegionInfo(r, r.Peers[0], core.SetSource(core.Heartbeat)) + // Here is used to simulate the upgrade process. + if i < regionLen/2 { + buckets := &metapb.Buckets{ + RegionId: r.Id, + Keys: [][]byte{r.StartKey, r.EndKey}, + Version: 1, + } + region.UpdateBuckets(buckets, region.GetBuckets()) + } + regions = append(regions, region) + } + return regions +} diff --git a/tools/pd-ctl/tests/config/config_test.go b/tools/pd-ctl/tests/config/config_test.go new file mode 100644 index 00000000000..17fd0b4ec63 --- /dev/null +++ b/tools/pd-ctl/tests/config/config_test.go @@ -0,0 +1,1209 @@ +// Copyright 2019 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package config_test + +import ( + "context" + "encoding/json" + "net/http" + "os" + "reflect" + "strconv" + "strings" + "testing" + "time" + + "github.com/coreos/go-semver/semver" + "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + sc "github.com/tikv/pd/pkg/schedule/config" + "github.com/tikv/pd/pkg/schedule/placement" + "github.com/tikv/pd/pkg/utils/testutil" + "github.com/tikv/pd/pkg/utils/typeutil" + "github.com/tikv/pd/server/config" + pdTests "github.com/tikv/pd/tests" + ctl "github.com/tikv/pd/tools/pd-ctl/pdctl" + "github.com/tikv/pd/tools/pd-ctl/tests" +) + +// testDialClient used to dial http request. only used for test. +var testDialClient = &http.Client{ + Transport: &http.Transport{ + DisableKeepAlives: true, + }, +} + +type testCase struct { + name string + value any + read func(scheduleConfig *sc.ScheduleConfig) any +} + +func (t *testCase) judge(re *require.Assertions, scheduleConfigs ...*sc.ScheduleConfig) { + value := t.value + for _, scheduleConfig := range scheduleConfigs { + re.NotNil(scheduleConfig) + re.IsType(value, t.read(scheduleConfig)) + } +} + +type configTestSuite struct { + suite.Suite + env *pdTests.SchedulingTestEnvironment +} + +func TestConfigTestSuite(t *testing.T) { + suite.Run(t, new(configTestSuite)) +} + +func (suite *configTestSuite) SetupTest() { + // use a new environment to avoid affecting other tests + suite.env = pdTests.NewSchedulingTestEnvironment(suite.T()) +} + +func (suite *configTestSuite) TearDownSuite() { + suite.env.Cleanup() +} + +func (suite *configTestSuite) TearDownTest() { + re := suite.Require() + cleanFunc := func(cluster *pdTests.TestCluster) { + def := placement.GroupBundle{ + ID: "pd", + Rules: []*placement.Rule{ + {GroupID: "pd", ID: "default", Role: "voter", Count: 3}, + }, + } + data, err := json.Marshal([]placement.GroupBundle{def}) + re.NoError(err) + leader := cluster.GetLeaderServer() + re.NotNil(leader) + urlPrefix := leader.GetAddr() + err = testutil.CheckPostJSON(testDialClient, urlPrefix+"/pd/api/v1/config/placement-rule", data, testutil.StatusOK(re)) + re.NoError(err) + } + suite.env.RunTestBasedOnMode(cleanFunc) + suite.env.Cleanup() +} + +func (suite *configTestSuite) TestConfig() { + re := suite.Require() + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/dashboard/adapter/skipDashboardLoop", `return(true)`)) + suite.env.RunTestBasedOnMode(suite.checkConfig) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/dashboard/adapter/skipDashboardLoop")) +} + +func (suite *configTestSuite) checkConfig(cluster *pdTests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + cmd := ctl.GetRootCmd() + + store := &metapb.Store{ + Id: 1, + State: metapb.StoreState_Up, + } + svr := leaderServer.GetServer() + pdTests.MustPutStore(re, cluster, store) + + // config show + args := []string{"-u", pdAddr, "config", "show"} + output, err := tests.ExecuteCommand(cmd, args...) 
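+ // "config show" returns the merged configuration; fields that are hidden from
+ // its output are cleared on the server-side copy below before the comparison.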
+ re.NoError(err) + cfg := config.Config{} + re.NoError(json.Unmarshal(output, &cfg)) + scheduleConfig := svr.GetScheduleConfig() + + // hidden config + scheduleConfig.Schedulers = nil + scheduleConfig.StoreLimit = nil + scheduleConfig.SchedulerMaxWaitingOperator = 0 + scheduleConfig.EnableRemoveDownReplica = false + scheduleConfig.EnableReplaceOfflineReplica = false + scheduleConfig.EnableMakeUpReplica = false + scheduleConfig.EnableRemoveExtraReplica = false + scheduleConfig.EnableLocationReplacement = false + re.Equal(uint64(0), scheduleConfig.MaxMergeRegionKeys) + // The result of config show doesn't be 0. + scheduleConfig.MaxMergeRegionKeys = scheduleConfig.GetMaxMergeRegionKeys() + re.Equal(scheduleConfig, &cfg.Schedule) + re.Equal(svr.GetReplicationConfig(), &cfg.Replication) + + // config set trace-region-flow + args = []string{"-u", pdAddr, "config", "set", "trace-region-flow", "false"} + _, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + re.False(svr.GetPDServerConfig().TraceRegionFlow) + + origin := svr.GetPDServerConfig().FlowRoundByDigit + args = []string{"-u", pdAddr, "config", "set", "flow-round-by-digit", "10"} + _, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + re.Equal(10, svr.GetPDServerConfig().FlowRoundByDigit) + + args = []string{"-u", pdAddr, "config", "set", "flow-round-by-digit", "-10"} + _, err = tests.ExecuteCommand(cmd, args...) + re.Error(err) + + args = []string{"-u", pdAddr, "config", "set", "flow-round-by-digit", strconv.Itoa(origin)} + _, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "show", "server") + re.NoError(err) + var conf config.PDServerConfig + re.NoError(json.Unmarshal(output, &conf)) + return conf.FlowRoundByDigit == origin + }) + + // config show schedule + args = []string{"-u", pdAddr, "config", "show", "schedule"} + output, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + scheduleCfg := sc.ScheduleConfig{} + re.NoError(json.Unmarshal(output, &scheduleCfg)) + scheduleConfig = svr.GetScheduleConfig() + scheduleConfig.MaxMergeRegionKeys = scheduleConfig.GetMaxMergeRegionKeys() + re.Equal(scheduleConfig, &scheduleCfg) + + // After https://github.com/tikv/tikv/issues/17309, the default value is enlarged from 20 to 54, + // to make it compatible with the default value of region size of tikv. + re.Equal(54, int(svr.GetScheduleConfig().MaxMergeRegionSize)) + re.Equal(0, int(svr.GetScheduleConfig().MaxMergeRegionKeys)) + re.Equal(54*10000, int(svr.GetScheduleConfig().GetMaxMergeRegionKeys())) + + // set max-merge-region-size to 40MB + args = []string{"-u", pdAddr, "config", "set", "max-merge-region-size", "40"} + _, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + re.Equal(40, int(svr.GetScheduleConfig().MaxMergeRegionSize)) + re.Equal(0, int(svr.GetScheduleConfig().MaxMergeRegionKeys)) + re.Equal(40*10000, int(svr.GetScheduleConfig().GetMaxMergeRegionKeys())) + args = []string{"-u", pdAddr, "config", "set", "max-merge-region-keys", "200000"} + _, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + re.Equal(20*10000, int(svr.GetScheduleConfig().MaxMergeRegionKeys)) + re.Equal(20*10000, int(svr.GetScheduleConfig().GetMaxMergeRegionKeys())) + + // set store limit v2 + args = []string{"-u", pdAddr, "config", "set", "store-limit-version", "v2"} + _, err = tests.ExecuteCommand(cmd, args...) 
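+ // store-limit-version toggles between the v1 and v2 store limit modes; the
+ // checks below read the value back from the schedule config after each switch.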
+ re.NoError(err) + re.Equal("v2", svr.GetScheduleConfig().StoreLimitVersion) + args = []string{"-u", pdAddr, "config", "set", "store-limit-version", "v1"} + _, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + re.Equal("v1", svr.GetScheduleConfig().StoreLimitVersion) + + // config show replication + args = []string{"-u", pdAddr, "config", "show", "replication"} + output, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + replicationCfg := sc.ReplicationConfig{} + re.NoError(json.Unmarshal(output, &replicationCfg)) + re.Equal(svr.GetReplicationConfig(), &replicationCfg) + + // config show cluster-version + args1 := []string{"-u", pdAddr, "config", "show", "cluster-version"} + output, err = tests.ExecuteCommand(cmd, args1...) + re.NoError(err) + clusterVersion := semver.Version{} + re.NoError(json.Unmarshal(output, &clusterVersion)) + re.Equal(svr.GetClusterVersion(), clusterVersion) + + // config set cluster-version + args2 := []string{"-u", pdAddr, "config", "set", "cluster-version", "2.1.0-rc.5"} + _, err = tests.ExecuteCommand(cmd, args2...) + re.NoError(err) + re.NotEqual(svr.GetClusterVersion(), clusterVersion) + output, err = tests.ExecuteCommand(cmd, args1...) + re.NoError(err) + clusterVersion = semver.Version{} + re.NoError(json.Unmarshal(output, &clusterVersion)) + re.Equal(svr.GetClusterVersion(), clusterVersion) + + // config show label-property + args1 = []string{"-u", pdAddr, "config", "show", "label-property"} + output, err = tests.ExecuteCommand(cmd, args1...) + re.NoError(err) + labelPropertyCfg := config.LabelPropertyConfig{} + re.NoError(json.Unmarshal(output, &labelPropertyCfg)) + re.Equal(svr.GetLabelProperty(), labelPropertyCfg) + + // config set label-property + args2 = []string{"-u", pdAddr, "config", "set", "label-property", "reject-leader", "zone", "cn"} + _, err = tests.ExecuteCommand(cmd, args2...) + re.NoError(err) + re.NotEqual(svr.GetLabelProperty(), labelPropertyCfg) + output, err = tests.ExecuteCommand(cmd, args1...) + re.NoError(err) + labelPropertyCfg = config.LabelPropertyConfig{} + re.NoError(json.Unmarshal(output, &labelPropertyCfg)) + re.Equal(svr.GetLabelProperty(), labelPropertyCfg) + + // config delete label-property + args3 := []string{"-u", pdAddr, "config", "delete", "label-property", "reject-leader", "zone", "cn"} + _, err = tests.ExecuteCommand(cmd, args3...) + re.NoError(err) + re.NotEqual(svr.GetLabelProperty(), labelPropertyCfg) + output, err = tests.ExecuteCommand(cmd, args1...) + re.NoError(err) + labelPropertyCfg = config.LabelPropertyConfig{} + re.NoError(json.Unmarshal(output, &labelPropertyCfg)) + re.Equal(svr.GetLabelProperty(), labelPropertyCfg) + + // config set min-resolved-ts-persistence-interval + args = []string{"-u", pdAddr, "config", "set", "min-resolved-ts-persistence-interval", "1s"} + _, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + re.Equal(typeutil.NewDuration(time.Second), svr.GetPDServerConfig().MinResolvedTSPersistenceInterval) + + // config set max-store-preparing-time 10m + args = []string{"-u", pdAddr, "config", "set", "max-store-preparing-time", "10m"} + _, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + re.Equal(typeutil.NewDuration(10*time.Minute), svr.GetScheduleConfig().MaxStorePreparingTime) + + args = []string{"-u", pdAddr, "config", "set", "max-store-preparing-time", "0s"} + _, err = tests.ExecuteCommand(cmd, args...) 
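+ // A zero duration is also accepted and should be stored verbatim.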
+ re.NoError(err) + re.Equal(typeutil.NewDuration(0), svr.GetScheduleConfig().MaxStorePreparingTime) + + // test config read and write + testCases := []testCase{ + {"leader-schedule-limit", uint64(64), func(scheduleConfig *sc.ScheduleConfig) any { + return scheduleConfig.LeaderScheduleLimit + }}, {"hot-region-schedule-limit", uint64(64), func(scheduleConfig *sc.ScheduleConfig) any { + return scheduleConfig.HotRegionScheduleLimit + }}, {"hot-region-cache-hits-threshold", uint64(5), func(scheduleConfig *sc.ScheduleConfig) any { + return scheduleConfig.HotRegionCacheHitsThreshold + }}, {"enable-remove-down-replica", false, func(scheduleConfig *sc.ScheduleConfig) any { + return scheduleConfig.EnableRemoveDownReplica + }}, + {"enable-debug-metrics", true, func(scheduleConfig *sc.ScheduleConfig) any { + return scheduleConfig.EnableDebugMetrics + }}, + // set again + {"enable-debug-metrics", true, func(scheduleConfig *sc.ScheduleConfig) any { + return scheduleConfig.EnableDebugMetrics + }}, + } + for _, testCase := range testCases { + // write + args1 = []string{"-u", pdAddr, "config", "set", testCase.name, reflect.TypeOf(testCase.value).String()} + _, err = tests.ExecuteCommand(cmd, args1...) + re.NoError(err) + // read + args2 = []string{"-u", pdAddr, "config", "show"} + output, err = tests.ExecuteCommand(cmd, args2...) + re.NoError(err) + cfg = config.Config{} + re.NoError(json.Unmarshal(output, &cfg)) + // judge + testCase.judge(re, &cfg.Schedule, svr.GetScheduleConfig()) + } + + // test error or deprecated config name + args1 = []string{"-u", pdAddr, "config", "set", "foo-bar", "1"} + output, err = tests.ExecuteCommand(cmd, args1...) + re.NoError(err) + re.Contains(string(output), "not found") + args1 = []string{"-u", pdAddr, "config", "set", "disable-remove-down-replica", "true"} + output, err = tests.ExecuteCommand(cmd, args1...) + re.NoError(err) + re.Contains(string(output), "already been deprecated") + + // set enable-placement-rules twice, make sure it does not return error. + args1 = []string{"-u", pdAddr, "config", "set", "enable-placement-rules", "true"} + _, err = tests.ExecuteCommand(cmd, args1...) + re.NoError(err) + args1 = []string{"-u", pdAddr, "config", "set", "enable-placement-rules", "true"} + _, err = tests.ExecuteCommand(cmd, args1...) + re.NoError(err) + + // test invalid value + argsInvalid := []string{"-u", pdAddr, "config", "set", "leader-schedule-policy", "aaa"} + output, err = tests.ExecuteCommand(cmd, argsInvalid...) + re.NoError(err) + re.Contains(string(output), "is invalid") + argsInvalid = []string{"-u", pdAddr, "config", "set", "key-type", "aaa"} + output, err = tests.ExecuteCommand(cmd, argsInvalid...) 
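+ // Invalid enum values are rejected by the server; pd-ctl reports the message in
+ // its output instead of returning an error, hence the Contains check below.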
+ re.NoError(err) + re.Contains(string(output), "is invalid") +} + +func (suite *configTestSuite) TestConfigForwardControl() { + re := suite.Require() + re.NoError(failpoint.Enable("github.com/tikv/pd/pkg/dashboard/adapter/skipDashboardLoop", `return(true)`)) + suite.env.RunTestBasedOnMode(suite.checkConfigForwardControl) + re.NoError(failpoint.Disable("github.com/tikv/pd/pkg/dashboard/adapter/skipDashboardLoop")) +} + +func (suite *configTestSuite) checkConfigForwardControl(cluster *pdTests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + + f, _ := os.CreateTemp("", "pd_tests") + fname := f.Name() + f.Close() + defer os.RemoveAll(fname) + + checkScheduleConfig := func(scheduleCfg *sc.ScheduleConfig, isFromAPIServer bool) { + if schedulingServer := cluster.GetSchedulingPrimaryServer(); schedulingServer != nil { + if isFromAPIServer { + re.Equal(scheduleCfg.LeaderScheduleLimit, leaderServer.GetPersistOptions().GetLeaderScheduleLimit()) + re.NotEqual(scheduleCfg.LeaderScheduleLimit, schedulingServer.GetPersistConfig().GetLeaderScheduleLimit()) + } else { + re.Equal(scheduleCfg.LeaderScheduleLimit, schedulingServer.GetPersistConfig().GetLeaderScheduleLimit()) + re.NotEqual(scheduleCfg.LeaderScheduleLimit, leaderServer.GetPersistOptions().GetLeaderScheduleLimit()) + } + } else { + re.Equal(scheduleCfg.LeaderScheduleLimit, leaderServer.GetPersistOptions().GetLeaderScheduleLimit()) + } + } + + checkReplicateConfig := func(replicationCfg *sc.ReplicationConfig, isFromAPIServer bool) { + if schedulingServer := cluster.GetSchedulingPrimaryServer(); schedulingServer != nil { + if isFromAPIServer { + re.Equal(replicationCfg.MaxReplicas, uint64(leaderServer.GetPersistOptions().GetMaxReplicas())) + re.NotEqual(int(replicationCfg.MaxReplicas), schedulingServer.GetPersistConfig().GetMaxReplicas()) + } else { + re.Equal(int(replicationCfg.MaxReplicas), schedulingServer.GetPersistConfig().GetMaxReplicas()) + re.NotEqual(replicationCfg.MaxReplicas, uint64(leaderServer.GetPersistOptions().GetMaxReplicas())) + } + } else { + re.Equal(replicationCfg.MaxReplicas, uint64(leaderServer.GetPersistOptions().GetMaxReplicas())) + } + } + + checkRules := func(rules []*placement.Rule, isFromAPIServer bool) { + apiRules := leaderServer.GetRaftCluster().GetRuleManager().GetAllRules() + if schedulingServer := cluster.GetSchedulingPrimaryServer(); schedulingServer != nil { + schedulingRules := schedulingServer.GetCluster().GetRuleManager().GetAllRules() + if isFromAPIServer { + re.Len(apiRules, len(rules)) + re.NotEqual(len(schedulingRules), len(rules)) + } else { + re.Len(schedulingRules, len(rules)) + re.NotEqual(len(apiRules), len(rules)) + } + } else { + re.Len(apiRules, len(rules)) + } + } + + checkGroup := func(group placement.RuleGroup, isFromAPIServer bool) { + apiGroup := leaderServer.GetRaftCluster().GetRuleManager().GetRuleGroup(placement.DefaultGroupID) + if schedulingServer := cluster.GetSchedulingPrimaryServer(); schedulingServer != nil { + schedulingGroup := schedulingServer.GetCluster().GetRuleManager().GetRuleGroup(placement.DefaultGroupID) + if isFromAPIServer { + re.Equal(apiGroup.Index, group.Index) + re.NotEqual(schedulingGroup.Index, group.Index) + } else { + re.Equal(schedulingGroup.Index, group.Index) + re.NotEqual(apiGroup.Index, group.Index) + } + } else { + re.Equal(apiGroup.Index, group.Index) + } + } + + testConfig := func(options ...string) { + for _, isFromAPIServer := range []bool{true, false} { + cmd := ctl.GetRootCmd() + 
args := []string{"-u", pdAddr, "config", "show"} + args = append(args, options...) + if isFromAPIServer { + args = append(args, "--from_api_server") + } + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + if len(options) == 0 || options[0] == "all" { + cfg := config.Config{} + re.NoError(json.Unmarshal(output, &cfg)) + checkReplicateConfig(&cfg.Replication, isFromAPIServer) + checkScheduleConfig(&cfg.Schedule, isFromAPIServer) + } else if options[0] == "replication" { + replicationCfg := &sc.ReplicationConfig{} + re.NoError(json.Unmarshal(output, replicationCfg)) + checkReplicateConfig(replicationCfg, isFromAPIServer) + } else if options[0] == "schedule" { + scheduleCfg := &sc.ScheduleConfig{} + re.NoError(json.Unmarshal(output, scheduleCfg)) + checkScheduleConfig(scheduleCfg, isFromAPIServer) + } else { + re.Fail("no implement") + } + } + } + + testRules := func(options ...string) { + for _, isFromAPIServer := range []bool{true, false} { + cmd := ctl.GetRootCmd() + args := []string{"-u", pdAddr, "config", "placement-rules"} + args = append(args, options...) + if isFromAPIServer { + args = append(args, "--from_api_server") + } + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + if options[0] == "show" { + var rules []*placement.Rule + re.NoError(json.Unmarshal(output, &rules)) + checkRules(rules, isFromAPIServer) + } else if options[0] == "load" { + var rules []*placement.Rule + b, _ := os.ReadFile(fname) + re.NoError(json.Unmarshal(b, &rules)) + checkRules(rules, isFromAPIServer) + } else if options[0] == "rule-group" { + var group placement.RuleGroup + re.NoError(json.Unmarshal(output, &group), string(output)) + checkGroup(group, isFromAPIServer) + } else if options[0] == "rule-bundle" && options[1] == "get" { + var bundle placement.GroupBundle + re.NoError(json.Unmarshal(output, &bundle), string(output)) + checkRules(bundle.Rules, isFromAPIServer) + } else if options[0] == "rule-bundle" && options[1] == "load" { + var bundles []placement.GroupBundle + b, _ := os.ReadFile(fname) + re.NoError(json.Unmarshal(b, &bundles), string(output)) + checkRules(bundles[0].Rules, isFromAPIServer) + } else { + re.Fail("no implement") + } + } + } + + // Test Config + // inject different config to scheduling server + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + scheCfg := sche.GetPersistConfig().GetScheduleConfig().Clone() + scheCfg.LeaderScheduleLimit = 233 + sche.GetPersistConfig().SetScheduleConfig(scheCfg) + repCfg := sche.GetPersistConfig().GetReplicationConfig().Clone() + repCfg.MaxReplicas = 7 + sche.GetPersistConfig().SetReplicationConfig(repCfg) + re.Equal(uint64(233), sche.GetPersistConfig().GetLeaderScheduleLimit()) + re.Equal(7, sche.GetPersistConfig().GetMaxReplicas()) + } + // show config from api server rather than scheduling server + testConfig() + // show all config from api server rather than scheduling server + testConfig("all") + // show replication config from api server rather than scheduling server + testConfig("replication") + // show schedule config from api server rather than scheduling server + testConfig("schedule") + + // Test Rule + // inject different rule to scheduling server + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + ruleManager := sche.GetCluster().GetRuleManager() + ruleManager.SetAllGroupBundles([]placement.GroupBundle{{ + ID: placement.DefaultGroupID, + Index: 233, + Override: true, + Rules: []*placement.Rule{ + { + GroupID: placement.DefaultGroupID, + ID: "test", + Index: 100, + Role: 
placement.Voter, + Count: 5, + }, + { + GroupID: placement.DefaultGroupID, + ID: "pd", + Index: 101, + Role: placement.Voter, + Count: 3, + }, + }, + }}, true) + re.Len(ruleManager.GetAllRules(), 2) + } + + // show placement rules + testRules("show") + // load placement rules + testRules("load", "--out="+fname) + // show placement rules group + testRules("rule-group", "show", placement.DefaultGroupID) + // show placement rules group bundle + testRules("rule-bundle", "get", placement.DefaultGroupID) + // load placement rules bundle + testRules("rule-bundle", "load", "--out="+fname) +} + +func (suite *configTestSuite) TestPlacementRules() { + suite.env.RunTestBasedOnMode(suite.checkPlacementRules) +} + +func (suite *configTestSuite) checkPlacementRules(cluster *pdTests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + cmd := ctl.GetRootCmd() + + store := &metapb.Store{ + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + } + pdTests.MustPutStore(re, cluster, store) + + output, err := tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "enable") + re.NoError(err) + re.Contains(string(output), "Success!") + + // test show + checkShowRuleKey(re, pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}) + + f, _ := os.CreateTemp("", "pd_tests") + fname := f.Name() + f.Close() + defer os.RemoveAll(fname) + + // test load + rules := checkLoadRule(re, pdAddr, fname, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}) + + // test save + rules = append(rules, placement.Rule{ + GroupID: placement.DefaultGroupID, + ID: "test1", + Role: placement.Voter, + Count: 1, + }, placement.Rule{ + GroupID: "test-group", + ID: "test2", + Role: placement.Voter, + Count: 2, + }) + b, _ := json.Marshal(rules) + os.WriteFile(fname, b, 0600) + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "save", "--in="+fname) + re.NoError(err) + + // test show group + checkShowRuleKey(re, pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}, {placement.DefaultGroupID, "test1"}}, "--group=pd") + + // test rule region detail + pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b")) + checkShowRuleKey(re, pdAddr, [][2]string{{placement.DefaultGroupID, placement.DefaultRuleID}}, "--region=1", "--detail") + + // test delete + // need to clear up args, so create a new cobra.Command. Otherwise the group still exists. 
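+ // Setting Count to 0 marks the default rule for deletion when the file is saved
+ // again, so only the "test1" rule should remain in group "pd" afterwards.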
+ rules[0].Count = 0 + b, _ = json.Marshal(rules) + os.WriteFile(fname, b, 0600) + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "save", "--in="+fname) + re.NoError(err) + checkShowRuleKey(re, pdAddr, [][2]string{{placement.DefaultGroupID, "test1"}}, "--group=pd") +} + +func (suite *configTestSuite) TestPlacementRuleGroups() { + suite.env.RunTestBasedOnMode(suite.checkPlacementRuleGroups) +} + +func (suite *configTestSuite) checkPlacementRuleGroups(cluster *pdTests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + cmd := ctl.GetRootCmd() + + store := &metapb.Store{ + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + } + pdTests.MustPutStore(re, cluster, store) + output, err := tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "enable") + re.NoError(err) + re.Contains(string(output), "Success!") + + // test show + var group placement.RuleGroup + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", placement.DefaultGroupID) + re.NoError(err) + return !strings.Contains(string(output), "404") + }) + re.NoError(json.Unmarshal(output, &group), string(output)) + re.Equal(placement.RuleGroup{ID: placement.DefaultGroupID}, group) + + // test set + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "set", placement.DefaultGroupID, "42", "true") + re.NoError(err) + re.Contains(string(output), "Success!") + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "set", "group2", "100", "false") + re.NoError(err) + re.Contains(string(output), "Success!") + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "set", "group3", "200", "false") + re.NoError(err) + re.Contains(string(output), "Success!") + + // show all + var groups []placement.RuleGroup + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show") + re.NoError(err) + re.NoError(json.Unmarshal(output, &groups)) + return reflect.DeepEqual([]placement.RuleGroup{ + {ID: placement.DefaultGroupID, Index: 42, Override: true}, + {ID: "group2", Index: 100, Override: false}, + {ID: "group3", Index: 200, Override: false}, + }, groups) + }) + + // delete + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "delete", "group2") + re.NoError(err) + re.Contains(string(output), "Delete group and rules successfully.") + + // show again + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", "group2") + re.NoError(err) + return strings.Contains(string(output), "404") + }) + + // delete using regex + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "delete", "--regexp", ".*3") + re.NoError(err) + + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-group", "show", "group3") + re.NoError(err) + return 
strings.Contains(string(output), "404") + }) +} + +func (suite *configTestSuite) TestPlacementRuleBundle() { + suite.env.RunTestBasedOnMode(suite.checkPlacementRuleBundle) +} + +func (suite *configTestSuite) checkPlacementRuleBundle(cluster *pdTests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + cmd := ctl.GetRootCmd() + + store := &metapb.Store{ + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + } + pdTests.MustPutStore(re, cluster, store) + + output, err := tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "enable") + re.NoError(err) + re.Contains(string(output), "Success!") + + // test get + var bundle placement.GroupBundle + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "get", placement.DefaultGroupID) + re.NoError(err) + re.NoError(json.Unmarshal(output, &bundle)) + re.Equal(placement.GroupBundle{ID: placement.DefaultGroupID, Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, bundle) + + f, err := os.CreateTemp("", "pd_tests") + re.NoError(err) + fname := f.Name() + f.Close() + defer os.RemoveAll(fname) + + // test load + checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ + {ID: placement.DefaultGroupID, Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + }) + + // test set + bundle.ID = "pe" + bundle.Rules[0].GroupID = "pe" + b, err := json.Marshal(bundle) + re.NoError(err) + re.NoError(os.WriteFile(fname, b, 0600)) + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "set", "--in="+fname) + re.NoError(err) + checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ + {ID: placement.DefaultGroupID, Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: placement.DefaultGroupID, ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + }) + + // test delete + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "delete", placement.DefaultGroupID) + re.NoError(err) + + checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + }) + + // test delete regexp + bundle.ID = "pf" + bundle.Rules = []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}} + b, err = json.Marshal(bundle) + re.NoError(err) + re.NoError(os.WriteFile(fname, b, 0600)) + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "set", "--in="+fname) + re.NoError(err) + checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + }) + + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "delete", "--regexp", ".*f") + 
re.NoError(err) + + bundles := []placement.GroupBundle{ + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + } + checkLoadRuleBundle(re, pdAddr, fname, bundles) + + // test save + bundle.Rules = []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}} + bundles = append(bundles, bundle) + b, err = json.Marshal(bundles) + re.NoError(err) + re.NoError(os.WriteFile(fname, b, 0600)) + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "save", "--in="+fname) + re.NoError(err) + checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ + {ID: "pe", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pe", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + }) + + // partial update, so still one group is left, no error + bundles = []placement.GroupBundle{{ID: "pe", Rules: []*placement.Rule{}}} + b, err = json.Marshal(bundles) + re.NoError(err) + re.NoError(os.WriteFile(fname, b, 0600)) + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "save", "--in="+fname, "--partial") + re.NoError(err) + + checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ + {ID: "pf", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pf", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + }) + + // set default rule only + bundles = []placement.GroupBundle{{ + ID: "pd", + Rules: []*placement.Rule{ + {GroupID: "pd", ID: "default", Role: "voter", Count: 3}, + }, + }} + b, err = json.Marshal(bundles) + re.NoError(err) + re.NoError(os.WriteFile(fname, b, 0600)) + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "save", "--in="+fname) + re.NoError(err) + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "delete", "--regexp", ".*f") + re.NoError(err) + + checkLoadRuleBundle(re, pdAddr, fname, []placement.GroupBundle{ + {ID: "pd", Index: 0, Override: false, Rules: []*placement.Rule{{GroupID: "pd", ID: placement.DefaultRuleID, Role: placement.Voter, Count: 3}}}, + }) +} + +func checkLoadRuleBundle(re *require.Assertions, pdAddr string, fname string, expectValues []placement.GroupBundle) { + var bundles []placement.GroupBundle + cmd := ctl.GetRootCmd() + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + _, err := tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "rule-bundle", "load", "--out="+fname) + re.NoError(err) + b, _ := os.ReadFile(fname) + re.NoError(json.Unmarshal(b, &bundles)) + return len(bundles) == len(expectValues) + }) + assertBundles(re, bundles, expectValues) +} + +func checkLoadRule(re *require.Assertions, pdAddr string, fname string, expectValues [][2]string) []placement.Rule { + var rules []placement.Rule + cmd := ctl.GetRootCmd() + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + _, err := tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "load", "--out="+fname) + re.NoError(err) + b, _ := os.ReadFile(fname) + re.NoError(json.Unmarshal(b, &rules)) + return len(rules) == len(expectValues) + }) + for i, v := range expectValues { + re.Equal(v, rules[i].Key()) + 
} + return rules +} + +func checkShowRuleKey(re *require.Assertions, pdAddr string, expectValues [][2]string, opts ...string) { + var ( + rules []placement.Rule + fit placement.RegionFit + ) + cmd := ctl.GetRootCmd() + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + args := []string{"-u", pdAddr, "config", "placement-rules", "show"} + output, err := tests.ExecuteCommand(cmd, append(args, opts...)...) + re.NoError(err) + err = json.Unmarshal(output, &rules) + if err == nil { + return len(rules) == len(expectValues) + } + re.NoError(json.Unmarshal(output, &fit)) + return len(fit.RuleFits) != 0 + }) + if len(rules) != 0 { + for i, v := range expectValues { + re.Equal(v, rules[i].Key()) + } + } + if len(fit.RuleFits) != 0 { + for i, v := range expectValues { + re.Equal(v, fit.RuleFits[i].Rule.Key()) + } + } +} + +func TestReplicationMode(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + cluster, err := pdTests.NewTestCluster(ctx, 1) + re.NoError(err) + defer cluster.Destroy() + err = cluster.RunInitialServers() + re.NoError(err) + re.NotEmpty(cluster.WaitLeader()) + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + store := &metapb.Store{ + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + } + leaderServer := cluster.GetLeaderServer() + re.NoError(leaderServer.BootstrapCluster()) + pdTests.MustPutStore(re, cluster, store) + + conf := config.ReplicationModeConfig{ + ReplicationMode: "majority", + DRAutoSync: config.DRAutoSyncReplicationConfig{ + WaitStoreTimeout: typeutil.NewDuration(time.Minute), + }, + } + check := func() { + output, err := tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "show", "replication-mode") + re.NoError(err) + var conf2 config.ReplicationModeConfig + re.NoError(json.Unmarshal(output, &conf2)) + re.Equal(conf, conf2) + } + + check() + + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "replication-mode", "dr-auto-sync") + re.NoError(err) + conf.ReplicationMode = "dr-auto-sync" + check() + + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "replication-mode", "dr-auto-sync", "label-key", "foobar") + re.NoError(err) + conf.DRAutoSync.LabelKey = "foobar" + check() + + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "replication-mode", "dr-auto-sync", "primary-replicas", "5") + re.NoError(err) + conf.DRAutoSync.PrimaryReplicas = 5 + check() + + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "replication-mode", "dr-auto-sync", "wait-store-timeout", "10m") + re.NoError(err) + conf.DRAutoSync.WaitStoreTimeout = typeutil.NewDuration(time.Minute * 10) + check() +} + +func (suite *configTestSuite) TestUpdateDefaultReplicaConfig() { + suite.env.RunTestBasedOnMode(suite.checkUpdateDefaultReplicaConfig) +} + +func (suite *configTestSuite) checkUpdateDefaultReplicaConfig(cluster *pdTests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + cmd := ctl.GetRootCmd() + + store := &metapb.Store{ + Id: 1, + State: metapb.StoreState_Up, + } + pdTests.MustPutStore(re, cluster, store) + checkMaxReplicas := func(expect uint64) { + args := []string{"-u", pdAddr, "config", "show", "replication"} + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err := tests.ExecuteCommand(cmd, args...) 
+ re.NoError(err) + replicationCfg := sc.ReplicationConfig{} + re.NoError(json.Unmarshal(output, &replicationCfg)) + return replicationCfg.MaxReplicas == expect + }) + } + + checkLocationLabels := func(expect int) { + args := []string{"-u", pdAddr, "config", "show", "replication"} + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + replicationCfg := sc.ReplicationConfig{} + re.NoError(json.Unmarshal(output, &replicationCfg)) + return len(replicationCfg.LocationLabels) == expect + }) + } + + checkIsolationLevel := func(expect string) { + args := []string{"-u", pdAddr, "config", "show", "replication"} + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + replicationCfg := sc.ReplicationConfig{} + re.NoError(json.Unmarshal(output, &replicationCfg)) + return replicationCfg.IsolationLevel == expect + }) + } + + checkRuleCount := func(expect int) { + args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", placement.DefaultGroupID, "--id", placement.DefaultRuleID} + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + rule := placement.Rule{} + re.NoError(json.Unmarshal(output, &rule)) + return rule.Count == expect + }) + } + + checkRuleLocationLabels := func(expect int) { + args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", placement.DefaultGroupID, "--id", placement.DefaultRuleID} + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + rule := placement.Rule{} + re.NoError(json.Unmarshal(output, &rule)) + return len(rule.LocationLabels) == expect + }) + } + + checkRuleIsolationLevel := func(expect string) { + args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--group", placement.DefaultGroupID, "--id", placement.DefaultRuleID} + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + rule := placement.Rule{} + re.NoError(json.Unmarshal(output, &rule)) + return rule.IsolationLevel == expect + }) + } + + // update successfully when placement rules is not enabled. + output, err := tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "max-replicas", "2") + re.NoError(err) + re.Contains(string(output), "Success!") + checkMaxReplicas(2) + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "location-labels", "zone,host") + re.NoError(err) + re.Contains(string(output), "Success!") + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "isolation-level", "zone") + re.NoError(err) + re.Contains(string(output), "Success!") + checkLocationLabels(2) + checkRuleLocationLabels(2) + checkIsolationLevel("zone") + checkRuleIsolationLevel("zone") + + // update successfully when only one default rule exists. 
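+	// Once placement rules are enabled, max-replicas updates are also written
+	// through to the default rule, so both the replication config and the
+	// default rule count are asserted from here on.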
+ output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "enable") + re.NoError(err) + re.Contains(string(output), "Success!") + + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "max-replicas", "3") + re.NoError(err) + re.Contains(string(output), "Success!") + checkMaxReplicas(3) + checkRuleCount(3) + + // We need to change isolation first because we will validate + // if the location label contains the isolation level when setting location labels. + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "isolation-level", "host") + re.NoError(err) + re.Contains(string(output), "Success!") + output, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "location-labels", "host") + re.NoError(err) + re.Contains(string(output), "Success!") + checkLocationLabels(1) + checkRuleLocationLabels(1) + checkIsolationLevel("host") + checkRuleIsolationLevel("host") + + // update unsuccessfully when many rule exists. + fname := suite.T().TempDir() + rules := []placement.Rule{ + { + GroupID: placement.DefaultGroupID, + ID: "test1", + Role: "voter", + Count: 1, + }, + } + b, err := json.Marshal(rules) + re.NoError(err) + os.WriteFile(fname, b, 0600) + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "placement-rules", "save", "--in="+fname) + re.NoError(err) + checkMaxReplicas(3) + checkRuleCount(3) + + _, err = tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "set", "max-replicas", "4") + re.NoError(err) + checkMaxReplicas(4) + checkRuleCount(4) + checkLocationLabels(1) + checkRuleLocationLabels(1) + checkIsolationLevel("host") + checkRuleIsolationLevel("host") +} + +func (suite *configTestSuite) TestPDServerConfig() { + suite.env.RunTestBasedOnMode(suite.checkPDServerConfig) +} + +func (suite *configTestSuite) checkPDServerConfig(cluster *pdTests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + cmd := ctl.GetRootCmd() + + store := &metapb.Store{ + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + } + pdTests.MustPutStore(re, cluster, store) + + output, err := tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "show", "server") + re.NoError(err) + var conf config.PDServerConfig + re.NoError(json.Unmarshal(output, &conf)) + + re.True(conf.UseRegionStorage) + re.Equal(24*time.Hour, conf.MaxResetTSGap.Duration) + re.Equal("table", conf.KeyType) + re.Equal(typeutil.StringSlice([]string{}), conf.RuntimeServices) + re.Equal("", conf.MetricStorage) + if conf.DashboardAddress != "auto" { // dashboard has been assigned + re.Equal(leaderServer.GetAddr(), conf.DashboardAddress) + } + re.Equal(int(3), conf.FlowRoundByDigit) +} + +func (suite *configTestSuite) TestMicroServiceConfig() { + suite.env.RunTestBasedOnMode(suite.checkMicroServiceConfig) +} + +func (suite *configTestSuite) checkMicroServiceConfig(cluster *pdTests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + cmd := ctl.GetRootCmd() + + store := &metapb.Store{ + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + } + pdTests.MustPutStore(re, cluster, store) + svr := leaderServer.GetServer() + output, err := tests.ExecuteCommand(cmd, "-u", pdAddr, "config", "show", "all") + re.NoError(err) + cfg := config.Config{} + re.NoError(json.Unmarshal(output, &cfg)) + re.True(svr.GetMicroServiceConfig().EnableSchedulingFallback) + re.True(cfg.MicroService.EnableSchedulingFallback) + 
// config set enable-scheduling-fallback + args := []string{"-u", pdAddr, "config", "set", "enable-scheduling-fallback", "false"} + _, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + re.False(svr.GetMicroServiceConfig().EnableSchedulingFallback) +} + +func (suite *configTestSuite) TestRegionRules() { + suite.env.RunTestBasedOnMode(suite.checkRegionRules) +} + +func (suite *configTestSuite) checkRegionRules(cluster *pdTests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() + cmd := ctl.GetRootCmd() + + storeID, regionID := uint64(1), uint64(2) + store := &metapb.Store{ + Id: storeID, + State: metapb.StoreState_Up, + } + pdTests.MustPutStore(re, cluster, store) + pdTests.MustPutRegion(re, cluster, regionID, storeID, []byte{}, []byte{}) + + args := []string{"-u", pdAddr, "config", "placement-rules", "show", "--region=" + strconv.Itoa(int(regionID)), "--detail"} + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + fit := &placement.RegionFit{} + re.NoError(json.Unmarshal(output, fit)) + re.Len(fit.RuleFits, 1) + re.Equal(placement.DefaultGroupID, fit.RuleFits[0].Rule.GroupID) + re.Equal(placement.DefaultRuleID, fit.RuleFits[0].Rule.ID) +} + +func assertBundles(re *require.Assertions, a, b []placement.GroupBundle) { + re.Len(b, len(a)) + for i := 0; i < len(a); i++ { + assertBundle(re, a[i], b[i]) + } +} + +func assertBundle(re *require.Assertions, a, b placement.GroupBundle) { + re.Equal(a.ID, b.ID) + re.Equal(a.Index, b.Index) + re.Equal(a.Override, b.Override) + re.Len(b.Rules, len(a.Rules)) + for i := 0; i < len(a.Rules); i++ { + assertRule(re, a.Rules[i], b.Rules[i]) + } +} + +func assertRule(re *require.Assertions, a, b *placement.Rule) { + re.Equal(a.GroupID, b.GroupID) + re.Equal(a.ID, b.ID) + re.Equal(a.Index, b.Index) + re.Equal(a.Override, b.Override) + re.Equal(a.StartKey, b.StartKey) + re.Equal(a.EndKey, b.EndKey) + re.Equal(a.Role, b.Role) + re.Equal(a.Count, b.Count) + re.Equal(a.LabelConstraints, b.LabelConstraints) + re.Equal(a.LocationLabels, b.LocationLabels) + re.Equal(a.IsolationLevel, b.IsolationLevel) +} diff --git a/tools/pd-ctl/tests/hot/hot_test.go b/tools/pd-ctl/tests/hot/hot_test.go new file mode 100644 index 00000000000..641bab686a4 --- /dev/null +++ b/tools/pd-ctl/tests/hot/hot_test.go @@ -0,0 +1,593 @@ +// Copyright 2019 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package hot_test + +import ( + "context" + "encoding/json" + "strconv" + "testing" + "time" + + "github.com/docker/go-units" + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/schedule/handler" + "github.com/tikv/pd/pkg/statistics" + "github.com/tikv/pd/pkg/statistics/utils" + "github.com/tikv/pd/pkg/storage" + "github.com/tikv/pd/pkg/utils/testutil" + "github.com/tikv/pd/pkg/utils/typeutil" + "github.com/tikv/pd/server" + "github.com/tikv/pd/server/config" + pdTests "github.com/tikv/pd/tests" + ctl "github.com/tikv/pd/tools/pd-ctl/pdctl" + "github.com/tikv/pd/tools/pd-ctl/tests" +) + +type hotTestSuite struct { + suite.Suite + env *pdTests.SchedulingTestEnvironment +} + +func TestHotTestSuite(t *testing.T) { + suite.Run(t, new(hotTestSuite)) +} + +func (suite *hotTestSuite) SetupSuite() { + suite.env = pdTests.NewSchedulingTestEnvironment(suite.T(), + func(conf *config.Config, _ string) { + conf.Schedule.MaxStoreDownTime.Duration = time.Hour + conf.Schedule.HotRegionCacheHitsThreshold = 0 + }, + ) +} + +func (suite *hotTestSuite) TearDownSuite() { + suite.env.Cleanup() +} + +func (suite *hotTestSuite) TearDownTest() { + cleanFunc := func(cluster *pdTests.TestCluster) { + leader := cluster.GetLeaderServer() + hotStat := leader.GetRaftCluster().GetHotStat() + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + hotStat = sche.GetCluster().GetHotStat() + } + hotStat.HotCache.CleanCache() + } + suite.env.RunTestBasedOnMode(cleanFunc) +} + +func (suite *hotTestSuite) TestHot() { + suite.env.RunTestBasedOnMode(suite.checkHot) +} + +func (suite *hotTestSuite) checkHot(cluster *pdTests.TestCluster) { + re := suite.Require() + statistics.Denoising = false + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + store1 := &metapb.Store{ + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + } + store2 := &metapb.Store{ + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + Labels: []*metapb.StoreLabel{{Key: "engine", Value: "tiflash"}}, + } + + pdTests.MustPutStore(re, cluster, store1) + pdTests.MustPutStore(re, cluster, store2) + + // test hot store + leaderServer := cluster.GetLeaderServer() + ss := leaderServer.GetStore(1) + now := time.Now().Unix() + + newStats := typeutil.DeepClone(ss.GetStoreStats(), core.StoreStatsFactory) + bytesWritten := uint64(8 * units.MiB) + bytesRead := uint64(16 * units.MiB) + keysWritten := uint64(2000) + keysRead := uint64(4000) + newStats.BytesWritten = bytesWritten + newStats.BytesRead = bytesRead + newStats.KeysWritten = keysWritten + newStats.KeysRead = keysRead + + rc := leaderServer.GetRaftCluster() + stats := rc.GetStoresStats() + hotStat := rc.GetHotStat() + getHotPeerStat := rc.GetHotPeerStat + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + stats = sche.GetCluster().GetStoresStats() + hotStat = sche.GetCluster().GetHotStat() + getHotPeerStat = sche.GetCluster().GetHotPeerStat + } + + for i := utils.DefaultWriteMfSize; i > 0; i-- { + start := uint64(now - utils.StoreHeartBeatReportInterval*int64(i)) + end := start + utils.StoreHeartBeatReportInterval + newStats.Interval = &pdpb.TimeInterval{StartTimestamp: start, EndTimestamp: end} + stats.Observe(ss.GetID(), newStats) + } + + for i := statistics.RegionsStatsRollingWindowsSize; i > 0; i-- { + stats.ObserveRegionsStats([]uint64{2}, + 
[]float64{float64(bytesWritten)}, + []float64{float64(keysWritten)}) + } + + args := []string{"-u", pdAddr, "hot", "store"} + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + hotStores := handler.HotStoreStats{} + re.NoError(json.Unmarshal(output, &hotStores)) + re.Equal(float64(bytesWritten)/utils.StoreHeartBeatReportInterval, hotStores.BytesWriteStats[1]) + re.Equal(float64(bytesRead)/utils.StoreHeartBeatReportInterval, hotStores.BytesReadStats[1]) + re.Equal(float64(keysWritten)/utils.StoreHeartBeatReportInterval, hotStores.KeysWriteStats[1]) + re.Equal(float64(keysRead)/utils.StoreHeartBeatReportInterval, hotStores.KeysReadStats[1]) + re.Equal(float64(bytesWritten), hotStores.BytesWriteStats[2]) + re.Equal(float64(keysWritten), hotStores.KeysWriteStats[2]) + + // test hot region + args = []string{"-u", pdAddr, "config", "set", "hot-region-cache-hits-threshold", "0"} + _, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + + hotStoreID := store1.Id + count := 0 + testHot := func(hotRegionID, hotStoreID uint64, hotType string) { + args = []string{"-u", pdAddr, "hot", hotType} + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + hotRegion := statistics.StoreHotPeersInfos{} + re.NoError(json.Unmarshal(output, &hotRegion)) + re.Contains(hotRegion.AsLeader, hotStoreID) + re.Equal(count, hotRegion.AsLeader[hotStoreID].Count) + if count > 0 { + re.Equal(hotRegionID, hotRegion.AsLeader[hotStoreID].Stats[count-1].RegionID) + } + } + + regionIDCounter := uint64(1) + testCommand := func(reportIntervals []uint64, hotType string) { + for _, reportInterval := range reportIntervals { + hotRegionID := regionIDCounter + regionIDCounter++ + switch hotType { + case "read": + loads := []float64{ + utils.RegionReadBytes: float64(1000000000 * reportInterval), + utils.RegionReadKeys: float64(1000000000 * reportInterval), + utils.RegionReadQueryNum: float64(1000000000 * reportInterval), + utils.RegionWriteBytes: 0, + utils.RegionWriteKeys: 0, + utils.RegionWriteQueryNum: 0, + } + leader := &metapb.Peer{ + Id: 100 + regionIDCounter, + StoreId: hotStoreID, + } + region := core.NewRegionInfo(&metapb.Region{ + Id: hotRegionID, + }, leader) + checkReadPeerTask := func(cache *statistics.HotPeerCache) { + stats := cache.CheckPeerFlow(region, []*metapb.Peer{leader}, loads, reportInterval) + for _, stat := range stats { + cache.UpdateStat(stat) + } + } + hotStat.CheckReadAsync(checkReadPeerTask) + testutil.Eventually(re, func() bool { + hotPeerStat := getHotPeerStat(utils.Read, hotRegionID, hotStoreID) + return hotPeerStat != nil + }) + if reportInterval >= utils.StoreHeartBeatReportInterval { + count++ + } + testHot(hotRegionID, hotStoreID, "read") + case "write": + pdTests.MustPutRegion( + re, cluster, + hotRegionID, hotStoreID, + []byte("c"), []byte("d"), + core.SetWrittenBytes(1000000000*reportInterval), core.SetReportInterval(0, reportInterval)) + testutil.Eventually(re, func() bool { + hotPeerStat := getHotPeerStat(utils.Write, hotRegionID, hotStoreID) + return hotPeerStat != nil + }) + if reportInterval >= utils.RegionHeartBeatReportInterval { + count++ + } + testHot(hotRegionID, hotStoreID, "write") + } + } + } + reportIntervals := []uint64{ + statistics.HotRegionReportMinInterval, + statistics.HotRegionReportMinInterval + 1, + utils.RegionHeartBeatReportInterval, + utils.RegionHeartBeatReportInterval + 1, + utils.RegionHeartBeatReportInterval * 2, + utils.RegionHeartBeatReportInterval*2 + 1, + } + testCommand(reportIntervals, "write") + count = 0 + 
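+	// Reset the expected hot-peer count: the read case below counts peers against
+	// the store heartbeat interval, whereas the write case above used the region
+	// heartbeat interval.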
reportIntervals = []uint64{ + statistics.HotRegionReportMinInterval, + statistics.HotRegionReportMinInterval + 1, + utils.StoreHeartBeatReportInterval, + utils.StoreHeartBeatReportInterval + 1, + utils.StoreHeartBeatReportInterval * 2, + utils.StoreHeartBeatReportInterval*2 + 1, + } + testCommand(reportIntervals, "read") +} + +func (suite *hotTestSuite) TestHotWithStoreID() { + suite.env.RunTestBasedOnMode(suite.checkHotWithStoreID) +} + +func (suite *hotTestSuite) checkHotWithStoreID(cluster *pdTests.TestCluster) { + re := suite.Require() + statistics.Denoising = false + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + leaderServer := cluster.GetLeaderServer() + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + + for _, store := range stores { + pdTests.MustPutStore(re, cluster, store) + } + s := &server.GrpcServer{Server: leaderServer.GetServer()} + for _, store := range stores { + resp1, err := s.StoreHeartbeat( + context.Background(), &pdpb.StoreHeartbeatRequest{ + Header: &pdpb.RequestHeader{ClusterId: leaderServer.GetClusterID()}, + Stats: &pdpb.StoreStats{ + StoreId: store.Id, + Capacity: 1000 * units.MiB, + Available: 1000 * units.MiB, + }, + }, + ) + re.NoError(err) + re.Empty(resp1.GetHeader().GetError()) + } + + pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetWrittenBytes(3000000000), core.SetReportInterval(0, utils.RegionHeartBeatReportInterval)) + pdTests.MustPutRegion(re, cluster, 2, 2, []byte("c"), []byte("d"), core.SetWrittenBytes(6000000000), core.SetReportInterval(0, utils.RegionHeartBeatReportInterval)) + pdTests.MustPutRegion(re, cluster, 3, 1, []byte("e"), []byte("f"), core.SetWrittenBytes(9000000000), core.SetReportInterval(0, utils.RegionHeartBeatReportInterval)) + + getHotPeerStat := leaderServer.GetRaftCluster().GetHotPeerStat + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + getHotPeerStat = sche.GetCluster().GetHotPeerStat + } + + testutil.Eventually(re, func() bool { + return getHotPeerStat(utils.Write, 1, 1) != nil && + getHotPeerStat(utils.Write, 2, 2) != nil && + getHotPeerStat(utils.Write, 3, 1) != nil + }) + args := []string{"-u", pdAddr, "hot", "write", "1"} + output, err := tests.ExecuteCommand(cmd, args...) 
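+	// Store 1 is the leader of regions 1 and 3, which reported 3000000000 and
+	// 9000000000 written bytes over one region heartbeat interval (60s), so the
+	// expected leader count is 2 with a total rate of 200000000 B/s.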
+ hotRegion := statistics.StoreHotPeersInfos{} + re.NoError(err) + re.NoError(json.Unmarshal(output, &hotRegion)) + re.Len(hotRegion.AsLeader, 1) + re.Equal(2, hotRegion.AsLeader[1].Count) + re.Equal(float64(200000000), hotRegion.AsLeader[1].TotalBytesRate) +} + +func (suite *hotTestSuite) TestHotWithoutHotPeer() { + suite.env.RunTestBasedOnMode(suite.checkHotWithoutHotPeer) +} + +func (suite *hotTestSuite) checkHotWithoutHotPeer(cluster *pdTests.TestCluster) { + re := suite.Require() + statistics.Denoising = false + + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + + leaderServer := cluster.GetLeaderServer() + for _, store := range stores { + pdTests.MustPutStore(re, cluster, store) + } + timestamp := uint64(time.Now().UnixNano()) + load := 1024.0 + s := &server.GrpcServer{Server: leaderServer.GetServer()} + for _, store := range stores { + for i := 0; i < 5; i++ { + resp1, err := s.StoreHeartbeat( + context.Background(), &pdpb.StoreHeartbeatRequest{ + Header: &pdpb.RequestHeader{ClusterId: leaderServer.GetClusterID()}, + Stats: &pdpb.StoreStats{ + StoreId: store.Id, + BytesRead: uint64(load * utils.StoreHeartBeatReportInterval), + KeysRead: uint64(load * utils.StoreHeartBeatReportInterval), + BytesWritten: uint64(load * utils.StoreHeartBeatReportInterval), + KeysWritten: uint64(load * utils.StoreHeartBeatReportInterval), + Capacity: 1000 * units.MiB, + Available: 1000 * units.MiB, + Interval: &pdpb.TimeInterval{ + StartTimestamp: timestamp + uint64(i*utils.StoreHeartBeatReportInterval), + EndTimestamp: timestamp + uint64((i+1)*utils.StoreHeartBeatReportInterval)}, + }, + }, + ) + re.NoError(err) + re.Empty(resp1.GetHeader().GetError()) + } + } + + { + args := []string{"-u", pdAddr, "hot", "read"} + output, err := tests.ExecuteCommand(cmd, args...) + hotRegion := statistics.StoreHotPeersInfos{} + re.NoError(err) + re.NoError(json.Unmarshal(output, &hotRegion)) + re.NotNil(hotRegion.AsPeer[1]) + re.Zero(hotRegion.AsPeer[1].Count) + re.Zero(hotRegion.AsPeer[1].TotalBytesRate) + re.Equal(load, hotRegion.AsPeer[1].StoreByteRate) + re.Zero(hotRegion.AsLeader[1].Count) + re.Zero(hotRegion.AsLeader[1].TotalBytesRate) + re.Equal(load, hotRegion.AsLeader[1].StoreByteRate) + } + { + args := []string{"-u", pdAddr, "hot", "write"} + output, err := tests.ExecuteCommand(cmd, args...) + hotRegion := statistics.StoreHotPeersInfos{} + re.NoError(err) + re.NoError(json.Unmarshal(output, &hotRegion)) + re.Zero(hotRegion.AsPeer[1].Count) + re.Zero(hotRegion.AsPeer[1].TotalBytesRate) + re.Equal(load, hotRegion.AsPeer[1].StoreByteRate) + re.Zero(hotRegion.AsLeader[1].Count) + re.Zero(hotRegion.AsLeader[1].TotalBytesRate) + re.Zero(hotRegion.AsLeader[1].StoreByteRate) // write leader sum + } +} + +func TestHistoryHotRegions(t *testing.T) { + // TODO: support history hotspot in scheduling server with stateless in the future. 
+ // Ref: https://github.com/tikv/pd/pull/7183 + re := require.New(t) + statistics.Denoising = false + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + cluster, err := pdTests.NewTestCluster(ctx, 1, + func(cfg *config.Config, _ string) { + cfg.Schedule.HotRegionCacheHitsThreshold = 0 + cfg.Schedule.HotRegionsWriteInterval.Duration = 1000 * time.Millisecond + cfg.Schedule.HotRegionsReservedDays = 1 + }, + ) + re.NoError(err) + defer cluster.Destroy() + err = cluster.RunInitialServers() + re.NoError(err) + re.NotEmpty(cluster.WaitLeader()) + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 3, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + + leaderServer := cluster.GetLeaderServer() + re.NoError(leaderServer.BootstrapCluster()) + for _, store := range stores { + pdTests.MustPutStore(re, cluster, store) + } + defer cluster.Destroy() + startTime := time.Now().Unix() + pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetWrittenBytes(3000000000), + core.SetReportInterval(uint64(startTime-utils.RegionHeartBeatReportInterval), uint64(startTime))) + pdTests.MustPutRegion(re, cluster, 2, 2, []byte("c"), []byte("d"), core.SetWrittenBytes(6000000000), + core.SetReportInterval(uint64(startTime-utils.RegionHeartBeatReportInterval), uint64(startTime))) + pdTests.MustPutRegion(re, cluster, 3, 1, []byte("e"), []byte("f"), core.SetWrittenBytes(9000000000), + core.SetReportInterval(uint64(startTime-utils.RegionHeartBeatReportInterval), uint64(startTime))) + pdTests.MustPutRegion(re, cluster, 4, 3, []byte("g"), []byte("h"), core.SetWrittenBytes(9000000000), + core.SetReportInterval(uint64(startTime-utils.RegionHeartBeatReportInterval), uint64(startTime))) + // wait hot scheduler starts + testutil.Eventually(re, func() bool { + hotRegionStorage := leaderServer.GetServer().GetHistoryHotRegionStorage() + iter := hotRegionStorage.NewIterator([]string{utils.Write.String()}, startTime*1000, time.Now().UnixNano()/int64(time.Millisecond)) + next, err := iter.Next() + return err == nil && next != nil + }) + endTime := time.Now().UnixNano() / int64(time.Millisecond) + start := strconv.FormatInt(startTime*1000, 10) + end := strconv.FormatInt(endTime, 10) + args := []string{"-u", pdAddr, "hot", "history", + start, end, + "hot_region_type", "write", + "region_id", "1,2", + "store_id", "1,4", + "is_learner", "false", + } + output, err := tests.ExecuteCommand(cmd, args...) + hotRegions := storage.HistoryHotRegions{} + re.NoError(err) + re.NoError(json.Unmarshal(output, &hotRegions)) + regions := hotRegions.HistoryHotRegion + re.Len(regions, 1) + re.Equal(uint64(1), regions[0].RegionID) + re.Equal(uint64(1), regions[0].StoreID) + re.Equal("write", regions[0].HotRegionType) + args = []string{"-u", pdAddr, "hot", "history", + start, end, + "hot_region_type", "write", + "region_id", "1,2", + "store_id", "1,2", + } + output, err = tests.ExecuteCommand(cmd, args...) 
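+	// With store_id widened to "1,2", both region 1 and region 2 match; the check
+	// below only asserts that the two records come back sorted (newer update time
+	// first, or ascending region id), not which one comes first.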
+ re.NoError(err) + re.NoError(json.Unmarshal(output, &hotRegions)) + regions = hotRegions.HistoryHotRegion + re.Len(regions, 2) + isSort := regions[0].UpdateTime > regions[1].UpdateTime || regions[0].RegionID < regions[1].RegionID + re.True(isSort) + args = []string{"-u", pdAddr, "hot", "history", + start, end, + "hot_region_type", "read", + "is_leader", "false", + "peer_id", "12", + } + output, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + re.NoError(json.Unmarshal(output, &hotRegions)) + re.Empty(hotRegions.HistoryHotRegion) + args = []string{"-u", pdAddr, "hot", "history"} + output, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + re.Error(json.Unmarshal(output, &hotRegions)) + args = []string{"-u", pdAddr, "hot", "history", + start, end, + "region_id", "dada", + } + output, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + re.Error(json.Unmarshal(output, &hotRegions)) + args = []string{"-u", pdAddr, "hot", "history", + start, end, + "region_ids", "12323", + } + output, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + re.Error(json.Unmarshal(output, &hotRegions)) +} + +func TestBuckets(t *testing.T) { + // TODO: support forward bucket request in scheduling server in the future. + re := require.New(t) + statistics.Denoising = false + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + cluster, err := pdTests.NewTestCluster(ctx, 1, func(cfg *config.Config, _ string) { cfg.Schedule.HotRegionCacheHitsThreshold = 0 }) + re.NoError(err) + defer cluster.Destroy() + err = cluster.RunInitialServers() + re.NoError(err) + re.NotEmpty(cluster.WaitLeader()) + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + + leaderServer := cluster.GetLeaderServer() + re.NoError(leaderServer.BootstrapCluster()) + for _, store := range stores { + pdTests.MustPutStore(re, cluster, store) + } + defer cluster.Destroy() + + pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetWrittenBytes(3000000000), core.SetReportInterval(0, utils.RegionHeartBeatReportInterval)) + pdTests.MustPutRegion(re, cluster, 2, 2, []byte("c"), []byte("d"), core.SetWrittenBytes(6000000000), core.SetReportInterval(0, utils.RegionHeartBeatReportInterval)) + pdTests.MustPutRegion(re, cluster, 3, 1, []byte("e"), []byte("f"), core.SetWrittenBytes(9000000000), core.SetReportInterval(0, utils.RegionHeartBeatReportInterval)) + + stats := &metapb.BucketStats{ + ReadBytes: []uint64{10 * units.MiB}, + ReadKeys: []uint64{11 * units.MiB}, + ReadQps: []uint64{0}, + WriteKeys: []uint64{12 * units.MiB}, + WriteBytes: []uint64{13 * units.MiB}, + WriteQps: []uint64{0}, + } + buckets := pdTests.MustReportBuckets(re, cluster, 1, []byte("a"), []byte("b"), stats) + args := []string{"-u", pdAddr, "hot", "buckets", "1"} + output, err := tests.ExecuteCommand(cmd, args...) 
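+	// Bucket stats are reported as totals for one report period, so the expected
+	// values below are re-derived by dividing by the period in seconds
+	// (GetPeriodInMs() / 1000).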
+ re.NoError(err) + hotBuckets := handler.HotBucketsResponse{} + re.NoError(json.Unmarshal(output, &hotBuckets)) + re.Len(hotBuckets, 1) + re.Len(hotBuckets[1], 1) + item := hotBuckets[1][0] + re.Equal(core.HexRegionKeyStr(buckets.GetKeys()[0]), item.StartKey) + re.Equal(core.HexRegionKeyStr(buckets.GetKeys()[1]), item.EndKey) + re.Equal(1, item.HotDegree) + interval := buckets.GetPeriodInMs() / 1000 + re.Equal(buckets.GetStats().ReadBytes[0]/interval, item.ReadBytes) + re.Equal(buckets.GetStats().ReadKeys[0]/interval, item.ReadKeys) + re.Equal(buckets.GetStats().WriteBytes[0]/interval, item.WriteBytes) + re.Equal(buckets.GetStats().WriteKeys[0]/interval, item.WriteKeys) + + args = []string{"-u", pdAddr, "hot", "buckets", "2"} + output, err = tests.ExecuteCommand(cmd, args...) + re.NoError(err) + hotBuckets = handler.HotBucketsResponse{} + re.NoError(json.Unmarshal(output, &hotBuckets)) + re.Nil(hotBuckets[2]) +} diff --git a/tools/pd-ctl/tests/scheduler/scheduler_test.go b/tools/pd-ctl/tests/scheduler/scheduler_test.go new file mode 100644 index 00000000000..80ac1c7ceca --- /dev/null +++ b/tools/pd-ctl/tests/scheduler/scheduler_test.go @@ -0,0 +1,913 @@ +// Copyright 2019 TiKV Project Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package scheduler_test + +import ( + "encoding/json" + "fmt" + "reflect" + "strings" + "testing" + "time" + + "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/metapb" + "github.com/spf13/cobra" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/core" + sc "github.com/tikv/pd/pkg/schedule/config" + "github.com/tikv/pd/pkg/slice" + "github.com/tikv/pd/pkg/utils/testutil" + "github.com/tikv/pd/pkg/versioninfo" + pdTests "github.com/tikv/pd/tests" + ctl "github.com/tikv/pd/tools/pd-ctl/pdctl" + "github.com/tikv/pd/tools/pd-ctl/tests" +) + +type schedulerTestSuite struct { + suite.Suite + env *pdTests.SchedulingTestEnvironment + defaultSchedulers []string +} + +func TestSchedulerTestSuite(t *testing.T) { + suite.Run(t, new(schedulerTestSuite)) +} + +func (suite *schedulerTestSuite) SetupSuite() { + re := suite.Require() + re.NoError(failpoint.Enable("github.com/tikv/pd/server/cluster/skipStoreConfigSync", `return(true)`)) + suite.defaultSchedulers = []string{ + "balance-leader-scheduler", + "balance-region-scheduler", + "balance-hot-region-scheduler", + "evict-slow-store-scheduler", + } +} + +func (suite *schedulerTestSuite) SetupTest() { + // use a new environment to avoid affecting other tests + suite.env = pdTests.NewSchedulingTestEnvironment(suite.T()) +} + +func (suite *schedulerTestSuite) TearDownSuite() { + re := suite.Require() + suite.env.Cleanup() + re.NoError(failpoint.Disable("github.com/tikv/pd/server/cluster/skipStoreConfigSync")) +} + +func (suite *schedulerTestSuite) TearDownTest() { + cleanFunc := func(cluster *pdTests.TestCluster) { + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + var currentSchedulers []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, ¤tSchedulers) + for _, scheduler := range suite.defaultSchedulers { + if slice.NoneOf(currentSchedulers, func(i int) bool { + return currentSchedulers[i] == scheduler + }) { + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", scheduler}, nil) + re.Contains(echo, "Success!") + } + } + for _, scheduler := range currentSchedulers { + if slice.NoneOf(suite.defaultSchedulers, func(i int) bool { + return suite.defaultSchedulers[i] == scheduler + }) { + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", scheduler}, nil) + re.Contains(echo, "Success!") + } + } + } + suite.env.RunTestBasedOnMode(cleanFunc) + suite.env.Cleanup() +} + +func (suite *schedulerTestSuite) TestScheduler() { + suite.env.RunTestBasedOnMode(suite.checkScheduler) +} + +func (suite *schedulerTestSuite) checkScheduler(cluster *pdTests.TestCluster) { + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 3, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 4, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + + mustUsage := func(args []string) { + output, err := tests.ExecuteCommand(cmd, args...) 
+ re.NoError(err) + re.Contains(string(output), "Usage") + } + + checkSchedulerConfigCommand := func(expectedConfig map[string]any, schedulerName string) { + testutil.Eventually(re, func() bool { + configInfo := make(map[string]any) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName}, &configInfo) + return reflect.DeepEqual(expectedConfig["store-id-ranges"], configInfo["store-id-ranges"]) + }) + } + + leaderServer := cluster.GetLeaderServer() + for _, store := range stores { + pdTests.MustPutStore(re, cluster, store) + } + + // note: because pdqsort is a unstable sort algorithm, set ApproximateSize for this region. + pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10)) + + // scheduler show command + expected := map[string]bool{ + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(re, cmd, pdAddr, nil, expected) + + // scheduler delete command + args := []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(re, cmd, pdAddr, args, expected) + + // avoid the influence of the scheduler order + schedulers := []string{"evict-leader-scheduler", "grant-leader-scheduler", "evict-leader-scheduler", "grant-leader-scheduler"} + + checkStorePause := func(changedStores []uint64, schedulerName string) { + status := func() string { + switch schedulerName { + case "evict-leader-scheduler": + return "paused" + case "grant-leader-scheduler": + return "resumed" + default: + re.Fail(fmt.Sprintf("unknown scheduler %s", schedulerName)) + return "" + } + }() + for _, store := range stores { + isStorePaused := !cluster.GetLeaderServer().GetRaftCluster().GetStore(store.GetId()).AllowLeaderTransfer() + if slice.AnyOf(changedStores, func(i int) bool { + return store.GetId() == changedStores[i] + }) { + re.True(isStorePaused, + fmt.Sprintf("store %d should be %s with %s", store.GetId(), status, schedulerName)) + } else { + re.False(isStorePaused, + fmt.Sprintf("store %d should not be %s with %s", store.GetId(), status, schedulerName)) + } + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + re.Equal(isStorePaused, !sche.GetCluster().GetStore(store.GetId()).AllowLeaderTransfer()) + } + } + } + + for idx := range schedulers { + checkStorePause([]uint64{}, schedulers[idx]) + + // will fail because the scheduler is not existed + args = []string{"-u", pdAddr, "scheduler", "config", schedulers[idx], "add-store", "3"} + output := mustExec(re, cmd, args, nil) + re.Contains(output, fmt.Sprintf("Unable to update config: scheduler %s does not exist.", schedulers[idx])) + + // scheduler add command + args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "2"} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(re, cmd, pdAddr, args, expected) + + // scheduler config show command + expectedConfig := make(map[string]any) + expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}} + checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) + checkStorePause([]uint64{2}, schedulers[idx]) + + // scheduler config update command + 
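+		// "add-store" appends store 3 to the scheduler's existing store list, so
+		// the expected store-id-ranges below contain both store 2 and store 3.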
args = []string{"-u", pdAddr, "scheduler", "config", schedulers[idx], "add-store", "3"} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, + } + + // check update success + checkSchedulerCommand(re, cmd, pdAddr, args, expected) + expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}, "3": []any{map[string]any{"end-key": "", "start-key": ""}}} + checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) + checkStorePause([]uint64{2, 3}, schedulers[idx]) + + // scheduler delete command + args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx]} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(re, cmd, pdAddr, args, expected) + checkStorePause([]uint64{}, schedulers[idx]) + + // scheduler add command + args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "2"} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(re, cmd, pdAddr, args, expected) + checkStorePause([]uint64{2}, schedulers[idx]) + + // scheduler add command twice + args = []string{"-u", pdAddr, "scheduler", "add", schedulers[idx], "4"} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(re, cmd, pdAddr, args, expected) + + // check add success + expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}, "4": []any{map[string]any{"end-key": "", "start-key": ""}}} + checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) + checkStorePause([]uint64{2, 4}, schedulers[idx]) + + // scheduler remove command [old] + args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx] + "-4"} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + schedulers[idx]: true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(re, cmd, pdAddr, args, expected) + + // check remove success + expectedConfig["store-id-ranges"] = map[string]any{"2": []any{map[string]any{"end-key": "", "start-key": ""}}} + checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) + checkStorePause([]uint64{2}, schedulers[idx]) + + // scheduler remove command, when remove the last store, it should remove whole scheduler + args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx] + "-2"} + expected = map[string]bool{ + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(re, cmd, pdAddr, args, expected) + checkStorePause([]uint64{}, schedulers[idx]) + } + + // test remove and add scheduler + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-region-scheduler"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) + re.NotContains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, 
"scheduler", "add", "balance-region-scheduler"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "1"}, nil) + re.Equal("Success! The scheduler is created.\n", echo) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "2"}, nil) + re.Equal("Success! The scheduler has been applied to the store.\n", echo) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-2"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { // wait for removed scheduler to be synced to scheduling server. + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "evict-leader-scheduler"}, nil) + return strings.Contains(echo, "[404] scheduler not found") + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil) + re.Contains(echo, "Unable to update config: scheduler evict-leader-scheduler does not exist.") + + // test remove and add + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-hot-region-scheduler"}, nil) + re.Contains(echo, "Success") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-hot-region-scheduler"}, nil) + re.Contains(echo, "Success") + + // test show scheduler with paused and disabled status. + checkSchedulerWithStatusCommand := func(status string, expected []string) { + testutil.Eventually(re, func() bool { + var schedulers []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show", "--status", status}, &schedulers) + return reflect.DeepEqual(expected, schedulers) + }) + } + + // test scatter range scheduler + for _, name := range []string{ + "test", "test#", "?test", + /* TODO: to handle case like "tes&t", we need to modify the server's JSON render to unescape the HTML characters */ + } { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "scatter-range-scheduler", "--format=raw", "a", "b", name}, nil) + re.Contains(echo, "Success!") + schedulerName := fmt.Sprintf("scatter-range-scheduler-%s", name) + // test show scheduler + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return strings.Contains(echo, schedulerName) + }) + // test remove scheduler + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", schedulerName}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return !strings.Contains(echo, schedulerName) + }) + } + + mustUsage([]string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler"}) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) + re.Contains(echo, "Success!") + checkSchedulerWithStatusCommand("paused", []string{ + "balance-leader-scheduler", + }) + result := make(map[string]any) + testutil.Eventually(re, func() bool { + mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "describe", "balance-leader-scheduler"}, &result) + return len(result) != 0 && result["status"] == "paused" && result["summary"] == "" + }, testutil.WithWaitFor(30*time.Second)) + + mustUsage([]string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler", 
"60"}) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler"}, nil) + re.Contains(echo, "Success!") + checkSchedulerWithStatusCommand("paused", []string{}) + + // set label scheduler to disabled manually. + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "label-scheduler"}, nil) + re.Contains(echo, "Success!") + cfg := leaderServer.GetServer().GetScheduleConfig() + origin := cfg.Schedulers + cfg.Schedulers = sc.SchedulerConfigs{{Type: "label", Disable: true}} + err := leaderServer.GetServer().SetScheduleConfig(*cfg) + re.NoError(err) + checkSchedulerWithStatusCommand("disabled", []string{"label-scheduler"}) + // reset Schedulers in ScheduleConfig + cfg.Schedulers = origin + err = leaderServer.GetServer().SetScheduleConfig(*cfg) + re.NoError(err) + checkSchedulerWithStatusCommand("disabled", []string{}) +} + +func (suite *schedulerTestSuite) TestSchedulerConfig() { + suite.env.RunTestBasedOnMode(suite.checkSchedulerConfig) +} + +func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestCluster) { + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 3, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 4, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + for _, store := range stores { + pdTests.MustPutStore(re, cluster, store) + } + + // note: because pdqsort is an unstable sort algorithm, set ApproximateSize for this region. + pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10)) + + // test evict-slow-store && evict-slow-trend schedulers config + evictSlownessSchedulers := []string{"evict-slow-store-scheduler", "evict-slow-trend-scheduler"} + for _, schedulerName := range evictSlownessSchedulers { + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", schedulerName}, nil) + if strings.Contains(echo, "Success!") { + re.Contains(echo, "Success!") + } else { + re.Contains(echo, "scheduler existed") + } + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return strings.Contains(echo, schedulerName) + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "set", "recovery-duration", "100"}, nil) + re.Contains(echo, "Success! Config updated.") + conf := make(map[string]any) + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "show"}, &conf) + return conf["recovery-duration"] == 100. 
+ }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", schedulerName}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return !strings.Contains(echo, schedulerName) + }) + } + // test shuffle region config + checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "add", "shuffle-region-scheduler"}, map[string]bool{ + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "shuffle-region-scheduler": true, + }) + var roles []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "show-roles"}, &roles) + re.Equal([]string{"leader", "follower", "learner"}, roles) + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "set-roles", "learner"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler", "show-roles"}, &roles) + return reflect.DeepEqual([]string{"learner"}, roles) + }) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-region-scheduler"}, &roles) + re.Equal([]string{"learner"}, roles) + + checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "remove", "shuffle-region-scheduler"}, map[string]bool{ + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + }) + + // test grant hot region scheduler config + checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "add", "grant-hot-region-scheduler", "1", "1,2,3"}, map[string]bool{ + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "grant-hot-region-scheduler": true, + }) + var conf3 map[string]any + expected3 := map[string]any{ + "store-id": []any{float64(1), float64(2), float64(3)}, + "store-leader-id": float64(1), + } + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) + re.Equal(expected3, conf3) + + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler", "set", "2", "1,2,3"}, nil) + re.Contains(echo, "Success!") + expected3["store-leader-id"] = float64(2) + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) + return reflect.DeepEqual(expected3, conf3) + }) + checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "remove", "grant-hot-region-scheduler"}, map[string]bool{ + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + }) + + // test shuffle hot region scheduler + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "shuffle-hot-region-scheduler"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return strings.Contains(echo, "shuffle-hot-region-scheduler") + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "shuffle-hot-region-scheduler", "set", "limit", "127"}, nil) + re.Contains(echo, "Success!") + conf := make(map[string]any) + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", 
"shuffle-hot-region-scheduler", "show"}, &conf) + return conf["limit"] == 127. + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "shuffle-hot-region-scheduler"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return !strings.Contains(echo, "shuffle-hot-region-scheduler") + }) + + // test evict leader scheduler + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "1"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return strings.Contains(echo, "evict-leader-scheduler") + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "evict-leader-scheduler", "set", "batch", "5"}, nil) + re.Contains(echo, "Success!") + conf = make(map[string]any) + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "evict-leader-scheduler"}, &conf) + return conf["batch"] == 5. + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return !strings.Contains(echo, "evict-leader-scheduler") + }) + + // test balance leader config + conf = make(map[string]any) + conf1 := make(map[string]any) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "show"}, &conf) + re.Equal(4., conf["batch"]) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "set", "batch", "3"}, nil) + re.Contains(echo, "Success!") + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler"}, &conf1) + return conf1["batch"] == 3. + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-leader-scheduler"}, nil) + re.NotContains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-leader-scheduler"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-leader-scheduler"}, nil) + re.Contains(echo, "404") + re.Contains(echo, "PD:scheduler:ErrSchedulerNotFound]scheduler not found") + // The scheduling service need time to sync from PD. 
+ testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler"}, nil) + return strings.Contains(echo, "404") && strings.Contains(echo, "scheduler not found") + }) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-leader-scheduler"}, nil) + re.Contains(echo, "Success!") +} + +func (suite *schedulerTestSuite) TestHotRegionSchedulerConfig() { + suite.env.RunTestBasedOnMode(suite.checkHotRegionSchedulerConfig) +} + +func (suite *schedulerTestSuite) checkHotRegionSchedulerConfig(cluster *pdTests.TestCluster) { + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 3, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 4, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + for _, store := range stores { + pdTests.MustPutStore(re, cluster, store) + } + // note: because pdqsort is an unstable sort algorithm, set ApproximateSize for this region. + pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10)) + leaderServer := cluster.GetLeaderServer() + // test hot region config + expected1 := map[string]any{ + "min-hot-byte-rate": float64(100), + "min-hot-key-rate": float64(10), + "min-hot-query-rate": float64(10), + "src-tolerance-ratio": 1.05, + "dst-tolerance-ratio": 1.05, + "read-priorities": []any{"byte", "key"}, + "write-leader-priorities": []any{"key", "byte"}, + "write-peer-priorities": []any{"byte", "key"}, + "strict-picking-store": "true", + "rank-formula-version": "v2", + "split-thresholds": 0.2, + "history-sample-duration": "5m0s", + "history-sample-interval": "30s", + } + checkHotSchedulerConfig := func(expect map[string]any) { + testutil.Eventually(re, func() bool { + var conf1 map[string]any + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + return reflect.DeepEqual(expect, conf1) + }) + } + // scheduler show command + expected := map[string]bool{ + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "balance-hot-region-scheduler": true, + "evict-slow-store-scheduler": true, + } + checkSchedulerCommand(re, cmd, pdAddr, nil, expected) + var conf map[string]any + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "list"}, &conf) + re.Equal(expected1, conf) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "show"}, &conf) + re.Equal(expected1, conf) + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "src-tolerance-ratio", "1.02"}, nil) + re.Contains(echo, "Success!") + expected1["src-tolerance-ratio"] = 1.02 + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "disabled", "true"}, nil) + re.Contains(echo, "Failed!") + + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,key"}, nil) + re.Contains(echo, "Success!") + expected1["read-priorities"] = []any{"byte", "key"} + checkHotSchedulerConfig(expected1) + + echo = 
mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,byte"}, nil) + re.Contains(echo, "Success!") + expected1["read-priorities"] = []any{"key", "byte"} + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "foo,bar"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", ""}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,key"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,byte"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,key,byte"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + + // write-priorities is divided into write-leader-priorities and write-peer-priorities + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-priorities", "key,byte"}, nil) + re.Contains(echo, "Failed!") + re.Contains(echo, "Config item is not found.") + checkHotSchedulerConfig(expected1) + + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v0"}, nil) + re.Contains(echo, "Failed!") + checkHotSchedulerConfig(expected1) + expected1["rank-formula-version"] = "v2" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v2"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + expected1["rank-formula-version"] = "v1" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v1"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + expected1["forbid-rw-type"] = "read" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "forbid-rw-type", "read"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + expected1["history-sample-duration"] = "1m0s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-duration", "1m"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + expected1["history-sample-interval"] = "1s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-interval", "1s"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + expected1["history-sample-duration"] = "0s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", 
"config", "balance-hot-region-scheduler", "set", "history-sample-duration", "0s"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + expected1["history-sample-interval"] = "0s" + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "history-sample-interval", "0s"}, nil) + re.Contains(echo, "Success!") + checkHotSchedulerConfig(expected1) + + // test compatibility + re.Equal("2.0.0", leaderServer.GetClusterVersion().String()) + for _, store := range stores { + version := versioninfo.HotScheduleWithQuery + store.Version = versioninfo.MinSupportedVersion(version).String() + store.LastHeartbeat = time.Now().UnixNano() + pdTests.MustPutStore(re, cluster, store) + } + re.Equal("5.2.0", leaderServer.GetClusterVersion().String()) + // After upgrading, we can use query. + expected1["write-leader-priorities"] = []any{"query", "byte"} + checkHotSchedulerConfig(expected1) + // cannot set qps as write-peer-priorities + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-peer-priorities", "query,byte"}, nil) + re.Contains(echo, "query is not allowed to be set in priorities for write-peer-priorities") + checkHotSchedulerConfig(expected1) +} + +func (suite *schedulerTestSuite) TestSchedulerDiagnostic() { + suite.env.RunTestBasedOnMode(suite.checkSchedulerDiagnostic) +} + +func (suite *schedulerTestSuite) checkSchedulerDiagnostic(cluster *pdTests.TestCluster) { + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + checkSchedulerDescribeCommand := func(schedulerName, expectedStatus, expectedSummary string) { + result := make(map[string]any) + testutil.Eventually(re, func() bool { + mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "describe", schedulerName}, &result) + return len(result) != 0 && expectedStatus == result["status"] && expectedSummary == result["summary"] + }, testutil.WithTickInterval(50*time.Millisecond)) + } + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 3, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 4, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + for _, store := range stores { + pdTests.MustPutStore(re, cluster, store) + } + + // note: because pdqsort is an unstable sort algorithm, set ApproximateSize for this region. 
+ pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10)) + + echo := mustExec(re, cmd, []string{"-u", pdAddr, "config", "set", "enable-diagnostic", "true"}, nil) + re.Contains(echo, "Success!") + checkSchedulerDescribeCommand("balance-region-scheduler", "pending", "1 store(s) RegionNotMatchRule; ") + + // scheduler delete command + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) + re.Contains(echo, "Success!") + checkSchedulerDescribeCommand("balance-region-scheduler", "disabled", "") + + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) + re.Contains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler"}, nil) + re.Contains(echo, "Success!") + checkSchedulerDescribeCommand("balance-leader-scheduler", "normal", "") +} + +func (suite *schedulerTestSuite) TestEvictLeaderScheduler() { + suite.env.RunTestBasedOnMode(suite.checkEvictLeaderScheduler) +} + +func (suite *schedulerTestSuite) checkEvictLeaderScheduler(cluster *pdTests.TestCluster) { + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 3, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 4, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + for _, store := range stores { + pdTests.MustPutStore(re, cluster, store) + } + + pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b")) + output, err := tests.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "2"}...) + re.NoError(err) + re.Contains(string(output), "Success!") + output, err = tests.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "1"}...) + re.NoError(err) + re.Contains(string(output), "Success!") + output, err = tests.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler"}...) + re.NoError(err) + re.Contains(string(output), "Success!") + output, err = tests.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "1"}...) + re.NoError(err) + re.Contains(string(output), "Success!") + testutil.Eventually(re, func() bool { + output, err = tests.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}...) + return err == nil && strings.Contains(string(output), "Success!") + }) + testutil.Eventually(re, func() bool { + output, err = tests.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "show"}...) + return err == nil && !strings.Contains(string(output), "evict-leader-scheduler") + }) +} + +func mustExec(re *require.Assertions, cmd *cobra.Command, args []string, v any) string { + output, err := tests.ExecuteCommand(cmd, args...) + re.NoError(err) + if v == nil { + return string(output) + } + re.NoError(json.Unmarshal(output, v), string(output)) + return "" +} + +func mightExec(re *require.Assertions, cmd *cobra.Command, args []string, v any) { + output, err := tests.ExecuteCommand(cmd, args...) 
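+ // Unlike mustExec, mightExec ignores JSON decoding errors so that callers can keep
+ // polling (e.g. inside testutil.Eventually) until the command output becomes valid JSON.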
+ re.NoError(err) + if v == nil { + return + } + json.Unmarshal(output, v) +} + +func checkSchedulerCommand(re *require.Assertions, cmd *cobra.Command, pdAddr string, args []string, expected map[string]bool) { + if args != nil { + echo := mustExec(re, cmd, args, nil) + re.Contains(echo, "Success!") + } + testutil.Eventually(re, func() bool { + var schedulers []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, &schedulers) + if len(schedulers) != len(expected) { + return false + } + for _, scheduler := range schedulers { + if _, ok := expected[scheduler]; !ok { + return false + } + } + return true + }) +}