From 79076795f1019bf87388b55d3ec7126f25eed151 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Wed, 25 Sep 2024 16:58:23 +0800 Subject: [PATCH] scheduler: add conflict detect for grant hot leader scheduler (#4903) ref tikv/pd#4399 Signed-off-by: lhy1024 --- server/api/scheduler.go | 38 +++++ tools/pd-ctl/pdctl/command/scheduler.go | 7 +- .../pd-ctl/tests/scheduler/scheduler_test.go | 133 +++++++++++++----- 3 files changed, 139 insertions(+), 39 deletions(-) diff --git a/server/api/scheduler.go b/server/api/scheduler.go index c96e4c123de..3d69674db48 100644 --- a/server/api/scheduler.go +++ b/server/api/scheduler.go @@ -24,7 +24,9 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/mcs/utils/constant" "github.com/tikv/pd/pkg/schedule/types" + "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/server" "github.com/unrolled/render" @@ -143,6 +145,15 @@ func (h *schedulerHandler) CreateScheduler(w http.ResponseWriter, r *http.Reques } collector(strconv.FormatUint(limit, 10)) case types.GrantHotRegionScheduler: + isExist, err := h.isSchedulerExist(types.BalanceHotRegionScheduler) + if err != nil { + h.r.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + if isExist { + h.r.JSON(w, http.StatusBadRequest, "balance-hot-region-scheduler is running, please remove it first") + return + } leaderID, ok := input["store-leader-id"].(string) if !ok { h.r.JSON(w, http.StatusBadRequest, "missing leader id") @@ -155,6 +166,16 @@ func (h *schedulerHandler) CreateScheduler(w http.ResponseWriter, r *http.Reques } collector(leaderID) collector(peerIDs) + case types.BalanceHotRegionScheduler: + isExist, err := h.isSchedulerExist(types.GrantHotRegionScheduler) + if err != nil { + h.r.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + if isExist { + h.r.JSON(w, http.StatusBadRequest, "grant-hot-region-scheduler is running, please remove it first") + return + } } if err := h.AddScheduler(tp, args...); err != nil { @@ -246,6 +267,23 @@ func (h *schedulerHandler) PauseOrResumeScheduler(w http.ResponseWriter, r *http h.r.JSON(w, http.StatusOK, "Pause or resume the scheduler successfully.") } +func (h *schedulerHandler) isSchedulerExist(scheduler types.CheckerSchedulerType) (bool, error) { + rc, err := h.GetRaftCluster() + if err != nil { + return false, err + } + if rc.IsServiceIndependent(constant.SchedulingServiceName) { + handlers := rc.GetSchedulerHandlers() + _, ok := handlers[scheduler.String()] + return ok, nil + } + schedulers := rc.GetSchedulers() + if slice.Contains(schedulers, scheduler.String()) { + return !rc.GetSchedulerConfig().IsSchedulerDisabled(scheduler), nil + } + return false, nil +} + type schedulerConfigHandler struct { svr *server.Server rd *render.Render diff --git a/tools/pd-ctl/pdctl/command/scheduler.go b/tools/pd-ctl/pdctl/command/scheduler.go index 9c38266fdb8..d03fdbbed94 100644 --- a/tools/pd-ctl/pdctl/command/scheduler.go +++ b/tools/pd-ctl/pdctl/command/scheduler.go @@ -355,9 +355,10 @@ func NewSplitBucketSchedulerCommand() *cobra.Command { // NewGrantHotRegionSchedulerCommand returns a command to add a grant-hot-region-scheduler. func NewGrantHotRegionSchedulerCommand() *cobra.Command { c := &cobra.Command{ - Use: "grant-hot-region-scheduler ", - Short: "add a scheduler to grant hot region to fixed stores", - Run: addSchedulerForGrantHotRegionCommandFunc, + Use: "grant-hot-region-scheduler ", + Short: `add a scheduler to grant hot region to fixed stores. + Note: If there is balance-hot-region-scheduler running, please remove it first, otherwise grant-hot-region-scheduler will not work.`, + Run: addSchedulerForGrantHotRegionCommandFunc, } return c } diff --git a/tools/pd-ctl/tests/scheduler/scheduler_test.go b/tools/pd-ctl/tests/scheduler/scheduler_test.go index 9de1195bed3..332fe79614e 100644 --- a/tools/pd-ctl/tests/scheduler/scheduler_test.go +++ b/tools/pd-ctl/tests/scheduler/scheduler_test.go @@ -490,34 +490,6 @@ func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestClust "balance-hot-region-scheduler": true, }) - // test grant hot region scheduler config - checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "add", "grant-hot-region-scheduler", "1", "1,2,3"}, map[string]bool{ - "balance-region-scheduler": true, - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "grant-hot-region-scheduler": true, - }) - var conf3 map[string]any - expected3 := map[string]any{ - "store-id": []any{float64(1), float64(2), float64(3)}, - "store-leader-id": float64(1), - } - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) - re.Equal(expected3, conf3) - - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler", "set", "2", "1,2,3"}, nil) - re.Contains(echo, "Success!") - expected3["store-leader-id"] = float64(2) - testutil.Eventually(re, func() bool { - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) - return reflect.DeepEqual(expected3, conf3) - }) - checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "remove", "grant-hot-region-scheduler"}, map[string]bool{ - "balance-region-scheduler": true, - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - }) - // test shuffle hot region scheduler echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "shuffle-hot-region-scheduler"}, nil) re.Contains(echo, "Success!") @@ -587,6 +559,103 @@ func (suite *schedulerTestSuite) checkSchedulerConfig(cluster *pdTests.TestClust re.Contains(echo, "Success!") } +func (suite *schedulerTestSuite) TestGrantLeaderScheduler() { + suite.env.RunTestBasedOnMode(suite.checkGrantLeaderScheduler) +} + +func (suite *schedulerTestSuite) checkGrantLeaderScheduler(cluster *pdTests.TestCluster) { + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 3, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 4, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + for _, store := range stores { + pdTests.MustPutStore(re, cluster, store) + } + + // note: because pdqsort is an unstable sort algorithm, set ApproximateSize for this region. + pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10)) + + suite.checkDefaultSchedulers(re, cmd, pdAddr) + + // case 1: add grant-hot-region-scheduler when balance-hot-region-scheduler is running + echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "grant-hot-region-scheduler", "1", "1,2,3"}, nil) + re.Contains(echo, "balance-hot-region-scheduler is running, please remove it first") + + // case 2: add grant-hot-region-scheduler when balance-hot-region-scheduler is paused + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-hot-region-scheduler", "60"}, nil) + re.Contains(echo, "Success!") + suite.checkDefaultSchedulers(re, cmd, pdAddr) + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "grant-hot-region-scheduler", "1", "1,2,3"}, nil) + re.Contains(echo, "balance-hot-region-scheduler is running, please remove it first") + + // case 3: add grant-hot-region-scheduler when balance-hot-region-scheduler is disabled + checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "remove", "balance-hot-region-scheduler"}, map[string]bool{ + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "evict-slow-store-scheduler": true, + }) + + checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "add", "grant-hot-region-scheduler", "1", "1,2,3"}, map[string]bool{ + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "grant-hot-region-scheduler": true, + "evict-slow-store-scheduler": true, + }) + + // case 4: test grant-hot-region-scheduler config + var conf3 map[string]any + expected3 := map[string]any{ + "store-id": []any{float64(1), float64(2), float64(3)}, + "store-leader-id": float64(1), + } + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) + re.Equal(expected3, conf3) + + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler", "set", "2", "1,2,3"}, nil) + re.Contains(echo, "Success!") + expected3["store-leader-id"] = float64(2) + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) + return reflect.DeepEqual(expected3, conf3) + }) + + // case 5: remove grant-hot-region-scheduler + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-hot-region-scheduler"}, nil) + re.Contains(echo, "grant-hot-region-scheduler is running, please remove it first") + + checkSchedulerCommand(re, cmd, pdAddr, []string{"-u", pdAddr, "scheduler", "remove", "grant-hot-region-scheduler"}, map[string]bool{ + "balance-region-scheduler": true, + "balance-leader-scheduler": true, + "evict-slow-store-scheduler": true, + }) + + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-hot-region-scheduler"}, nil) + re.Contains(echo, "Success!") + suite.checkDefaultSchedulers(re, cmd, pdAddr) +} + func (suite *schedulerTestSuite) TestHotRegionSchedulerConfig() { suite.env.RunTestBasedOnMode(suite.checkHotRegionSchedulerConfig) } @@ -650,14 +719,6 @@ func (suite *schedulerTestSuite) checkHotRegionSchedulerConfig(cluster *pdTests. return reflect.DeepEqual(expect, conf1) }) } - // scheduler show command - expected := map[string]bool{ - "balance-region-scheduler": true, - "balance-leader-scheduler": true, - "balance-hot-region-scheduler": true, - "evict-slow-store-scheduler": true, - } - checkSchedulerCommand(re, cmd, pdAddr, nil, expected) var conf map[string]any mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "list"}, &conf) re.Equal(expected1, conf)