From 4511fe91d747397538061ef648f310981c667c6d Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Sat, 14 Sep 2024 14:06:19 +0800 Subject: [PATCH 1/7] fix build args error Signed-off-by: Ryan Leung --- pkg/schedule/schedulers/grant_leader.go | 1 + plugin/scheduler_example/evict_leader.go | 11 +++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pkg/schedule/schedulers/grant_leader.go b/pkg/schedule/schedulers/grant_leader.go index 5dbb6eef5f6..1d3d1c47837 100644 --- a/pkg/schedule/schedulers/grant_leader.go +++ b/pkg/schedule/schedulers/grant_leader.go @@ -271,6 +271,7 @@ func (handler *grantLeaderHandler) updateConfig(w http.ResponseWriter, r *http.R err := handler.config.buildWithArgs(args) if err != nil { + handler.config.removeStore(id) handler.rd.JSON(w, http.StatusBadRequest, err.Error()) return } diff --git a/plugin/scheduler_example/evict_leader.go b/plugin/scheduler_example/evict_leader.go index c20cfd41814..51a5a2b5aed 100644 --- a/plugin/scheduler_example/evict_leader.go +++ b/plugin/scheduler_example/evict_leader.go @@ -275,11 +275,18 @@ func (handler *evictLeaderHandler) updateConfig(w http.ResponseWriter, r *http.R err := handler.config.BuildWithArgs(args) if err != nil { + handler.config.mu.Lock() + handler.config.cluster.ResumeLeaderTransfer(id) + handler.config.mu.Unlock() handler.rd.JSON(w, http.StatusBadRequest, err.Error()) return } err = handler.config.Persist() if err != nil { + handler.config.mu.Lock() + delete(handler.config.StoreIDWitRanges, id) + handler.config.cluster.ResumeLeaderTransfer(id) + handler.config.mu.Unlock() handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } @@ -309,14 +316,10 @@ func (handler *evictLeaderHandler) deleteConfig(w http.ResponseWriter, r *http.R delete(handler.config.StoreIDWitRanges, id) handler.config.cluster.ResumeLeaderTransfer(id) - handler.config.mu.Unlock() if err := handler.config.Persist(); err != nil { - handler.config.mu.Lock() handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } - handler.config.mu.Lock() - var resp any if len(handler.config.StoreIDWitRanges) == 0 { resp = noStoreInSchedulerInfo From 922e1d0689133ab977d3fdab4918f0e3232f0f91 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Sat, 14 Sep 2024 14:49:05 +0800 Subject: [PATCH 2/7] add a test Signed-off-by: Ryan Leung --- pkg/schedule/schedulers/evict_leader.go | 3 ++ .../pd-ctl/tests/scheduler/scheduler_test.go | 54 +++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/pkg/schedule/schedulers/evict_leader.go b/pkg/schedule/schedulers/evict_leader.go index a7d656a3e42..02906a9eb71 100644 --- a/pkg/schedule/schedulers/evict_leader.go +++ b/pkg/schedule/schedulers/evict_leader.go @@ -408,6 +408,7 @@ func (handler *evictLeaderHandler) updateConfig(w http.ResponseWriter, r *http.R batchFloat, ok := input["batch"].(float64) if ok { if batchFloat < 1 || batchFloat > 10 { + handler.config.delete(id) handler.rd.JSON(w, http.StatusBadRequest, "batch is invalid, it should be in [1, 10]") return } @@ -417,6 +418,7 @@ func (handler *evictLeaderHandler) updateConfig(w http.ResponseWriter, r *http.R ranges, ok := (input["ranges"]).([]string) if ok { if !inputHasStoreID { + handler.config.delete(id) handler.rd.JSON(w, http.StatusInternalServerError, errs.ErrSchedulerConfig.FastGenByArgs("id")) return } @@ -426,6 +428,7 @@ func (handler *evictLeaderHandler) updateConfig(w http.ResponseWriter, r *http.R newRanges, err = getKeyRanges(ranges) if err != nil { + handler.config.delete(id) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } diff --git a/tools/pd-ctl/tests/scheduler/scheduler_test.go b/tools/pd-ctl/tests/scheduler/scheduler_test.go index f680a4bd2e7..198e745a514 100644 --- a/tools/pd-ctl/tests/scheduler/scheduler_test.go +++ b/tools/pd-ctl/tests/scheduler/scheduler_test.go @@ -813,6 +813,60 @@ func (suite *schedulerTestSuite) checkSchedulerDiagnostic(cluster *pdTests.TestC checkSchedulerDescribeCommand("balance-leader-scheduler", "normal", "") } +func (suite *schedulerTestSuite) TestEvictLeaderScheduler() { + suite.env.RunTestInPDMode(suite.checkEvictLeaderScheduler) +} + +func (suite *schedulerTestSuite) checkEvictLeaderScheduler(cluster *pdTests.TestCluster) { + re := suite.Require() + pdAddr := cluster.GetConfig().GetClientURL() + cmd := ctl.GetRootCmd() + + stores := []*metapb.Store{ + { + Id: 1, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 2, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 3, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + { + Id: 4, + State: metapb.StoreState_Up, + LastHeartbeat: time.Now().UnixNano(), + }, + } + for _, store := range stores { + pdTests.MustPutStore(re, cluster, store) + } + + pdTests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b")) + output, err := tests.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "2"}...) + re.NoError(err) + re.Contains(string(output), "Success!") + output, err = tests.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "1"}...) + re.NoError(err) + re.Contains(string(output), "Success!") + output, err = tests.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler"}...) + re.NoError(err) + re.Contains(string(output), "Success!") + output, err = tests.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "1"}...) + re.NoError(err) + re.Contains(string(output), "Success!") + output, err = tests.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}...) + re.NoError(err) + re.Contains(string(output), "Success!") + +} + func mustExec(re *require.Assertions, cmd *cobra.Command, args []string, v any) string { output, err := tests.ExecuteCommand(cmd, args...) re.NoError(err) From f273a7bbfa1e3009100b05282b772541f41fddc5 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Sat, 14 Sep 2024 14:52:35 +0800 Subject: [PATCH 3/7] leave a FIXME Signed-off-by: Ryan Leung --- tools/pd-ctl/tests/scheduler/scheduler_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/pd-ctl/tests/scheduler/scheduler_test.go b/tools/pd-ctl/tests/scheduler/scheduler_test.go index 198e745a514..de39ef00333 100644 --- a/tools/pd-ctl/tests/scheduler/scheduler_test.go +++ b/tools/pd-ctl/tests/scheduler/scheduler_test.go @@ -814,6 +814,7 @@ func (suite *schedulerTestSuite) checkSchedulerDiagnostic(cluster *pdTests.TestC } func (suite *schedulerTestSuite) TestEvictLeaderScheduler() { + // FIXME: API mode may have the problem suite.env.RunTestInPDMode(suite.checkEvictLeaderScheduler) } @@ -864,7 +865,6 @@ func (suite *schedulerTestSuite) checkEvictLeaderScheduler(cluster *pdTests.Test output, err = tests.ExecuteCommand(cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}...) re.NoError(err) re.Contains(string(output), "Success!") - } func mustExec(re *require.Assertions, cmd *cobra.Command, args []string, v any) string { From e2ccf3fb6154fb1b0ce84086ebde49749f3d6236 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Sat, 14 Sep 2024 15:07:20 +0800 Subject: [PATCH 4/7] fix Signed-off-by: Ryan Leung --- pkg/schedule/schedulers/evict_leader.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pkg/schedule/schedulers/evict_leader.go b/pkg/schedule/schedulers/evict_leader.go index 02906a9eb71..ca8eabfb9b1 100644 --- a/pkg/schedule/schedulers/evict_leader.go +++ b/pkg/schedule/schedulers/evict_leader.go @@ -103,6 +103,12 @@ func (conf *evictLeaderSchedulerConfig) removeStoreLocked(id uint64) (bool, erro return false, errs.ErrScheduleConfigNotExist.FastGenByArgs() } +func (conf *evictLeaderSchedulerConfig) removeStore(id uint64) { + conf.Lock() + defer conf.Unlock() + conf.removeStoreLocked(id) +} + func (conf *evictLeaderSchedulerConfig) resetStoreLocked(id uint64, keyRange []core.KeyRange) { if err := conf.cluster.PauseLeaderTransfer(id); err != nil { log.Error("pause leader transfer failed", zap.Uint64("store-id", id), errs.ZapError(err)) @@ -408,7 +414,7 @@ func (handler *evictLeaderHandler) updateConfig(w http.ResponseWriter, r *http.R batchFloat, ok := input["batch"].(float64) if ok { if batchFloat < 1 || batchFloat > 10 { - handler.config.delete(id) + handler.config.removeStore(id) handler.rd.JSON(w, http.StatusBadRequest, "batch is invalid, it should be in [1, 10]") return } @@ -418,7 +424,7 @@ func (handler *evictLeaderHandler) updateConfig(w http.ResponseWriter, r *http.R ranges, ok := (input["ranges"]).([]string) if ok { if !inputHasStoreID { - handler.config.delete(id) + handler.config.removeStore(id) handler.rd.JSON(w, http.StatusInternalServerError, errs.ErrSchedulerConfig.FastGenByArgs("id")) return } @@ -428,7 +434,7 @@ func (handler *evictLeaderHandler) updateConfig(w http.ResponseWriter, r *http.R newRanges, err = getKeyRanges(ranges) if err != nil { - handler.config.delete(id) + handler.config.removeStore(id) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } From fce9f6ecefd9b5baad3206e5bc2b2ac7b2f63580 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Sat, 14 Sep 2024 15:09:52 +0800 Subject: [PATCH 5/7] fix Signed-off-by: Ryan Leung --- plugin/scheduler_example/evict_leader.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/plugin/scheduler_example/evict_leader.go b/plugin/scheduler_example/evict_leader.go index 51a5a2b5aed..1f02e72452c 100644 --- a/plugin/scheduler_example/evict_leader.go +++ b/plugin/scheduler_example/evict_leader.go @@ -308,7 +308,7 @@ func (handler *evictLeaderHandler) deleteConfig(w http.ResponseWriter, r *http.R handler.config.mu.Lock() defer handler.config.mu.Unlock() - _, exists := handler.config.StoreIDWitRanges[id] + ranges, exists := handler.config.StoreIDWitRanges[id] if !exists { handler.rd.JSON(w, http.StatusInternalServerError, errors.New("the config does not exist")) return @@ -317,6 +317,8 @@ func (handler *evictLeaderHandler) deleteConfig(w http.ResponseWriter, r *http.R handler.config.cluster.ResumeLeaderTransfer(id) if err := handler.config.Persist(); err != nil { + handler.config.StoreIDWitRanges[id] = ranges + handler.config.cluster.PauseLeaderTransfer(id) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } From 4a9aab068383b572deaab3147aabc4e94f4424e6 Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Sat, 14 Sep 2024 15:12:23 +0800 Subject: [PATCH 6/7] fix Signed-off-by: Ryan Leung --- pkg/schedule/schedulers/grant_leader.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/schedule/schedulers/grant_leader.go b/pkg/schedule/schedulers/grant_leader.go index 1d3d1c47837..b566c8e243c 100644 --- a/pkg/schedule/schedulers/grant_leader.go +++ b/pkg/schedule/schedulers/grant_leader.go @@ -271,13 +271,13 @@ func (handler *grantLeaderHandler) updateConfig(w http.ResponseWriter, r *http.R err := handler.config.buildWithArgs(args) if err != nil { - handler.config.removeStore(id) + _, _ = handler.config.removeStore(id) handler.rd.JSON(w, http.StatusBadRequest, err.Error()) return } err = handler.config.persist() if err != nil { - handler.config.removeStore(id) + _, _ = handler.config.removeStore(id) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } From f8ff32c49187d9504138d41694e861c3ec87507a Mon Sep 17 00:00:00 2001 From: Ryan Leung Date: Sat, 14 Sep 2024 15:21:12 +0800 Subject: [PATCH 7/7] fix Signed-off-by: Ryan Leung --- pkg/schedule/schedulers/evict_leader.go | 3 ++- plugin/scheduler_example/evict_leader.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/schedule/schedulers/evict_leader.go b/pkg/schedule/schedulers/evict_leader.go index ca8eabfb9b1..322e6627fee 100644 --- a/pkg/schedule/schedulers/evict_leader.go +++ b/pkg/schedule/schedulers/evict_leader.go @@ -106,7 +106,8 @@ func (conf *evictLeaderSchedulerConfig) removeStoreLocked(id uint64) (bool, erro func (conf *evictLeaderSchedulerConfig) removeStore(id uint64) { conf.Lock() defer conf.Unlock() - conf.removeStoreLocked(id) + // if the store is not existed, no need to resume leader transfer + _, _ = conf.removeStoreLocked(id) } func (conf *evictLeaderSchedulerConfig) resetStoreLocked(id uint64, keyRange []core.KeyRange) { diff --git a/plugin/scheduler_example/evict_leader.go b/plugin/scheduler_example/evict_leader.go index 1f02e72452c..cd30ef85d11 100644 --- a/plugin/scheduler_example/evict_leader.go +++ b/plugin/scheduler_example/evict_leader.go @@ -318,7 +318,7 @@ func (handler *evictLeaderHandler) deleteConfig(w http.ResponseWriter, r *http.R if err := handler.config.Persist(); err != nil { handler.config.StoreIDWitRanges[id] = ranges - handler.config.cluster.PauseLeaderTransfer(id) + _ = handler.config.cluster.PauseLeaderTransfer(id) handler.rd.JSON(w, http.StatusInternalServerError, err.Error()) return }