From 459671f6c6696d3c692ce6911c6f9f7ac8a55cd3 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 26 Oct 2023 10:54:33 +0800 Subject: [PATCH 001/116] mcs: add timeout when call `alloc` of api server (#7254) close tikv/pd#7207 Signed-off-by: lhy1024 --- pkg/mcs/scheduling/server/cluster.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pkg/mcs/scheduling/server/cluster.go b/pkg/mcs/scheduling/server/cluster.go index e4983eca7ea..09ae4ede528 100644 --- a/pkg/mcs/scheduling/server/cluster.go +++ b/pkg/mcs/scheduling/server/cluster.go @@ -52,7 +52,10 @@ type Cluster struct { running atomic.Bool } -const regionLabelGCInterval = time.Hour +const ( + regionLabelGCInterval = time.Hour + requestTimeout = 3 * time.Second +) // NewCluster creates a new cluster. func NewCluster(parentCtx context.Context, persistConfig *config.PersistConfig, storage storage.Storage, basicCluster *core.BasicCluster, hbStreams *hbstream.HeartbeatStreams, clusterID uint64, checkMembershipCh chan struct{}) (*Cluster, error) { @@ -199,7 +202,9 @@ func (c *Cluster) AllocID() (uint64, error) { if err != nil { return 0, err } - resp, err := client.AllocID(c.ctx, &pdpb.AllocIDRequest{Header: &pdpb.RequestHeader{ClusterId: c.clusterID}}) + ctx, cancel := context.WithTimeout(c.ctx, requestTimeout) + defer cancel() + resp, err := client.AllocID(ctx, &pdpb.AllocIDRequest{Header: &pdpb.RequestHeader{ClusterId: c.clusterID}}) if err != nil { c.checkMembershipCh <- struct{}{} return 0, err From bfc988bb37a738b3bba2e85903d304af5d8532d6 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Thu, 26 Oct 2023 12:18:33 +0800 Subject: [PATCH 002/116] api: fix cannot dump trace (#7255) close tikv/pd#7253 api: fix cannot dump trace with dashboard Signed-off-by: nolouch --- server/util.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/server/util.go b/server/util.go index 654b424465e..f88d0146a7f 100644 --- a/server/util.go +++ b/server/util.go @@ -17,6 +17,7 @@ package server import ( "context" "net/http" + "net/http/pprof" "path/filepath" "strings" @@ -121,8 +122,13 @@ func combineBuilderServerHTTPService(ctx context.Context, svr *Server, serviceBu userHandlers[pathPrefix] = handler } } + apiService.UseHandler(router) userHandlers[pdAPIPrefix] = apiService + + // fix issue https://github.com/tikv/pd/issues/7253 + // FIXME: remove me after upgrade + userHandlers["/debug/pprof/trace"] = http.HandlerFunc(pprof.Trace) return userHandlers, nil } From 4549e2352bd28905cfc34b93ea30e945f7b3154e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 26 Oct 2023 14:03:33 +0800 Subject: [PATCH 003/116] *: check whether region is nil (#7263) close tikv/pd#7261 Signed-off-by: lhy1024 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- pkg/schedule/schedulers/hot_region.go | 3 +++ pkg/schedule/schedulers/split_bucket.go | 5 ++++- pkg/statistics/region_collection.go | 6 +++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go index c353621bb7f..4806180e450 100644 --- a/pkg/schedule/schedulers/hot_region.go +++ b/pkg/schedule/schedulers/hot_region.go @@ -758,6 +758,9 @@ func (bs *balanceSolver) tryAddPendingInfluence() bool { dstStoreID := uint64(0) if isSplit { region := bs.GetRegion(bs.ops[0].RegionID()) + if region == nil { + return false + } for id := range region.GetStoreIDs() { srcStoreIDs = append(srcStoreIDs, id) } diff --git a/pkg/schedule/schedulers/split_bucket.go 
b/pkg/schedule/schedulers/split_bucket.go index 5e75bded9b4..a08c84372b5 100644 --- a/pkg/schedule/schedulers/split_bucket.go +++ b/pkg/schedule/schedulers/split_bucket.go @@ -262,6 +262,9 @@ func (s *splitBucketScheduler) splitBucket(plan *splitBucketPlan) []*operator.Op } if splitBucket != nil { region := plan.cluster.GetRegion(splitBucket.RegionID) + if region == nil { + return nil + } splitKey := make([][]byte, 0) if bytes.Compare(region.GetStartKey(), splitBucket.StartKey) < 0 { splitKey = append(splitKey, splitBucket.StartKey) @@ -269,7 +272,7 @@ func (s *splitBucketScheduler) splitBucket(plan *splitBucketPlan) []*operator.Op if bytes.Compare(region.GetEndKey(), splitBucket.EndKey) > 0 { splitKey = append(splitKey, splitBucket.EndKey) } - op, err := operator.CreateSplitRegionOperator(SplitBucketType, plan.cluster.GetRegion(splitBucket.RegionID), operator.OpSplit, + op, err := operator.CreateSplitRegionOperator(SplitBucketType, region, operator.OpSplit, pdpb.CheckPolicy_USEKEY, splitKey) if err != nil { splitBucketCreateOpeartorFailCounter.Inc() diff --git a/pkg/statistics/region_collection.go b/pkg/statistics/region_collection.go index c79eb0a3132..26cbea9ef92 100644 --- a/pkg/statistics/region_collection.go +++ b/pkg/statistics/region_collection.go @@ -119,7 +119,11 @@ func (r *RegionStatistics) GetRegionStatsByType(typ RegionStatisticType) []*core defer r.RUnlock() res := make([]*core.RegionInfo, 0, len(r.stats[typ])) for regionID := range r.stats[typ] { - res = append(res, r.rip.GetRegion(regionID).Clone()) + region := r.rip.GetRegion(regionID) + if region == nil { + continue + } + res = append(res, region.Clone()) } return res } From 89128f10f5206f57e3f60cba40797a5eb0dc19c7 Mon Sep 17 00:00:00 2001 From: tongjian <1045931706@qq.com> Date: Thu, 26 Oct 2023 14:50:03 +0800 Subject: [PATCH 004/116] ctl: sort gc safepoint in pd-ctl (#7233) close tikv/pd#7262 Signed-off-by: bufferflies <1045931706@qq.com> Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- server/api/service_gc_safepoint.go | 7 ++++--- server/api/service_gc_safepoint_test.go | 4 ++-- .../pdctl/command/gc_safepoint_command.go | 18 +++++++++++++++++- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/server/api/service_gc_safepoint.go b/server/api/service_gc_safepoint.go index b26edaba07d..270edca58bf 100644 --- a/server/api/service_gc_safepoint.go +++ b/server/api/service_gc_safepoint.go @@ -35,8 +35,9 @@ func newServiceGCSafepointHandler(svr *server.Server, rd *render.Render) *servic } } +// ListServiceGCSafepoint is the response for list service GC safepoint. // NOTE: This type is exported by HTTP API. Please pay more attention when modifying it. -type listServiceGCSafepoint struct { +type ListServiceGCSafepoint struct { ServiceGCSafepoints []*endpoint.ServiceSafePoint `json:"service_gc_safe_points"` GCSafePoint uint64 `json:"gc_safe_point"` } @@ -44,7 +45,7 @@ type listServiceGCSafepoint struct { // @Tags service_gc_safepoint // @Summary Get all service GC safepoint. // @Produce json -// @Success 200 {array} listServiceGCSafepoint +// @Success 200 {array} ListServiceGCSafepoint // @Failure 500 {string} string "PD server failed to proceed the request." 
// @Router /gc/safepoint [get] func (h *serviceGCSafepointHandler) GetGCSafePoint(w http.ResponseWriter, r *http.Request) { @@ -59,7 +60,7 @@ func (h *serviceGCSafepointHandler) GetGCSafePoint(w http.ResponseWriter, r *htt h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } - list := listServiceGCSafepoint{ + list := ListServiceGCSafepoint{ GCSafePoint: gcSafepoint, ServiceGCSafepoints: ssps, } diff --git a/server/api/service_gc_safepoint_test.go b/server/api/service_gc_safepoint_test.go index 517a94c2e23..3df9102d116 100644 --- a/server/api/service_gc_safepoint_test.go +++ b/server/api/service_gc_safepoint_test.go @@ -58,7 +58,7 @@ func (suite *serviceGCSafepointTestSuite) TestServiceGCSafepoint() { sspURL := suite.urlPrefix + "/gc/safepoint" storage := suite.svr.GetStorage() - list := &listServiceGCSafepoint{ + list := &ListServiceGCSafepoint{ ServiceGCSafepoints: []*endpoint.ServiceSafePoint{ { ServiceID: "a", @@ -87,7 +87,7 @@ func (suite *serviceGCSafepointTestSuite) TestServiceGCSafepoint() { res, err := testDialClient.Get(sspURL) suite.NoError(err) defer res.Body.Close() - listResp := &listServiceGCSafepoint{} + listResp := &ListServiceGCSafepoint{} err = apiutil.ReadJSON(res.Body, listResp) suite.NoError(err) suite.Equal(list, listResp) diff --git a/tools/pd-ctl/pdctl/command/gc_safepoint_command.go b/tools/pd-ctl/pdctl/command/gc_safepoint_command.go index 619cf5a928b..80c6328e955 100644 --- a/tools/pd-ctl/pdctl/command/gc_safepoint_command.go +++ b/tools/pd-ctl/pdctl/command/gc_safepoint_command.go @@ -15,9 +15,12 @@ package command import ( + "encoding/json" "net/http" + "sort" "github.com/spf13/cobra" + "github.com/tikv/pd/server/api" ) var ( @@ -52,7 +55,20 @@ func showSSPs(cmd *cobra.Command, args []string) { cmd.Printf("Failed to get service GC safepoint: %s\n", err) return } - cmd.Println(r) + var safepoint api.ListServiceGCSafepoint + if err := json.Unmarshal([]byte(r), &safepoint); err != nil { + cmd.Printf("Failed to unmarshal service GC safepoint: %s\n", err) + return + } + sort.Slice(safepoint.ServiceGCSafepoints, func(i, j int) bool { + return safepoint.ServiceGCSafepoints[i].SafePoint < safepoint.ServiceGCSafepoints[j].SafePoint + }) + data, err := json.MarshalIndent(safepoint, "", " ") + if err != nil { + cmd.Printf("Failed to marshal service GC safepoint: %s\n", err) + return + } + cmd.Println(string(data)) } func deleteSSP(cmd *cobra.Command, args []string) { From f9b476d809938e8b23c803ac4619a6a87e7ec03d Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 26 Oct 2023 17:03:33 +0800 Subject: [PATCH 005/116] checker: add disconnected check when fix orphan peers (#7240) close tikv/pd#7249 Signed-off-by: lhy1024 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- pkg/schedule/checker/rule_checker.go | 13 +++++++++++++ server/cluster/cluster_test.go | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pkg/schedule/checker/rule_checker.go b/pkg/schedule/checker/rule_checker.go index b0537bf9ce4..7012359ca36 100644 --- a/pkg/schedule/checker/rule_checker.go +++ b/pkg/schedule/checker/rule_checker.go @@ -479,6 +479,13 @@ loopFits: hasUnhealthyFit = true break loopFits } + // avoid to meet down store when fix orpahn peers, + // Isdisconnected is more strictly than IsUnhealthy. + if c.cluster.GetStore(p.GetStoreId()).IsDisconnected() { + hasUnhealthyFit = true + pinDownPeer = p + break loopFits + } } } @@ -491,6 +498,9 @@ loopFits: // try to use orphan peers to replace unhealthy down peers. 
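// Aside — a hedged, standalone sketch of the store-state distinction the new check above
// relies on: a store becomes "disconnected" after a short heartbeat gap, while the broader
// unhealthy/down states require a much longer silence, so IsDisconnected fires earlier and
// is the stricter signal. The thresholds and helper names below are illustrative only, not
// PD's actual values; assumes the standard "time" package.
func isDisconnectedSketch(lastHeartbeat, now time.Time) bool {
	// Disconnected: heartbeats have been missing for a short window.
	return now.Sub(lastHeartbeat) > 20*time.Second // illustrative threshold
}

func isDownSketch(lastHeartbeat, now time.Time) bool {
	// Down: heartbeats have been missing for far longer, so any store about to be treated
	// as down has already been reported as disconnected for a while.
	return now.Sub(lastHeartbeat) > 30*time.Minute // illustrative threshold
}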
for _, orphanPeer := range fit.OrphanPeers { if pinDownPeer != nil { + if pinDownPeer.GetId() == orphanPeer.GetId() { + continue + } // make sure the orphan peer is healthy. if isUnhealthyPeer(orphanPeer.GetId()) { continue @@ -514,6 +524,9 @@ loopFits: return operator.CreatePromoteLearnerOperatorAndRemovePeer("replace-down-peer-with-orphan-peer", c.cluster, region, orphanPeer, pinDownPeer) case orphanPeerRole == metapb.PeerRole_Voter && destRole == metapb.PeerRole_Learner: return operator.CreateDemoteLearnerOperatorAndRemovePeer("replace-down-peer-with-orphan-peer", c.cluster, region, orphanPeer, pinDownPeer) + case orphanPeerRole == metapb.PeerRole_Voter && destRole == metapb.PeerRole_Voter && + c.cluster.GetStore(pinDownPeer.GetStoreId()).IsDisconnected() && !dstStore.IsDisconnected(): + return operator.CreateRemovePeerOperator("remove-replaced-orphan-peer", c.cluster, 0, region, pinDownPeer.GetStoreId()) default: // destRole should not same with orphanPeerRole. if role is same, it fit with orphanPeer should be better than now. // destRole never be leader, so we not consider it. diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index b7b9dcfb736..89c9ea32f19 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -2796,7 +2796,7 @@ func TestReplica(t *testing.T) { re.NoError(tc.addLeaderRegion(2, 1, 2, 3, 4)) region = tc.GetRegion(2) re.NoError(dispatchHeartbeat(co, region, stream)) - region = waitRemovePeer(re, stream, region, 4) + region = waitRemovePeer(re, stream, region, 3) // store3 is down, we should remove it firstly. re.NoError(dispatchHeartbeat(co, region, stream)) waitNoResponse(re, stream) From cbdae92fc7c6b185cecb7d167a9e20547d88f350 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 26 Oct 2023 23:51:27 +0800 Subject: [PATCH 006/116] mcs: support config http interface in scheduling server Signed-off-by: lhy1024 --- errors.toml | 10 +++ pkg/errs/errno.go | 6 ++ pkg/mcs/scheduling/server/apis/v1/api.go | 54 ++++++++++++ pkg/mcs/scheduling/server/config/config.go | 13 ++- pkg/mcs/scheduling/server/server.go | 25 ++++++ server/api/config.go | 84 ++++++++++++++++++- tests/integrations/mcs/scheduling/api_test.go | 79 +++++++++++++++++ .../mcs/scheduling/server_test.go | 2 +- 8 files changed, 268 insertions(+), 5 deletions(-) diff --git a/errors.toml b/errors.toml index 1b96de8a209..1d10d40d294 100644 --- a/errors.toml +++ b/errors.toml @@ -496,6 +496,16 @@ error = ''' init file log error, %s ''' +["PD:mcs:ErrNotFoundSchedulingAddr"] +error = ''' +cannot find scheduling address +''' + +["PD:mcs:ErrSchedulingServer"] +error = ''' +scheduling server meets %v +''' + ["PD:member:ErrCheckCampaign"] error = ''' check campaign failed diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index 181dfc9b393..e5bac8519be 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -403,3 +403,9 @@ var ( ErrDeleteReservedGroup = errors.Normalize("cannot delete reserved group", errors.RFCCodeText("PD:resourcemanager:ErrDeleteReservedGroup")) ErrInvalidGroup = errors.Normalize("invalid group settings, please check the group name, priority and the number of resources", errors.RFCCodeText("PD:resourcemanager:ErrInvalidGroup")) ) + +// Micro service errors +var ( + ErrNotFoundSchedulingAddr = errors.Normalize("cannot find scheduling address", errors.RFCCodeText("PD:mcs:ErrNotFoundSchedulingAddr")) + ErrSchedulingServer = errors.Normalize("scheduling server meets %v", errors.RFCCodeText("PD:mcs:ErrSchedulingServer")) +) diff --git 
a/pkg/mcs/scheduling/server/apis/v1/api.go b/pkg/mcs/scheduling/server/apis/v1/api.go index 39be00ef9a0..7dc8a346002 100644 --- a/pkg/mcs/scheduling/server/apis/v1/api.go +++ b/pkg/mcs/scheduling/server/apis/v1/api.go @@ -110,6 +110,7 @@ func NewService(srv *scheserver.Service) *Service { rd: createIndentRender(), } s.RegisterAdminRouter() + s.RegisterConfigRouter() s.RegisterOperatorsRouter() s.RegisterSchedulersRouter() s.RegisterCheckersRouter() @@ -123,6 +124,15 @@ func (s *Service) RegisterAdminRouter() { router.PUT("/log", changeLogLevel) } +// RegisterConfigRouter registers the router of the config handler. +func (s *Service) RegisterConfigRouter() { + router := s.root.Group("config") + router.GET("", getConfig) + router.GET("/schedule", getScheduleConfig) + router.GET("/replicate", getReplicationConfig) + router.GET("/store", getStoreConfig) +} + // RegisterSchedulersRouter registers the router of the schedulers handler. func (s *Service) RegisterSchedulersRouter() { router := s.root.Group("schedulers") @@ -176,6 +186,50 @@ func changeLogLevel(c *gin.Context) { c.String(http.StatusOK, "The log level is updated.") } +// @Tags config +// @Summary Get full config. +// @Produce json +// @Success 200 {object} config.Config +// @Router /config [get] +func getConfig(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + cfg := svr.GetConfig() + c.IndentedJSON(http.StatusOK, cfg) +} + +// @Tags config +// @Summary Get schedule config. +// @Produce json +// @Success 200 {object} sc.ScheduleConfig +// @Router /config/schedule [get] +func getScheduleConfig(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + cfg := svr.GetScheduleConfig() + c.IndentedJSON(http.StatusOK, cfg) +} + +// @Tags config +// @Summary Get replication config. +// @Produce json +// @Success 200 {object} sc.ReplicationConfig +// @Router /config/replicate [get] +func getReplicationConfig(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + cfg := svr.GetReplicationConfig() + c.IndentedJSON(http.StatusOK, cfg) +} + +// @Tags config +// @Summary Get store config. +// @Produce json +// @Success 200 {object} sc.StoreConfig +// @Router /config/store [get] +func getStoreConfig(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + cfg := svr.GetStoreConfig() + c.IndentedJSON(http.StatusOK, cfg) +} + // @Tags operators // @Summary Get an operator by ID. // @Param region_id path int true "A Region's Id" diff --git a/pkg/mcs/scheduling/server/config/config.go b/pkg/mcs/scheduling/server/config/config.go index 4f9caca41e6..772eab835f1 100644 --- a/pkg/mcs/scheduling/server/config/config.go +++ b/pkg/mcs/scheduling/server/config/config.go @@ -61,9 +61,9 @@ type Config struct { Metric metricutil.MetricConfig `toml:"metric" json:"metric"` // Log related config. - Log log.Config `toml:"log" json:"log"` - Logger *zap.Logger - LogProps *log.ZapProperties + Log log.Config `toml:"log" json:"log"` + Logger *zap.Logger `json:"-"` + LogProps *log.ZapProperties `json:"-"` Security configutil.SecurityConfig `toml:"security" json:"security"` @@ -195,6 +195,13 @@ func (c *Config) validate() error { return nil } +// Clone creates a copy of current config. +func (c *Config) Clone() *Config { + cfg := &Config{} + *cfg = *c + return cfg +} + // PersistConfig wraps all configurations that need to persist to storage and // allows to access them safely. 
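// Aside — a hedged sketch (invented names) of why Logger and LogProps earlier in this diff
// gain a `json:"-"` tag: encoding/json skips fields tagged "-", so runtime-only objects stay
// out of the marshaled config returned by the new HTTP endpoints. Assumes "encoding/json".
type settingsSketch struct {
	ListenAddr string      `json:"listen-addr"`
	Logger     interface{} `json:"-"` // runtime-only, never serialized
}

func marshalSettingsSketch() ([]byte, error) {
	// Yields {"listen-addr":"127.0.0.1:3379"}; the Logger field is omitted entirely.
	return json.Marshal(settingsSketch{ListenAddr: "127.0.0.1:3379", Logger: struct{}{}})
}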
type PersistConfig struct { diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 9caae932037..2a83082c03b 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -46,6 +46,7 @@ import ( "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/member" "github.com/tikv/pd/pkg/schedule" + sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/schedule/hbstream" "github.com/tikv/pd/pkg/schedule/schedulers" "github.com/tikv/pd/pkg/storage/endpoint" @@ -510,6 +511,30 @@ func (s *Server) GetPersistConfig() *config.PersistConfig { return s.persistConfig } +// GetConfig gets the config. +func (s *Server) GetConfig() *config.Config { + cfg := s.cfg.Clone() + cfg.Schedule = *s.persistConfig.GetScheduleConfig().Clone() + cfg.Replication = *s.persistConfig.GetReplicationConfig().Clone() + cfg.ClusterVersion = *s.persistConfig.GetClusterVersion() + return cfg +} + +// GetScheduleConfig gets the schedule config. +func (s *Server) GetScheduleConfig() *sc.ScheduleConfig { + return s.persistConfig.GetScheduleConfig().Clone() +} + +// GetReplicationConfig gets the replication config. +func (s *Server) GetReplicationConfig() *sc.ReplicationConfig { + return s.persistConfig.GetReplicationConfig().Clone() +} + +// GetStoreConfig gets the store config. +func (s *Server) GetStoreConfig() *sc.StoreConfig { + return s.persistConfig.GetStoreConfig().Clone() +} + // CreateServer creates the Server func CreateServer(ctx context.Context, cfg *config.Config) *Server { svr := &Server{ diff --git a/server/api/config.go b/server/api/config.go index c63bd953c37..9947e952582 100644 --- a/server/api/config.go +++ b/server/api/config.go @@ -27,6 +27,8 @@ import ( "github.com/pingcap/errcode" "github.com/pingcap/errors" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/mcs/utils" sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/pkg/utils/jsonutil" @@ -60,7 +62,36 @@ func newConfHandler(svr *server.Server, rd *render.Render) *confHandler { // @Router /config [get] func (h *confHandler) GetConfig(w http.ResponseWriter, r *http.Request) { cfg := h.svr.GetConfig() - cfg.Schedule.MaxMergeRegionKeys = cfg.Schedule.GetMaxMergeRegionKeys() + if h.svr.IsAPIServiceMode() { + b, err := h.GetSchedulingServerConfig("config/schedule") + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + var scheduleCfg sc.ScheduleConfig + err = json.Unmarshal(b, &scheduleCfg) + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + cfg.Schedule = scheduleCfg + b, err = h.GetSchedulingServerConfig("config/replicate") + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + var replicationCfg sc.ReplicationConfig + err = json.Unmarshal(b, &replicationCfg) + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + cfg.Replication = replicationCfg + // TODO: will we support config/store? + // TODO: after scheduler-config is supported, we need to merge the config. 
+ } else { + cfg.Schedule.MaxMergeRegionKeys = cfg.Schedule.GetMaxMergeRegionKeys() + } h.rd.JSON(w, http.StatusOK, cfg) } @@ -301,6 +332,21 @@ func getConfigMap(cfg map[string]interface{}, key []string, value interface{}) m // @Success 200 {object} sc.ScheduleConfig // @Router /config/schedule [get] func (h *confHandler) GetScheduleConfig(w http.ResponseWriter, r *http.Request) { + if h.svr.IsAPIServiceMode() { + b, err := h.GetSchedulingServerConfig("config/schedule") + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + var cfg sc.ScheduleConfig + err = json.Unmarshal(b, &cfg) + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + h.rd.JSON(w, http.StatusOK, cfg) + return + } cfg := h.svr.GetScheduleConfig() cfg.MaxMergeRegionKeys = cfg.GetMaxMergeRegionKeys() h.rd.JSON(w, http.StatusOK, cfg) @@ -364,6 +410,21 @@ func (h *confHandler) SetScheduleConfig(w http.ResponseWriter, r *http.Request) // @Success 200 {object} sc.ReplicationConfig // @Router /config/replicate [get] func (h *confHandler) GetReplicationConfig(w http.ResponseWriter, r *http.Request) { + if h.svr.IsAPIServiceMode() { + b, err := h.GetSchedulingServerConfig("config/replicate") + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + var cfg sc.ReplicationConfig + err = json.Unmarshal(b, &cfg) + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + h.rd.JSON(w, http.StatusOK, cfg) + return + } h.rd.JSON(w, http.StatusOK, h.svr.GetReplicationConfig()) } @@ -505,3 +566,24 @@ func (h *confHandler) SetReplicationModeConfig(w http.ResponseWriter, r *http.Re func (h *confHandler) GetPDServerConfig(w http.ResponseWriter, r *http.Request) { h.rd.JSON(w, http.StatusOK, h.svr.GetPDServerConfig()) } + +func (h *confHandler) GetSchedulingServerConfig(path string) ([]byte, error) { + addr, ok := h.svr.GetServicePrimaryAddr(h.svr.Context(), utils.SchedulingServiceName) + if !ok { + return nil, errs.ErrNotFoundSchedulingAddr.FastGenByArgs() + } + url := fmt.Sprintf("%s/scheduling/api/v1/%s", addr, path) + req, err := http.NewRequest(http.MethodGet, url, nil) + if err != nil { + return nil, err + } + resp, err := h.svr.GetHTTPClient().Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, errs.ErrSchedulingServer.FastGenByArgs(resp.StatusCode) + } + return io.ReadAll(resp.Body) +} diff --git a/tests/integrations/mcs/scheduling/api_test.go b/tests/integrations/mcs/scheduling/api_test.go index 5284913813c..568520a77d5 100644 --- a/tests/integrations/mcs/scheduling/api_test.go +++ b/tests/integrations/mcs/scheduling/api_test.go @@ -9,8 +9,11 @@ import ( "time" "github.com/pingcap/failpoint" + "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" _ "github.com/tikv/pd/pkg/mcs/scheduling/server/apis/v1" + "github.com/tikv/pd/pkg/mcs/scheduling/server/config" + sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/schedule/handler" "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/storage" @@ -218,3 +221,79 @@ func (suite *apiTestSuite) TestAPIForward() { testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) re.NoError(err) } + +func (suite *apiTestSuite) TestConfig() { + re := suite.Require() + s, cleanup := tests.StartSingleSchedulingTestServer(suite.ctx, re, suite.backendEndpoints, tempurl.Alloc()) + defer cleanup() + testutil.Eventually(re, func() bool { + 
return s.IsServing() + }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) + addr := s.GetAddr() + urlPrefix := fmt.Sprintf("%s/scheduling/api/v1/config", addr) + + var cfg config.Config + testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + suite.Equal(cfg.GetListenAddr(), s.GetConfig().GetListenAddr()) + suite.Equal(cfg.Schedule.LeaderScheduleLimit, s.GetConfig().Schedule.LeaderScheduleLimit) + suite.Equal(cfg.Schedule.EnableCrossTableMerge, s.GetConfig().Schedule.EnableCrossTableMerge) + suite.Equal(cfg.Replication.MaxReplicas, s.GetConfig().Replication.MaxReplicas) + suite.Equal(cfg.Replication.LocationLabels, s.GetConfig().Replication.LocationLabels) + suite.Equal(cfg.DataDir, s.GetConfig().DataDir) + + var scheduleCfg sc.ScheduleConfig + testutil.ReadGetJSON(re, testDialClient, urlPrefix+"/schedule", &scheduleCfg) + suite.Equal(scheduleCfg.LeaderScheduleLimit, s.GetScheduleConfig().LeaderScheduleLimit) + suite.Equal(scheduleCfg.EnableCrossTableMerge, s.GetScheduleConfig().EnableCrossTableMerge) + + var replicationCfg sc.ReplicationConfig + testutil.ReadGetJSON(re, testDialClient, urlPrefix+"/replicate", &replicationCfg) + suite.Equal(replicationCfg.MaxReplicas, s.GetReplicationConfig().MaxReplicas) + suite.Equal(replicationCfg.LocationLabels, s.GetReplicationConfig().LocationLabels) + + var storeCfg sc.StoreConfig + testutil.ReadGetJSON(re, testDialClient, urlPrefix+"/store", &storeCfg) + suite.Equal(storeCfg.Coprocessor.RegionMaxKeys, s.GetStoreConfig().Coprocessor.RegionMaxKeys) + suite.Equal(storeCfg.Coprocessor.RegionSplitKeys, s.GetStoreConfig().Coprocessor.RegionSplitKeys) +} + +func TestConfigForward(t *testing.T) { + re := require.New(t) + checkConfigForward := func(cluster *tests.TestCluster) { + sche := cluster.GetSchedulingPrimaryServer() + var cfg map[string]interface{} + addr := cluster.GetLeaderServer().GetAddr() + + // Test config + urlPrefix := fmt.Sprintf("%s/pd/api/v1/config", addr) + testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + re.Equal(cfg["schedule"].(map[string]interface{})["leader-schedule-limit"], float64(sche.GetScheduleConfig().LeaderScheduleLimit)) + // Test to change config + sche.GetPersistConfig().GetScheduleConfig().LeaderScheduleLimit = 100 + re.Equal(100, int(sche.GetScheduleConfig().LeaderScheduleLimit)) + testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + re.Equal(cfg["schedule"].(map[string]interface{})["leader-schedule-limit"], float64(sche.GetScheduleConfig().LeaderScheduleLimit)) + + // Test schedule + urlPrefix = fmt.Sprintf("%s/pd/api/v1/config/schedule", addr) + testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + re.Equal(cfg["leader-schedule-limit"], float64(sche.GetScheduleConfig().LeaderScheduleLimit)) + // Test to change config + sche.GetPersistConfig().GetScheduleConfig().LeaderScheduleLimit = 4 + re.Equal(4, int(sche.GetScheduleConfig().LeaderScheduleLimit)) + testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + re.Equal(cfg["leader-schedule-limit"], float64(sche.GetScheduleConfig().LeaderScheduleLimit)) + + // Test replicate + urlPrefix = fmt.Sprintf("%s/pd/api/v1/config/replicate", addr) + testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + re.Equal(cfg["max-replicas"], float64(sche.GetReplicationConfig().MaxReplicas)) + // Test to change config + sche.GetPersistConfig().GetReplicationConfig().MaxReplicas = 5 + re.Equal(5, int(sche.GetReplicationConfig().MaxReplicas)) + testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + 
re.Equal(cfg["max-replicas"], float64(sche.GetReplicationConfig().MaxReplicas)) + } + env := tests.NewSchedulingTestEnvironment(t) + env.RunTestInAPIMode(checkConfigForward) +} diff --git a/tests/integrations/mcs/scheduling/server_test.go b/tests/integrations/mcs/scheduling/server_test.go index 85cf84361b4..a359e1d023a 100644 --- a/tests/integrations/mcs/scheduling/server_test.go +++ b/tests/integrations/mcs/scheduling/server_test.go @@ -509,6 +509,6 @@ func checkOperatorFail(re *require.Assertions, oc *operator.Controller, op *oper func waitSyncFinish(re *require.Assertions, tc *tests.TestSchedulingCluster, typ storelimit.Type, expectedLimit float64) { testutil.Eventually(re, func() bool { - return tc.GetPrimaryServer().GetPersistConfig().GetStoreLimitByType(2, typ) == expectedLimit + return tc.GetPrimaryServer().GetCluster().GetSharedConfig().GetStoreLimitByType(2, typ) == expectedLimit }) } From 42782d4413d7344cfca136eca80689460fc84d56 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 27 Oct 2023 00:02:04 +0800 Subject: [PATCH 007/116] ci: run `make check` with longer timeout (#7271) ref tikv/pd#4399 Signed-off-by: lhy1024 --- .github/workflows/check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml index c0bdb7ea492..e2bf99c026f 100644 --- a/.github/workflows/check.yaml +++ b/.github/workflows/check.yaml @@ -6,7 +6,7 @@ concurrency: jobs: statics: runs-on: ubuntu-latest - timeout-minutes: 10 + timeout-minutes: 20 steps: - uses: actions/setup-go@v3 with: From 092ec86d1b4a26468ec7d803c41abe473ea07c4c Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 27 Oct 2023 00:47:04 +0800 Subject: [PATCH 008/116] test: make TestOperatorTestSuite more stable (#7268) close tikv/pd#7171 Signed-off-by: lhy1024 --- pkg/mcs/scheduling/server/server.go | 6 --- .../mcs/scheduling/server_test.go | 2 +- tests/pdctl/operator/operator_test.go | 8 ++++ tests/pdctl/scheduler/scheduler_test.go | 4 +- tests/server/api/operator_test.go | 42 ++++++++++++++++--- 5 files changed, 47 insertions(+), 15 deletions(-) diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 9caae932037..5e2ed58a009 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -504,12 +504,6 @@ func (s *Server) stopWatcher() { s.metaWatcher.Close() } -// GetPersistConfig returns the persist config. -// It's used to test. 
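// Aside — the test changes in this patch replace immediate assertions with polling, since
// config changes now sync to the scheduling server asynchronously. A minimal generic sketch
// of that wait-until pattern (testutil.Eventually in the real code); the helper below is an
// invented stand-in and assumes the standard "time" package.
func waitUntilSketch(cond func() bool, timeout, tick time.Duration) bool {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if cond() {
			return true // condition became true before the deadline
		}
		time.Sleep(tick)
	}
	return cond() // one last check at the deadline
}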
-func (s *Server) GetPersistConfig() *config.PersistConfig { - return s.persistConfig -} - // CreateServer creates the Server func CreateServer(ctx context.Context, cfg *config.Config) *Server { svr := &Server{ diff --git a/tests/integrations/mcs/scheduling/server_test.go b/tests/integrations/mcs/scheduling/server_test.go index 85cf84361b4..a359e1d023a 100644 --- a/tests/integrations/mcs/scheduling/server_test.go +++ b/tests/integrations/mcs/scheduling/server_test.go @@ -509,6 +509,6 @@ func checkOperatorFail(re *require.Assertions, oc *operator.Controller, op *oper func waitSyncFinish(re *require.Assertions, tc *tests.TestSchedulingCluster, typ storelimit.Type, expectedLimit float64) { testutil.Eventually(re, func() bool { - return tc.GetPrimaryServer().GetPersistConfig().GetStoreLimitByType(2, typ) == expectedLimit + return tc.GetPrimaryServer().GetCluster().GetSharedConfig().GetStoreLimitByType(2, typ) == expectedLimit }) } diff --git a/tests/pdctl/operator/operator_test.go b/tests/pdctl/operator/operator_test.go index 1752c28a3c0..8bb034993fa 100644 --- a/tests/pdctl/operator/operator_test.go +++ b/tests/pdctl/operator/operator_test.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/core" + "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server/config" "github.com/tikv/pd/tests" "github.com/tikv/pd/tests/pdctl" @@ -221,6 +222,13 @@ func (suite *operatorTestSuite) checkOperator(cluster *tests.TestCluster) { _, err = pdctl.ExecuteCommand(cmd, "config", "set", "enable-placement-rules", "true") re.NoError(err) + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + // wait for the scheduler server to update the config + testutil.Eventually(re, func() bool { + return sche.GetCluster().GetCheckerConfig().IsPlacementRulesEnabled() + }) + } + output, err = pdctl.ExecuteCommand(cmd, "operator", "add", "transfer-region", "1", "2", "3") re.NoError(err) re.Contains(string(output), "not supported") diff --git a/tests/pdctl/scheduler/scheduler_test.go b/tests/pdctl/scheduler/scheduler_test.go index b3d9f356ad1..fe58e304791 100644 --- a/tests/pdctl/scheduler/scheduler_test.go +++ b/tests/pdctl/scheduler/scheduler_test.go @@ -472,10 +472,8 @@ func (suite *schedulerTestSuite) checkSchedulerDiagnostic(cluster *tests.TestClu result := make(map[string]interface{}) testutil.Eventually(re, func() bool { mightExec(re, cmd, []string{"-u", pdAddr, "scheduler", "describe", schedulerName}, &result) - return len(result) != 0 + return len(result) != 0 && expectedStatus == result["status"] && expectedSummary == result["summary"] }, testutil.WithTickInterval(50*time.Millisecond)) - re.Equal(expectedStatus, result["status"]) - re.Equal(expectedSummary, result["summary"]) } stores := []*metapb.Store{ diff --git a/tests/server/api/operator_test.go b/tests/server/api/operator_test.go index 64ed5114646..83ab0f3c7ed 100644 --- a/tests/server/api/operator_test.go +++ b/tests/server/api/operator_test.go @@ -15,6 +15,7 @@ package api import ( + "encoding/json" "errors" "fmt" "net/http" @@ -73,6 +74,7 @@ func (suite *operatorTestSuite) TestOperator() { func (suite *operatorTestSuite) checkAddRemovePeer(cluster *tests.TestCluster) { re := suite.Require() + suite.pauseRuleChecker(cluster) stores := []*metapb.Store{ { Id: 1, @@ -106,6 +108,8 @@ func (suite *operatorTestSuite) checkAddRemovePeer(cluster *tests.TestCluster) { ConfVer: 1, Version: 1, }, + StartKey: []byte("a"), + EndKey: []byte("b"), } regionInfo := 
core.NewRegionInfo(region, peer1) tests.MustPutRegionInfo(re, cluster, regionInfo) @@ -176,6 +180,7 @@ func (suite *operatorTestSuite) checkAddRemovePeer(cluster *tests.TestCluster) { func (suite *operatorTestSuite) checkMergeRegionOperator(cluster *tests.TestCluster) { re := suite.Require() + suite.pauseRuleChecker(cluster) r1 := core.NewTestRegionInfo(10, 1, []byte(""), []byte("b"), core.SetWrittenBytes(1000), core.SetReadBytes(1000), core.SetRegionConfVer(1), core.SetRegionVersion(1)) tests.MustPutRegionInfo(re, cluster, r1) r2 := core.NewTestRegionInfo(20, 1, []byte("b"), []byte("c"), core.SetWrittenBytes(2000), core.SetReadBytes(0), core.SetRegionConfVer(2), core.SetRegionVersion(3)) @@ -201,6 +206,7 @@ func (suite *operatorTestSuite) checkMergeRegionOperator(cluster *tests.TestClus func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *tests.TestCluster) { re := suite.Require() + suite.pauseRuleChecker(cluster) stores := []*metapb.Store{ { Id: 1, @@ -239,6 +245,8 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te ConfVer: 1, Version: 1, }, + StartKey: []byte("a"), + EndKey: []byte("b"), } tests.MustPutRegionInfo(re, cluster, core.NewRegionInfo(region, peer1)) @@ -408,13 +416,24 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te }, } svr := cluster.GetLeaderServer() + url := fmt.Sprintf("%s/pd/api/v1/config", svr.GetAddr()) for _, testCase := range testCases { suite.T().Log(testCase.name) - // TODO: remove this after we can sync this config to all servers. - if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { - sche.GetCluster().GetSchedulerConfig().SetPlacementRuleEnabled(testCase.placementRuleEnable) + data := make(map[string]interface{}) + if testCase.placementRuleEnable { + data["enable-placement-rules"] = "true" } else { - svr.GetRaftCluster().GetOpts().SetPlacementRuleEnabled(testCase.placementRuleEnable) + data["enable-placement-rules"] = "false" + } + reqData, e := json.Marshal(data) + re.NoError(e) + err := tu.CheckPostJSON(testDialClient, url, reqData, tu.StatusOK(re)) + re.NoError(err) + if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { + // wait for the scheduler server to update the config + tu.Eventually(re, func() bool { + return sche.GetCluster().GetCheckerConfig().IsPlacementRulesEnabled() == testCase.placementRuleEnable + }) } manager := svr.GetRaftCluster().GetRuleManager() if sche := cluster.GetSchedulingPrimaryServer(); sche != nil { @@ -436,7 +455,6 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te err = manager.DeleteRule("pd", "default") suite.NoError(err) } - var err error if testCase.expectedError == nil { err = tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/operators", urlPrefix), testCase.input, tu.StatusOK(re)) } else { @@ -457,3 +475,17 @@ func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *te suite.NoError(err) } } + +// pauseRuleChecker will pause rule checker to avoid unexpected operator. 
+func (suite *operatorTestSuite) pauseRuleChecker(cluster *tests.TestCluster) { + re := suite.Require() + checkerName := "rule" + addr := cluster.GetLeaderServer().GetAddr() + resp := make(map[string]interface{}) + url := fmt.Sprintf("%s/pd/api/v1/checker/%s", addr, checkerName) + err := tu.CheckPostJSON(testDialClient, url, []byte(`{"delay":1000}`), tu.StatusOK(re)) + re.NoError(err) + err = tu.ReadGetJSON(re, testDialClient, url, &resp) + re.NoError(err) + re.True(resp["paused"].(bool)) +} From 67ecd76f9cfcc9d56124b4c6b7a83191bdff66aa Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 27 Oct 2023 13:24:33 +0800 Subject: [PATCH 009/116] mcs: fix checkMembershipCh blocking (#7272) close tikv/pd#7207 Signed-off-by: lhy1024 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- pkg/mcs/scheduling/server/cluster.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pkg/mcs/scheduling/server/cluster.go b/pkg/mcs/scheduling/server/cluster.go index 09ae4ede528..0c4e781e747 100644 --- a/pkg/mcs/scheduling/server/cluster.go +++ b/pkg/mcs/scheduling/server/cluster.go @@ -206,7 +206,7 @@ func (c *Cluster) AllocID() (uint64, error) { defer cancel() resp, err := client.AllocID(ctx, &pdpb.AllocIDRequest{Header: &pdpb.RequestHeader{ClusterId: c.clusterID}}) if err != nil { - c.checkMembershipCh <- struct{}{} + c.triggerMembershipCheck() return 0, err } return resp.GetId(), nil @@ -215,12 +215,19 @@ func (c *Cluster) AllocID() (uint64, error) { func (c *Cluster) getAPIServerLeaderClient() (pdpb.PDClient, error) { cli := c.apiServerLeader.Load() if cli == nil { - c.checkMembershipCh <- struct{}{} + c.triggerMembershipCheck() return nil, errors.New("API server leader is not found") } return cli.(pdpb.PDClient), nil } +func (c *Cluster) triggerMembershipCheck() { + select { + case c.checkMembershipCh <- struct{}{}: + default: // avoid blocking + } +} + // SwitchAPIServerLeader switches the API server leader. 
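// Aside — a standalone sketch of the non-blocking notification pattern used by
// triggerMembershipCheck above: a select with a default branch drops the signal when no
// receiver (or buffer slot) is ready, instead of blocking the caller. Names are illustrative.
func notifyNonBlockingSketch(ch chan struct{}) bool {
	select {
	case ch <- struct{}{}:
		return true // the signal was delivered
	default:
		return false // nobody ready: skip rather than block the request path
	}
}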
func (c *Cluster) SwitchAPIServerLeader(new pdpb.PDClient) bool { old := c.apiServerLeader.Load() From 744e51d33acf6132b7a741486648ae8bb8ce9bba Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 27 Oct 2023 15:05:03 +0800 Subject: [PATCH 010/116] core: fix datarace in RegionsInfo (#7281) close tikv/pd#7273 Signed-off-by: lhy1024 --- pkg/core/region.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/core/region.go b/pkg/core/region.go index 6875844b7a6..2ac323a1272 100644 --- a/pkg/core/region.go +++ b/pkg/core/region.go @@ -1339,8 +1339,8 @@ func (r *RegionsInfo) GetStoreWriteRate(storeID uint64) (bytesRate, keysRate flo // GetClusterNotFromStorageRegionsCnt gets the total count of regions that not loaded from storage anymore func (r *RegionsInfo) GetClusterNotFromStorageRegionsCnt() int { - r.st.RLock() - defer r.st.RUnlock() + r.t.RLock() + defer r.t.RUnlock() return r.tree.notFromStorageRegionsCnt } From ce0bb5dc398bd56a1f557e632e2f0473a4cb53a9 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Mon, 30 Oct 2023 15:09:35 +0800 Subject: [PATCH 011/116] mcs: support admin/cache http interface in scheduling server (#7279) ref tikv/pd#5839 Signed-off-by: lhy1024 --- errors.toml | 10 ++++ pkg/errs/errno.go | 6 ++ pkg/mcs/scheduling/server/apis/v1/api.go | 51 +++++++++++++++- pkg/mcs/scheduling/server/cluster.go | 10 ++++ server/api/admin.go | 54 +++++++++++++++-- tests/integrations/mcs/scheduling/api_test.go | 60 +++++++++++++++++++ 6 files changed, 185 insertions(+), 6 deletions(-) diff --git a/errors.toml b/errors.toml index 1b96de8a209..1d10d40d294 100644 --- a/errors.toml +++ b/errors.toml @@ -496,6 +496,16 @@ error = ''' init file log error, %s ''' +["PD:mcs:ErrNotFoundSchedulingAddr"] +error = ''' +cannot find scheduling address +''' + +["PD:mcs:ErrSchedulingServer"] +error = ''' +scheduling server meets %v +''' + ["PD:member:ErrCheckCampaign"] error = ''' check campaign failed diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index 181dfc9b393..e5bac8519be 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -403,3 +403,9 @@ var ( ErrDeleteReservedGroup = errors.Normalize("cannot delete reserved group", errors.RFCCodeText("PD:resourcemanager:ErrDeleteReservedGroup")) ErrInvalidGroup = errors.Normalize("invalid group settings, please check the group name, priority and the number of resources", errors.RFCCodeText("PD:resourcemanager:ErrInvalidGroup")) ) + +// Micro service errors +var ( + ErrNotFoundSchedulingAddr = errors.Normalize("cannot find scheduling address", errors.RFCCodeText("PD:mcs:ErrNotFoundSchedulingAddr")) + ErrSchedulingServer = errors.Normalize("scheduling server meets %v", errors.RFCCodeText("PD:mcs:ErrSchedulingServer")) +) diff --git a/pkg/mcs/scheduling/server/apis/v1/api.go b/pkg/mcs/scheduling/server/apis/v1/api.go index 39be00ef9a0..d0acdf39a09 100644 --- a/pkg/mcs/scheduling/server/apis/v1/api.go +++ b/pkg/mcs/scheduling/server/apis/v1/api.go @@ -15,7 +15,6 @@ package apis import ( - "fmt" "net/http" "strconv" "sync" @@ -26,6 +25,7 @@ import ( "github.com/gin-gonic/gin" "github.com/joho/godotenv" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/errs" scheserver "github.com/tikv/pd/pkg/mcs/scheduling/server" mcsutils "github.com/tikv/pd/pkg/mcs/utils" sche "github.com/tikv/pd/pkg/schedule/core" @@ -121,6 +121,8 @@ func NewService(srv *scheserver.Service) *Service { func (s *Service) RegisterAdminRouter() { router := s.root.Group("admin") router.PUT("/log", changeLogLevel) + router.DELETE("cache/regions", deleteAllRegionCache) + 
router.DELETE("cache/regions/:id", deleteRegionCacheByID) } // RegisterSchedulersRouter registers the router of the schedulers handler. @@ -160,6 +162,11 @@ func (s *Service) RegisterOperatorsRouter() { router.GET("/records", getOperatorRecords) } +// @Tags admin +// @Summary Change the log level. +// @Produce json +// @Success 200 {string} string "The log level is updated." +// @Router /admin/log [put] func changeLogLevel(c *gin.Context) { svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) var level string @@ -176,6 +183,46 @@ func changeLogLevel(c *gin.Context) { c.String(http.StatusOK, "The log level is updated.") } +// @Tags admin +// @Summary Drop all regions from cache. +// @Produce json +// @Success 200 {string} string "All regions are removed from server cache." +// @Router /admin/cache/regions [delete] +func deleteAllRegionCache(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + cluster := svr.GetCluster() + if cluster == nil { + c.String(http.StatusInternalServerError, errs.ErrNotBootstrapped.GenWithStackByArgs().Error()) + return + } + cluster.DropCacheAllRegion() + c.String(http.StatusOK, "All regions are removed from server cache.") +} + +// @Tags admin +// @Summary Drop a specific region from cache. +// @Param id path integer true "Region Id" +// @Produce json +// @Success 200 {string} string "The region is removed from server cache." +// @Failure 400 {string} string "The input is invalid." +// @Router /admin/cache/regions/{id} [delete] +func deleteRegionCacheByID(c *gin.Context) { + svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) + cluster := svr.GetCluster() + if cluster == nil { + c.String(http.StatusInternalServerError, errs.ErrNotBootstrapped.GenWithStackByArgs().Error()) + return + } + regionIDStr := c.Param("id") + regionID, err := strconv.ParseUint(regionIDStr, 10, 64) + if err != nil { + c.String(http.StatusBadRequest, err.Error()) + return + } + cluster.DropCacheRegion(regionID) + c.String(http.StatusOK, "The region is removed from server cache.") +} + // @Tags operators // @Summary Get an operator by ID. // @Param region_id path int true "A Region's Id" @@ -475,7 +522,7 @@ func getHotRegions(typ utils.RWType, c *gin.Context) { for _, storeID := range storeIDs { id, err := strconv.ParseUint(storeID, 10, 64) if err != nil { - c.String(http.StatusBadRequest, fmt.Sprintf("invalid store id: %s", storeID)) + c.String(http.StatusBadRequest, errs.ErrInvalidStoreID.FastGenByArgs(storeID).Error()) return } _, err = handler.GetStore(id) diff --git a/pkg/mcs/scheduling/server/cluster.go b/pkg/mcs/scheduling/server/cluster.go index 0c4e781e747..028c2a12b37 100644 --- a/pkg/mcs/scheduling/server/cluster.go +++ b/pkg/mcs/scheduling/server/cluster.go @@ -593,3 +593,13 @@ func (c *Cluster) processRegionHeartbeat(region *core.RegionInfo) error { func (c *Cluster) IsPrepared() bool { return c.coordinator.GetPrepareChecker().IsPrepared() } + +// DropCacheAllRegion removes all cached regions. +func (c *Cluster) DropCacheAllRegion() { + c.ResetRegionCache() +} + +// DropCacheRegion removes a region from the cache. 
+func (c *Cluster) DropCacheRegion(id uint64) { + c.RemoveRegionIfExist(id) +} diff --git a/server/api/admin.go b/server/api/admin.go index 7a1dfb0f1e8..246c9239f59 100644 --- a/server/api/admin.go +++ b/server/api/admin.go @@ -16,6 +16,7 @@ package api import ( "encoding/json" + "fmt" "io" "net/http" "strconv" @@ -24,6 +25,7 @@ import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/core" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/utils/apiutil" "github.com/tikv/pd/server" "github.com/unrolled/render" @@ -59,7 +61,11 @@ func (h *adminHandler) DeleteRegionCache(w http.ResponseWriter, r *http.Request) return } rc.DropCacheRegion(regionID) - h.rd.JSON(w, http.StatusOK, "The region is removed from server cache.") + if h.svr.IsAPIServiceMode() { + err = h.DeleteRegionCacheInSchedulingServer(regionID) + } + msg := "The region is removed from server cache." + h.rd.JSON(w, http.StatusOK, h.buildMsg(msg, err)) } // @Tags admin @@ -95,8 +101,11 @@ func (h *adminHandler) DeleteRegionStorage(w http.ResponseWriter, r *http.Reques } // Remove region from cache. rc.DropCacheRegion(regionID) - - h.rd.JSON(w, http.StatusOK, "The region is removed from server cache and region meta storage.") + if h.svr.IsAPIServiceMode() { + err = h.DeleteRegionCacheInSchedulingServer(regionID) + } + msg := "The region is removed from server cache and region meta storage." + h.rd.JSON(w, http.StatusOK, h.buildMsg(msg, err)) } // @Tags admin @@ -105,9 +114,14 @@ func (h *adminHandler) DeleteRegionStorage(w http.ResponseWriter, r *http.Reques // @Success 200 {string} string "All regions are removed from server cache." // @Router /admin/cache/regions [delete] func (h *adminHandler) DeleteAllRegionCache(w http.ResponseWriter, r *http.Request) { + var err error rc := getCluster(r) rc.DropCacheAllRegion() - h.rd.JSON(w, http.StatusOK, "All regions are removed from server cache.") + if h.svr.IsAPIServiceMode() { + err = h.DeleteRegionCacheInSchedulingServer() + } + msg := "All regions are removed from server cache." 
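// Aside — a hedged, self-contained distillation of the forwarding helper added further down
// in this diff (DeleteRegionCacheInSchedulingServer): replay the DELETE against the
// scheduling microservice and treat any non-200 status as an error. The URL handling here is
// a placeholder, not the real service discovery; assumes "fmt" and "net/http".
func forwardDeleteSketch(client *http.Client, schedulingAddr, path string) error {
	req, err := http.NewRequest(http.MethodDelete, schedulingAddr+path, nil)
	if err != nil {
		return err
	}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("scheduling server returned status %d", resp.StatusCode)
	}
	return nil
}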
+ h.rd.JSON(w, http.StatusOK, h.buildMsg(msg, err)) } // Intentionally no swagger mark as it is supposed to be only used in @@ -200,3 +214,35 @@ func (h *adminHandler) RecoverAllocID(w http.ResponseWriter, r *http.Request) { _ = h.rd.Text(w, http.StatusOK, "") } + +func (h *adminHandler) DeleteRegionCacheInSchedulingServer(id ...uint64) error { + addr, ok := h.svr.GetServicePrimaryAddr(h.svr.Context(), utils.SchedulingServiceName) + if !ok { + return errs.ErrNotFoundSchedulingAddr.FastGenByArgs() + } + var idStr string + if len(id) > 0 { + idStr = strconv.FormatUint(id[0], 10) + } + url := fmt.Sprintf("%s/scheduling/api/v1/admin/cache/regions/%s", addr, idStr) + req, err := http.NewRequest(http.MethodDelete, url, nil) + if err != nil { + return err + } + resp, err := h.svr.GetHTTPClient().Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return errs.ErrSchedulingServer.FastGenByArgs(resp.StatusCode) + } + return nil +} + +func (h *adminHandler) buildMsg(msg string, err error) string { + if h.svr.IsAPIServiceMode() && err != nil { + return fmt.Sprintf("This operation was executed in API server but needs to be re-executed on scheduling server due to the following error: %s", err.Error()) + } + return msg +} diff --git a/tests/integrations/mcs/scheduling/api_test.go b/tests/integrations/mcs/scheduling/api_test.go index 5284913813c..d6028204325 100644 --- a/tests/integrations/mcs/scheduling/api_test.go +++ b/tests/integrations/mcs/scheduling/api_test.go @@ -9,7 +9,9 @@ import ( "time" "github.com/pingcap/failpoint" + "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/core" _ "github.com/tikv/pd/pkg/mcs/scheduling/server/apis/v1" "github.com/tikv/pd/pkg/schedule/handler" "github.com/tikv/pd/pkg/statistics" @@ -218,3 +220,61 @@ func (suite *apiTestSuite) TestAPIForward() { testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) re.NoError(err) } + +func TestAdminRegionCache(t *testing.T) { + re := require.New(t) + checkAdminRegionCache := func(cluster *tests.TestCluster) { + r1 := core.NewTestRegionInfo(10, 1, []byte(""), []byte("b"), core.SetRegionConfVer(100), core.SetRegionVersion(100)) + tests.MustPutRegionInfo(re, cluster, r1) + r2 := core.NewTestRegionInfo(20, 1, []byte("b"), []byte("c"), core.SetRegionConfVer(100), core.SetRegionVersion(100)) + tests.MustPutRegionInfo(re, cluster, r2) + r3 := core.NewTestRegionInfo(30, 1, []byte("c"), []byte(""), core.SetRegionConfVer(100), core.SetRegionVersion(100)) + tests.MustPutRegionInfo(re, cluster, r3) + + schedulingServer := cluster.GetSchedulingPrimaryServer() + re.Equal(3, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) + + addr := schedulingServer.GetAddr() + urlPrefix := fmt.Sprintf("%s/scheduling/api/v1/admin/cache/regions", addr) + err := testutil.CheckDelete(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "30"), testutil.StatusOK(re)) + re.NoError(err) + re.Equal(2, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) + + err = testutil.CheckDelete(testDialClient, urlPrefix, testutil.StatusOK(re)) + re.NoError(err) + re.Equal(0, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) + } + env := tests.NewSchedulingTestEnvironment(t) + env.RunTestInAPIMode(checkAdminRegionCache) +} + +func TestAdminRegionCacheForward(t *testing.T) { + re := require.New(t) + checkAdminRegionCache := func(cluster *tests.TestCluster) { + r1 := core.NewTestRegionInfo(10, 1, []byte(""), []byte("b"), 
core.SetRegionConfVer(100), core.SetRegionVersion(100)) + tests.MustPutRegionInfo(re, cluster, r1) + r2 := core.NewTestRegionInfo(20, 1, []byte("b"), []byte("c"), core.SetRegionConfVer(100), core.SetRegionVersion(100)) + tests.MustPutRegionInfo(re, cluster, r2) + r3 := core.NewTestRegionInfo(30, 1, []byte("c"), []byte(""), core.SetRegionConfVer(100), core.SetRegionVersion(100)) + tests.MustPutRegionInfo(re, cluster, r3) + + apiServer := cluster.GetLeaderServer().GetServer() + schedulingServer := cluster.GetSchedulingPrimaryServer() + re.Equal(3, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) + re.Equal(3, apiServer.GetRaftCluster().GetRegionCount([]byte{}, []byte{}).Count) + + addr := cluster.GetLeaderServer().GetAddr() + urlPrefix := fmt.Sprintf("%s/pd/api/v1/admin/cache/region", addr) + err := testutil.CheckDelete(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "30"), testutil.StatusOK(re)) + re.NoError(err) + re.Equal(2, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) + re.Equal(2, apiServer.GetRaftCluster().GetRegionCount([]byte{}, []byte{}).Count) + + err = testutil.CheckDelete(testDialClient, urlPrefix+"s", testutil.StatusOK(re)) + re.NoError(err) + re.Equal(0, schedulingServer.GetCluster().GetRegionCount([]byte{}, []byte{})) + re.Equal(0, apiServer.GetRaftCluster().GetRegionCount([]byte{}, []byte{}).Count) + } + env := tests.NewSchedulingTestEnvironment(t) + env.RunTestInAPIMode(checkAdminRegionCache) +} From ded917b47a4ca57ed614a8cf5e5a1b77ce549368 Mon Sep 17 00:00:00 2001 From: Yongbo Jiang Date: Tue, 31 Oct 2023 17:54:37 +0800 Subject: [PATCH 012/116] config: fix `Clone` in `RatelimitConfig` (#7289) close tikv/pd#7288 Signed-off-by: Cabinfever_B --- server/config/config_test.go | 26 ++++++++++++++++++++++ server/config/service_middleware_config.go | 10 +++++++++ 2 files changed, 36 insertions(+) diff --git a/server/config/config_test.go b/server/config/config_test.go index 75e69c26d5c..07cdc966409 100644 --- a/server/config/config_test.go +++ b/server/config/config_test.go @@ -26,6 +26,7 @@ import ( "github.com/BurntSushi/toml" "github.com/spf13/pflag" "github.com/stretchr/testify/require" + "github.com/tikv/pd/pkg/ratelimit" sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/storage" "github.com/tikv/pd/pkg/utils/configutil" @@ -479,3 +480,28 @@ func newTestScheduleOption() (*PersistOptions, error) { opt := NewPersistOptions(cfg) return opt, nil } + +func TestRateLimitClone(t *testing.T) { + re := require.New(t) + cfg := &RateLimitConfig{ + EnableRateLimit: defaultEnableRateLimitMiddleware, + LimiterConfig: make(map[string]ratelimit.DimensionConfig), + } + clone := cfg.Clone() + clone.LimiterConfig["test"] = ratelimit.DimensionConfig{ + ConcurrencyLimit: 200, + } + dc := cfg.LimiterConfig["test"] + re.Equal(dc.ConcurrencyLimit, uint64(0)) + + gCfg := &GRPCRateLimitConfig{ + EnableRateLimit: defaultEnableGRPCRateLimitMiddleware, + LimiterConfig: make(map[string]ratelimit.DimensionConfig), + } + gClone := gCfg.Clone() + gClone.LimiterConfig["test"] = ratelimit.DimensionConfig{ + ConcurrencyLimit: 300, + } + gdc := gCfg.LimiterConfig["test"] + re.Equal(gdc.ConcurrencyLimit, uint64(0)) +} diff --git a/server/config/service_middleware_config.go b/server/config/service_middleware_config.go index ef0b04b2abd..b13e3398ac5 100644 --- a/server/config/service_middleware_config.go +++ b/server/config/service_middleware_config.go @@ -78,7 +78,12 @@ type RateLimitConfig struct { // Clone returns a cloned rate limit config. 
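// Aside — a minimal sketch of the bug this patch fixes: copying a struct by value copies only
// the map header, so the "clone" and the original keep sharing one underlying map. The type
// below is an invented stand-in for RateLimitConfig.
type limitsSketch struct {
	PerAPI map[string]uint64
}

func sharedMapPitfallSketch() bool {
	orig := limitsSketch{PerAPI: map[string]uint64{}}
	clone := orig // value copy: fields are copied, but the map is still shared
	clone.PerAPI["GetRegion"] = 200
	_, leaked := orig.PerAPI["GetRegion"]
	return leaked // true: the write through the clone is visible in the original
}

func cloneLimitsSketch(c limitsSketch) limitsSketch {
	// The fix applied below: allocate a fresh map and copy the entries before returning.
	m := make(map[string]uint64, len(c.PerAPI))
	for k, v := range c.PerAPI {
		m[k] = v
	}
	return limitsSketch{PerAPI: m}
}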
func (c *RateLimitConfig) Clone() *RateLimitConfig { + m := make(map[string]ratelimit.DimensionConfig, len(c.LimiterConfig)) + for k, v := range c.LimiterConfig { + m[k] = v + } cfg := *c + cfg.LimiterConfig = m return &cfg } @@ -92,6 +97,11 @@ type GRPCRateLimitConfig struct { // Clone returns a cloned rate limit config. func (c *GRPCRateLimitConfig) Clone() *GRPCRateLimitConfig { + m := make(map[string]ratelimit.DimensionConfig, len(c.LimiterConfig)) + for k, v := range c.LimiterConfig { + m[k] = v + } cfg := *c + cfg.LimiterConfig = m return &cfg } From 4e45e951c3d351fef247310ef569efb2b912850a Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Wed, 1 Nov 2023 13:56:07 +0800 Subject: [PATCH 013/116] mcs: support scheduler config forward and enable some tests (#7256) ref tikv/pd#5839 Signed-off-by: lhy1024 Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- pkg/mcs/scheduling/server/apis/v1/api.go | 57 +++++ pkg/schedule/handler/handler.go | 45 +++- pkg/schedule/schedulers/scheduler.go | 18 ++ .../schedulers/scheduler_controller.go | 5 + .../schedulers/shuffle_region_config.go | 1 + pkg/utils/testutil/api_check.go | 16 +- server/api/server.go | 6 + server/config/persist_options.go | 5 +- server/server.go | 15 +- tests/integrations/mcs/scheduling/api_test.go | 21 ++ tests/pdctl/scheduler/scheduler_test.go | 231 ++++++++++-------- tests/server/api/scheduler_test.go | 101 ++++---- tools/pd-ctl/pdctl/command/scheduler.go | 14 +- 13 files changed, 350 insertions(+), 185 deletions(-) diff --git a/pkg/mcs/scheduling/server/apis/v1/api.go b/pkg/mcs/scheduling/server/apis/v1/api.go index d0acdf39a09..356dc5a7f42 100644 --- a/pkg/mcs/scheduling/server/apis/v1/api.go +++ b/pkg/mcs/scheduling/server/apis/v1/api.go @@ -31,6 +31,7 @@ import ( sche "github.com/tikv/pd/pkg/schedule/core" "github.com/tikv/pd/pkg/schedule/handler" "github.com/tikv/pd/pkg/schedule/operator" + "github.com/tikv/pd/pkg/schedule/schedulers" "github.com/tikv/pd/pkg/statistics/utils" "github.com/tikv/pd/pkg/storage" "github.com/tikv/pd/pkg/utils/apiutil" @@ -130,6 +131,8 @@ func (s *Service) RegisterSchedulersRouter() { router := s.root.Group("schedulers") router.GET("", getSchedulers) router.GET("/diagnostic/:name", getDiagnosticResult) + router.GET("/config", getSchedulerConfig) + router.GET("/config/:name/list", getSchedulerConfigByName) // TODO: in the future, we should split pauseOrResumeScheduler to two different APIs. // And we need to do one-to-two forwarding in the API middleware. router.POST("/:name", pauseOrResumeScheduler) @@ -432,6 +435,60 @@ func getSchedulers(c *gin.Context) { c.IndentedJSON(http.StatusOK, output) } +// @Tags schedulers +// @Summary List all scheduler configs. +// @Produce json +// @Success 200 {object} map[string]interface{} +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /schedulers/config/ [get] +func getSchedulerConfig(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + sc, err := handler.GetSchedulersController() + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + sches, configs, err := sc.GetAllSchedulerConfigs() + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + c.IndentedJSON(http.StatusOK, schedulers.ToPayload(sches, configs)) +} + +// @Tags schedulers +// @Summary List scheduler config by name. 
+// @Produce json +// @Success 200 {object} map[string]interface{} +// @Failure 404 {string} string scheduler not found +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /schedulers/config/{name}/list [get] +func getSchedulerConfigByName(c *gin.Context) { + handler := c.MustGet(handlerKey).(*handler.Handler) + sc, err := handler.GetSchedulersController() + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + handlers := sc.GetSchedulerHandlers() + name := c.Param("name") + if _, ok := handlers[name]; !ok { + c.String(http.StatusNotFound, errs.ErrSchedulerNotFound.GenWithStackByArgs().Error()) + return + } + isDisabled, err := sc.IsSchedulerDisabled(name) + if err != nil { + c.String(http.StatusInternalServerError, err.Error()) + return + } + if isDisabled { + c.String(http.StatusNotFound, errs.ErrSchedulerNotFound.GenWithStackByArgs().Error()) + return + } + c.Request.URL.Path = "/list" + handlers[name].ServeHTTP(c.Writer, c.Request) +} + // @Tags schedulers // @Summary List schedulers diagnostic result. // @Produce json diff --git a/pkg/schedule/handler/handler.go b/pkg/schedule/handler/handler.go index fca43f3eeeb..45b0eaf502f 100644 --- a/pkg/schedule/handler/handler.go +++ b/pkg/schedule/handler/handler.go @@ -765,13 +765,22 @@ func (h *Handler) GetCheckerStatus(name string) (map[string]bool, error) { }, nil } -// GetSchedulerNames returns all names of schedulers. -func (h *Handler) GetSchedulerNames() ([]string, error) { +// GetSchedulersController returns controller of schedulers. +func (h *Handler) GetSchedulersController() (*schedulers.Controller, error) { co := h.GetCoordinator() if co == nil { return nil, errs.ErrNotBootstrapped.GenWithStackByArgs() } - return co.GetSchedulersController().GetSchedulerNames(), nil + return co.GetSchedulersController(), nil +} + +// GetSchedulerNames returns all names of schedulers. +func (h *Handler) GetSchedulerNames() ([]string, error) { + sc, err := h.GetSchedulersController() + if err != nil { + return nil, err + } + return sc.GetSchedulerNames(), nil } type schedulerPausedPeriod struct { @@ -782,11 +791,10 @@ type schedulerPausedPeriod struct { // GetSchedulerByStatus returns all names of schedulers by status. func (h *Handler) GetSchedulerByStatus(status string, needTS bool) (interface{}, error) { - co := h.GetCoordinator() - if co == nil { - return nil, errs.ErrNotBootstrapped.GenWithStackByArgs() + sc, err := h.GetSchedulersController() + if err != nil { + return nil, err } - sc := co.GetSchedulersController() schedulers := sc.GetSchedulerNames() switch status { case "paused": @@ -837,7 +845,20 @@ func (h *Handler) GetSchedulerByStatus(status string, needTS bool) (interface{}, } return disabledSchedulers, nil default: - return schedulers, nil + // The default scheduler could not be deleted in scheduling server, + // so schedulers could only be disabled. + // We should not return the disabled schedulers here. + var enabledSchedulers []string + for _, scheduler := range schedulers { + disabled, err := sc.IsSchedulerDisabled(scheduler) + if err != nil { + return nil, err + } + if !disabled { + enabledSchedulers = append(enabledSchedulers, scheduler) + } + } + return enabledSchedulers, nil } } @@ -861,11 +882,11 @@ func (h *Handler) GetDiagnosticResult(name string) (*schedulers.DiagnosticResult // t == 0 : resume scheduler. // t > 0 : scheduler delays t seconds. 
func (h *Handler) PauseOrResumeScheduler(name string, t int64) (err error) { - co := h.GetCoordinator() - if co == nil { - return errs.ErrNotBootstrapped.GenWithStackByArgs() + sc, err := h.GetSchedulersController() + if err != nil { + return err } - if err = co.GetSchedulersController().PauseOrResumeScheduler(name, t); err != nil { + if err = sc.PauseOrResumeScheduler(name, t); err != nil { if t == 0 { log.Error("can not resume scheduler", zap.String("scheduler-name", name), errs.ZapError(err)) } else { diff --git a/pkg/schedule/schedulers/scheduler.go b/pkg/schedule/schedulers/scheduler.go index ba02c280d40..9262f7d0a65 100644 --- a/pkg/schedule/schedulers/scheduler.go +++ b/pkg/schedule/schedulers/scheduler.go @@ -66,6 +66,24 @@ func DecodeConfig(data []byte, v interface{}) error { return nil } +// ToPayload returns the payload of config. +func ToPayload(sches, configs []string) map[string]interface{} { + payload := make(map[string]interface{}) + for i, sche := range sches { + var config interface{} + err := DecodeConfig([]byte(configs[i]), &config) + if err != nil { + log.Error("failed to decode scheduler config", + zap.String("config", configs[i]), + zap.String("scheduler", sche), + errs.ZapError(err)) + continue + } + payload[sche] = config + } + return payload +} + // ConfigDecoder used to decode the config. type ConfigDecoder func(v interface{}) error diff --git a/pkg/schedule/schedulers/scheduler_controller.go b/pkg/schedule/schedulers/scheduler_controller.go index 0f2264392aa..79c8cbfbc92 100644 --- a/pkg/schedule/schedulers/scheduler_controller.go +++ b/pkg/schedule/schedulers/scheduler_controller.go @@ -418,6 +418,11 @@ func (c *Controller) CheckTransferWitnessLeader(region *core.RegionInfo) { } } +// GetAllSchedulerConfigs returns all scheduler configs. +func (c *Controller) GetAllSchedulerConfigs() ([]string, []string, error) { + return c.storage.LoadAllSchedulerConfigs() +} + // ScheduleController is used to manage a scheduler. type ScheduleController struct { Scheduler diff --git a/pkg/schedule/schedulers/shuffle_region_config.go b/pkg/schedule/schedulers/shuffle_region_config.go index f503a6f67c7..7d04879c992 100644 --- a/pkg/schedule/schedulers/shuffle_region_config.go +++ b/pkg/schedule/schedulers/shuffle_region_config.go @@ -69,6 +69,7 @@ func (conf *shuffleRegionSchedulerConfig) IsRoleAllow(role string) bool { func (conf *shuffleRegionSchedulerConfig) ServeHTTP(w http.ResponseWriter, r *http.Request) { router := mux.NewRouter() + router.HandleFunc("/list", conf.handleGetRoles).Methods(http.MethodGet) router.HandleFunc("/roles", conf.handleGetRoles).Methods(http.MethodGet) router.HandleFunc("/roles", conf.handleSetRoles).Methods(http.MethodPost) router.ServeHTTP(w, r) diff --git a/pkg/utils/testutil/api_check.go b/pkg/utils/testutil/api_check.go index 84af97f828d..4ce5e859f3f 100644 --- a/pkg/utils/testutil/api_check.go +++ b/pkg/utils/testutil/api_check.go @@ -37,29 +37,29 @@ func StatusOK(re *require.Assertions) func([]byte, int, http.Header) { // StatusNotOK is used to check whether http response code is not equal http.StatusOK. func StatusNotOK(re *require.Assertions) func([]byte, int, http.Header) { - return func(_ []byte, i int, _ http.Header) { - re.NotEqual(http.StatusOK, i) + return func(resp []byte, i int, _ http.Header) { + re.NotEqual(http.StatusOK, i, "resp: "+string(resp)) } } // ExtractJSON is used to check whether given data can be extracted successfully. 
func ExtractJSON(re *require.Assertions, data interface{}) func([]byte, int, http.Header) { - return func(res []byte, _ int, _ http.Header) { - re.NoError(json.Unmarshal(res, data)) + return func(resp []byte, _ int, _ http.Header) { + re.NoError(json.Unmarshal(resp, data), "resp: "+string(resp)) } } // StringContain is used to check whether response context contains given string. func StringContain(re *require.Assertions, sub string) func([]byte, int, http.Header) { - return func(res []byte, _ int, _ http.Header) { - re.Contains(string(res), sub) + return func(resp []byte, _ int, _ http.Header) { + re.Contains(string(resp), sub, "resp: "+string(resp)) } } // StringEqual is used to check whether response context equal given string. func StringEqual(re *require.Assertions, str string) func([]byte, int, http.Header) { - return func(res []byte, _ int, _ http.Header) { - re.Contains(string(res), str) + return func(resp []byte, _ int, _ http.Header) { + re.Contains(string(resp), str, "resp: "+string(resp)) } } diff --git a/server/api/server.go b/server/api/server.go index ee301ea54c8..ae877b8407c 100644 --- a/server/api/server.go +++ b/server/api/server.go @@ -52,6 +52,7 @@ func NewHandler(_ context.Context, svr *server.Server) (http.Handler, apiutil.AP // "/schedulers", http.MethodGet // "/schedulers/{name}", http.MethodPost // "/schedulers/diagnostic/{name}", http.MethodGet + // "/scheduler-config", http.MethodGet // "/hotspot/regions/read", http.MethodGet // "/hotspot/regions/write", http.MethodGet // "/hotspot/regions/history", http.MethodGet @@ -90,6 +91,11 @@ func NewHandler(_ context.Context, svr *server.Server) (http.Handler, apiutil.AP scheapi.APIPathPrefix+"/schedulers", mcs.SchedulingServiceName, []string{http.MethodGet}), + serverapi.MicroserviceRedirectRule( + prefix+"/scheduler-config", + scheapi.APIPathPrefix+"/schedulers/config", + mcs.SchedulingServiceName, + []string{http.MethodGet}), serverapi.MicroserviceRedirectRule( prefix+"/schedulers/", // Note: this means "/schedulers/{name}" scheapi.APIPathPrefix+"/schedulers", diff --git a/server/config/persist_options.go b/server/config/persist_options.go index c0a0ebf5c47..49a44449a22 100644 --- a/server/config/persist_options.go +++ b/server/config/persist_options.go @@ -789,11 +789,10 @@ func (o *PersistOptions) Persist(storage endpoint.ConfigStorage) error { }, StoreConfig: *o.GetStoreConfig(), } - err := storage.SaveConfig(cfg) failpoint.Inject("persistFail", func() { - err = errors.New("fail to persist") + failpoint.Return(errors.New("fail to persist")) }) - return err + return storage.SaveConfig(cfg) } // Reload reloads the configuration from the storage. 
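[Editor's note, illustrative only and not part of the patch] The new schedulers.ToPayload helper shown above JSON-decodes each stored scheduler config and keys it by scheduler name. Assuming the storage interface used elsewhere in this patch, a call looks roughly like the following sketch; the config literal is invented for illustration, and the float64 value assumes DecodeConfig unmarshals JSON numbers into an interface{}:

	sches, configs, _ := storage.LoadAllSchedulerConfigs() // e.g. ["balance-leader-scheduler"], [`{"batch": 4}`]
	payload := schedulers.ToPayload(sches, configs)
	// payload["balance-leader-scheduler"] would then be map[string]interface{}{"batch": float64(4)}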
diff --git a/server/server.go b/server/server.go index 160609e37a7..9cd7f18578e 100644 --- a/server/server.go +++ b/server/server.go @@ -948,20 +948,7 @@ func (s *Server) GetConfig() *config.Config { if err != nil { return cfg } - payload := make(map[string]interface{}) - for i, sche := range sches { - var config interface{} - err := schedulers.DecodeConfig([]byte(configs[i]), &config) - if err != nil { - log.Error("failed to decode scheduler config", - zap.String("config", configs[i]), - zap.String("scheduler", sche), - errs.ZapError(err)) - continue - } - payload[sche] = config - } - cfg.Schedule.SchedulersPayload = payload + cfg.Schedule.SchedulersPayload = schedulers.ToPayload(sches, configs) return cfg } diff --git a/tests/integrations/mcs/scheduling/api_test.go b/tests/integrations/mcs/scheduling/api_test.go index d6028204325..3793c09d883 100644 --- a/tests/integrations/mcs/scheduling/api_test.go +++ b/tests/integrations/mcs/scheduling/api_test.go @@ -172,6 +172,9 @@ func (suite *apiTestSuite) TestAPIForward() { // "/schedulers", http.MethodGet // "/schedulers/{name}", http.MethodPost // "/schedulers/diagnostic/{name}", http.MethodGet + // "/scheduler-config/", http.MethodGet + // "/scheduler-config/{name}/list", http.MethodGet + // "/scheduler-config/{name}/roles", http.MethodGet // Should not redirect: // "/schedulers", http.MethodPost // "/schedulers/{name}", http.MethodDelete @@ -191,6 +194,24 @@ func (suite *apiTestSuite) TestAPIForward() { testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) suite.NoError(err) + err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "scheduler-config"), &resp, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + suite.NoError(err) + re.Contains(resp, "balance-leader-scheduler") + re.Contains(resp, "balance-witness-scheduler") + re.Contains(resp, "balance-hot-region-scheduler") + + schedulers := []string{ + "balance-leader-scheduler", + "balance-witness-scheduler", + "balance-hot-region-scheduler", + } + for _, schedulerName := range schedulers { + err = testutil.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s/%s/%s/%s", urlPrefix, "scheduler-config", schedulerName, "list"), &resp, + testutil.WithHeader(re, apiutil.ForwardToMicroServiceHeader, "true")) + suite.NoError(err) + } + err = testutil.CheckPostJSON(testDialClient, fmt.Sprintf("%s/%s", urlPrefix, "schedulers"), pauseArgs, testutil.WithoutHeader(re, apiutil.ForwardToMicroServiceHeader)) re.NoError(err) diff --git a/tests/pdctl/scheduler/scheduler_test.go b/tests/pdctl/scheduler/scheduler_test.go index fe58e304791..3554b828269 100644 --- a/tests/pdctl/scheduler/scheduler_test.go +++ b/tests/pdctl/scheduler/scheduler_test.go @@ -17,6 +17,8 @@ package scheduler_test import ( "context" "encoding/json" + "reflect" + "strings" "testing" "time" @@ -43,8 +45,7 @@ func TestSchedulerTestSuite(t *testing.T) { func (suite *schedulerTestSuite) TestScheduler() { env := tests.NewSchedulingTestEnvironment(suite.T()) - // Fixme: use RunTestInTwoModes when sync deleted scheduler is supported. 
- env.RunTestInPDMode(suite.checkScheduler) + env.RunTestInTwoModes(suite.checkScheduler) env.RunTestInTwoModes(suite.checkSchedulerDiagnostic) } @@ -86,17 +87,27 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { if args != nil { mustExec(re, cmd, args, nil) } - var schedulers []string - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, &schedulers) - for _, scheduler := range schedulers { - re.True(expected[scheduler]) - } + testutil.Eventually(re, func() bool { + var schedulers []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, &schedulers) + if len(schedulers) != len(expected) { + return false + } + for _, scheduler := range schedulers { + if _, ok := expected[scheduler]; !ok { + return false + } + } + return true + }) } checkSchedulerConfigCommand := func(expectedConfig map[string]interface{}, schedulerName string) { - configInfo := make(map[string]interface{}) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName}, &configInfo) - re.Equal(expectedConfig, configInfo) + testutil.Eventually(re, func() bool { + configInfo := make(map[string]interface{}) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName}, &configInfo) + return reflect.DeepEqual(expectedConfig, configInfo) + }) } leaderServer := cluster.GetLeaderServer() @@ -106,7 +117,6 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { // note: because pdqsort is a unstable sort algorithm, set ApproximateSize for this region. tests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10)) - time.Sleep(3 * time.Second) // scheduler show command expected := map[string]bool{ @@ -120,7 +130,6 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { // scheduler delete command args := []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"} - time.Sleep(10 * time.Second) expected = map[string]bool{ "balance-leader-scheduler": true, "balance-hot-region-scheduler": true, @@ -160,8 +169,11 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { checkSchedulerCommand(args, expected) // check update success - expectedConfig["store-id-ranges"] = map[string]interface{}{"2": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}, "3": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}} - checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) + // FIXME: remove this check after scheduler config is updated + if cluster.GetSchedulingPrimaryServer() == nil && schedulers[idx] == "grant-leader-scheduler" { + expectedConfig["store-id-ranges"] = map[string]interface{}{"2": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}, "3": []interface{}{map[string]interface{}{"end-key": "", "start-key": ""}}} + checkSchedulerConfigCommand(expectedConfig, schedulers[idx]) + } // scheduler delete command args = []string{"-u", pdAddr, "scheduler", "remove", schedulers[idx]} @@ -261,26 +273,33 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler", "set", "2", "1,2,3"}, nil) expected3["store-leader-id"] = float64(2) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) - re.Equal(expected3, conf3) + // FIXME: remove this check after scheduler config is updated + if cluster.GetSchedulingPrimaryServer() == 
nil { // "grant-hot-region-scheduler" + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "grant-hot-region-scheduler"}, &conf3) + re.Equal(expected3, conf3) + } - // test balance region config + // test remove and add scheduler echo := mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-region-scheduler"}, nil) re.Contains(echo, "Success!") echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) re.Contains(echo, "Success!") echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) re.NotContains(echo, "Success!") + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-region-scheduler"}, nil) + re.Contains(echo, "Success!") echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "evict-leader-scheduler", "1"}, nil) re.Contains(echo, "Success!") echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil) re.Contains(echo, "Success!") echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "evict-leader-scheduler-1"}, nil) re.Contains(echo, "404") + testutil.Eventually(re, func() bool { // wait for removed scheduler to be synced to scheduling server. + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "evict-leader-scheduler"}, nil) + return strings.Contains(echo, "[404] scheduler not found") + }) // test hot region config - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "evict-leader-scheduler"}, nil) - re.Contains(echo, "[404] scheduler not found") expected1 := map[string]interface{}{ "min-hot-byte-rate": float64(100), "min-hot-key-rate": float64(10), @@ -311,74 +330,77 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "src-tolerance-ratio", "1.02"}, nil) expected1["src-tolerance-ratio"] = 1.02 var conf1 map[string]interface{} - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,key"}, nil) - expected1["read-priorities"] = []interface{}{"byte", "key"} - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,byte"}, nil) - expected1["read-priorities"] = []interface{}{"key", "byte"} - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "foo,bar"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", 
""}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,key"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,byte"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,key,byte"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - - // write-priorities is divided into write-leader-priorities and write-peer-priorities - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-priorities", "key,byte"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v0"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - expected1["rank-formula-version"] = "v2" - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v2"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - expected1["rank-formula-version"] = "v1" - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v1"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - - expected1["forbid-rw-type"] = "read" - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "forbid-rw-type", "read"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(expected1, conf1) - - // test compatibility - re.Equal("2.0.0", leaderServer.GetClusterVersion().String()) - for _, store := range stores { - version := versioninfo.HotScheduleWithQuery - store.Version = versioninfo.MinSupportedVersion(version).String() - tests.MustPutStore(re, cluster, store) + // FIXME: remove this check after scheduler config is updated + if cluster.GetSchedulingPrimaryServer() == nil { // "balance-hot-region-scheduler" + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,key"}, nil) + expected1["read-priorities"] = []interface{}{"byte", "key"} + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", 
"balance-hot-region-scheduler", "set", "read-priorities", "key"}, nil) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,byte"}, nil) + expected1["read-priorities"] = []interface{}{"key", "byte"} + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "foo,bar"}, nil) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", ""}, nil) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,key"}, nil) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "byte,byte"}, nil) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "read-priorities", "key,key,byte"}, nil) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + + // write-priorities is divided into write-leader-priorities and write-peer-priorities + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-priorities", "key,byte"}, nil) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v0"}, nil) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + expected1["rank-formula-version"] = "v2" + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v2"}, nil) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + expected1["rank-formula-version"] = "v1" + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "rank-formula-version", "v1"}, nil) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + + expected1["forbid-rw-type"] = "read" + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "forbid-rw-type", "read"}, nil) + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(expected1, conf1) + + // test compatibility + re.Equal("2.0.0", 
leaderServer.GetClusterVersion().String()) + for _, store := range stores { + version := versioninfo.HotScheduleWithQuery + store.Version = versioninfo.MinSupportedVersion(version).String() + tests.MustPutStore(re, cluster, store) + } + re.Equal("5.2.0", leaderServer.GetClusterVersion().String()) + // After upgrading, we should not use query. + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(conf1["read-priorities"], []interface{}{"key", "byte"}) + // cannot set qps as write-peer-priorities + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-peer-priorities", "query,byte"}, nil) + re.Contains(echo, "query is not allowed to be set in priorities for write-peer-priorities") + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) + re.Equal(conf1["write-peer-priorities"], []interface{}{"byte", "key"}) } - re.Equal("5.2.0", leaderServer.GetClusterVersion().String()) - // After upgrading, we should not use query. - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(conf1["read-priorities"], []interface{}{"key", "byte"}) - // cannot set qps as write-peer-priorities - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler", "set", "write-peer-priorities", "query,byte"}, nil) - re.Contains(echo, "query is not allowed to be set in priorities for write-peer-priorities") - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-hot-region-scheduler"}, &conf1) - re.Equal(conf1["write-peer-priorities"], []interface{}{"byte", "key"}) // test remove and add echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-hot-region-scheduler"}, nil) @@ -412,24 +434,33 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { for _, schedulerName := range evictSlownessSchedulers { echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", schedulerName}, nil) re.Contains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) - re.Contains(echo, schedulerName) + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return strings.Contains(echo, schedulerName) + }) echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "set", "recovery-duration", "100"}, nil) re.Contains(echo, "Success!") conf = make(map[string]interface{}) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "show"}, &conf) - re.Equal(100., conf["recovery-duration"]) + // FIXME: remove this check after scheduler config is updated + if cluster.GetSchedulingPrimaryServer() == nil && schedulerName == "evict-slow-store-scheduler" { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", schedulerName, "show"}, &conf) + re.Equal(100., conf["recovery-duration"]) + } echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", schedulerName}, nil) re.Contains(echo, "Success!") - echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) - re.NotContains(echo, schedulerName) + testutil.Eventually(re, func() bool { + echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show"}, nil) + return !strings.Contains(echo, schedulerName) + }) } // test show scheduler with paused and disabled status. 
checkSchedulerWithStatusCommand := func(status string, expected []string) { - var schedulers []string - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show", "--status", status}, &schedulers) - re.Equal(expected, schedulers) + testutil.Eventually(re, func() bool { + var schedulers []string + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "show", "--status", status}, &schedulers) + return reflect.DeepEqual(expected, schedulers) + }) } mustUsage([]string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler"}) @@ -504,18 +535,14 @@ func (suite *schedulerTestSuite) checkSchedulerDiagnostic(cluster *tests.TestClu // note: because pdqsort is a unstable sort algorithm, set ApproximateSize for this region. tests.MustPutRegion(re, cluster, 1, 1, []byte("a"), []byte("b"), core.SetApproximateSize(10)) - time.Sleep(3 * time.Second) echo := mustExec(re, cmd, []string{"-u", pdAddr, "config", "set", "enable-diagnostic", "true"}, nil) re.Contains(echo, "Success!") checkSchedulerDescribeCommand("balance-region-scheduler", "pending", "1 store(s) RegionNotMatchRule; ") // scheduler delete command - // Fixme: use RunTestInTwoModes when sync deleted scheduler is supported. - if sche := cluster.GetSchedulingPrimaryServer(); sche == nil { - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) - checkSchedulerDescribeCommand("balance-region-scheduler", "disabled", "") - } + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-region-scheduler"}, nil) + checkSchedulerDescribeCommand("balance-region-scheduler", "disabled", "") mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "pause", "balance-leader-scheduler", "60"}, nil) mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "resume", "balance-leader-scheduler"}, nil) @@ -528,7 +555,7 @@ func mustExec(re *require.Assertions, cmd *cobra.Command, args []string, v inter if v == nil { return string(output) } - re.NoError(json.Unmarshal(output, v)) + re.NoError(json.Unmarshal(output, v), string(output)) return "" } diff --git a/tests/server/api/scheduler_test.go b/tests/server/api/scheduler_test.go index 95c4d936a8c..9db94e8562d 100644 --- a/tests/server/api/scheduler_test.go +++ b/tests/server/api/scheduler_test.go @@ -23,8 +23,10 @@ import ( "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" + "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" sc "github.com/tikv/pd/pkg/schedule/config" + "github.com/tikv/pd/pkg/slice" tu "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/server" "github.com/tikv/pd/tests" @@ -41,13 +43,12 @@ func TestScheduleTestSuite(t *testing.T) { } func (suite *scheduleTestSuite) TestScheduler() { - // Fixme: use RunTestInTwoModes when sync deleted scheduler is supported. 
env := tests.NewSchedulingTestEnvironment(suite.T()) - env.RunTestInPDMode(suite.checkOriginAPI) + env.RunTestInTwoModes(suite.checkOriginAPI) env = tests.NewSchedulingTestEnvironment(suite.T()) - env.RunTestInPDMode(suite.checkAPI) + env.RunTestInTwoModes(suite.checkAPI) env = tests.NewSchedulingTestEnvironment(suite.T()) - env.RunTestInPDMode(suite.checkDisable) + env.RunTestInTwoModes(suite.checkDisable) } func (suite *scheduleTestSuite) checkOriginAPI(cluster *tests.TestCluster) { @@ -71,7 +72,7 @@ func (suite *scheduleTestSuite) checkOriginAPI(cluster *tests.TestCluster) { re := suite.Require() suite.NoError(tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusOK(re))) - suite.Len(suite.getSchedulers(urlPrefix), 1) + suite.assertSchedulerExists(re, urlPrefix, "evict-leader-scheduler") resp := make(map[string]interface{}) listURL := fmt.Sprintf("%s%s%s/%s/list", leaderAddr, apiPrefix, server.SchedulerConfigHandlerPath, "evict-leader-scheduler") suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) @@ -83,20 +84,20 @@ func (suite *scheduleTestSuite) checkOriginAPI(cluster *tests.TestCluster) { suite.NoError(err) suite.NoError(failpoint.Enable("github.com/tikv/pd/pkg/schedule/schedulers/persistFail", "return(true)")) suite.NoError(tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusNotOK(re))) - suite.Len(suite.getSchedulers(urlPrefix), 1) + suite.assertSchedulerExists(re, urlPrefix, "evict-leader-scheduler") resp = make(map[string]interface{}) suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) suite.Len(resp["store-id-ranges"], 1) suite.NoError(failpoint.Disable("github.com/tikv/pd/pkg/schedule/schedulers/persistFail")) suite.NoError(tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusOK(re))) - suite.Len(suite.getSchedulers(urlPrefix), 1) + suite.assertSchedulerExists(re, urlPrefix, "evict-leader-scheduler") resp = make(map[string]interface{}) suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) suite.Len(resp["store-id-ranges"], 2) deleteURL := fmt.Sprintf("%s/%s", urlPrefix, "evict-leader-scheduler-1") err = tu.CheckDelete(testDialClient, deleteURL, tu.StatusOK(re)) suite.NoError(err) - suite.Len(suite.getSchedulers(urlPrefix), 1) + suite.assertSchedulerExists(re, urlPrefix, "evict-leader-scheduler") resp1 := make(map[string]interface{}) suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp1)) suite.Len(resp1["store-id-ranges"], 1) @@ -104,11 +105,11 @@ func (suite *scheduleTestSuite) checkOriginAPI(cluster *tests.TestCluster) { suite.NoError(failpoint.Enable("github.com/tikv/pd/server/config/persistFail", "return(true)")) err = tu.CheckDelete(testDialClient, deleteURL, tu.Status(re, http.StatusInternalServerError)) suite.NoError(err) - suite.Len(suite.getSchedulers(urlPrefix), 1) + suite.assertSchedulerExists(re, urlPrefix, "evict-leader-scheduler") suite.NoError(failpoint.Disable("github.com/tikv/pd/server/config/persistFail")) err = tu.CheckDelete(testDialClient, deleteURL, tu.StatusOK(re)) suite.NoError(err) - suite.Empty(suite.getSchedulers(urlPrefix)) + suite.assertNoScheduler(re, urlPrefix, "evict-leader-scheduler") suite.NoError(tu.CheckGetJSON(testDialClient, listURL, nil, tu.Status(re, http.StatusNotFound))) err = tu.CheckDelete(testDialClient, deleteURL, tu.Status(re, http.StatusNotFound)) suite.NoError(err) @@ -230,23 +231,27 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { suite.NoError(tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re))) resp = 
make(map[string]interface{}) suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - for key := range expectMap { - suite.Equal(expectMap[key], resp[key], "key %s", key) + // FIXME: remove this check after scheduler config is updated + if cluster.GetSchedulingPrimaryServer() == nil { // "balance-hot-region-scheduler" + for key := range expectMap { + suite.Equal(expectMap[key], resp[key], "key %s", key) + } + + // update again + err = tu.CheckPostJSON(testDialClient, updateURL, body, + tu.StatusOK(re), + tu.StringEqual(re, "Config is the same with origin, so do nothing.")) + suite.NoError(err) + // config item not found + dataMap = map[string]interface{}{} + dataMap["error"] = 3 + body, err = json.Marshal(dataMap) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, updateURL, body, + tu.Status(re, http.StatusBadRequest), + tu.StringEqual(re, "Config item is not found.")) + suite.NoError(err) } - // update again - err = tu.CheckPostJSON(testDialClient, updateURL, body, - tu.StatusOK(re), - tu.StringEqual(re, "Config is the same with origin, so do nothing.")) - suite.NoError(err) - // config item not found - dataMap = map[string]interface{}{} - dataMap["error"] = 3 - body, err = json.Marshal(dataMap) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, updateURL, body, - tu.Status(re, http.StatusBadRequest), - tu.StringEqual(re, "Config item is not found.")) - suite.NoError(err) }, }, { @@ -468,6 +473,7 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { testCase.extraTestFunc(testCase.createdName) } suite.deleteScheduler(urlPrefix, testCase.createdName) + suite.assertNoScheduler(re, urlPrefix, testCase.createdName) } // test pause and resume all schedulers. @@ -482,6 +488,7 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { body, err := json.Marshal(input) suite.NoError(err) suite.addScheduler(urlPrefix, body) + suite.assertSchedulerExists(re, urlPrefix, testCase.createdName) // wait for scheduler to be synced. 
if testCase.extraTestFunc != nil { testCase.extraTestFunc(testCase.createdName) } @@ -545,6 +552,7 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { createdName = testCase.name } suite.deleteScheduler(urlPrefix, createdName) + suite.assertNoScheduler(re, urlPrefix, createdName) } } @@ -581,16 +589,8 @@ func (suite *scheduleTestSuite) checkDisable(cluster *tests.TestCluster) { err = tu.CheckPostJSON(testDialClient, u, body, tu.StatusOK(re)) suite.NoError(err) - var schedulers []string - err = tu.ReadGetJSON(re, testDialClient, urlPrefix, &schedulers) - suite.NoError(err) - suite.Len(schedulers, 1) - suite.Equal(name, schedulers[0]) - - err = tu.ReadGetJSON(re, testDialClient, fmt.Sprintf("%s?status=disabled", urlPrefix), &schedulers) - suite.NoError(err) - suite.Len(schedulers, 1) - suite.Equal(name, schedulers[0]) + suite.assertNoScheduler(re, urlPrefix, name) + suite.assertSchedulerExists(re, fmt.Sprintf("%s?status=disabled", urlPrefix), name) // reset schedule config scheduleConfig.Schedulers = originSchedulers @@ -600,6 +600,7 @@ func (suite *scheduleTestSuite) checkDisable(cluster *tests.TestCluster) { suite.NoError(err) suite.deleteScheduler(urlPrefix, name) + suite.assertNoScheduler(re, urlPrefix, name) } func (suite *scheduleTestSuite) addScheduler(urlPrefix string, body []byte) { @@ -614,12 +615,17 @@ func (suite *scheduleTestSuite) deleteScheduler(urlPrefix string, createdName st } func (suite *scheduleTestSuite) testPauseOrResume(urlPrefix string, name, createdName string, body []byte) { + re := suite.Require() if createdName == "" { createdName = name } - re := suite.Require() - err := tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusOK(re)) - suite.NoError(err) + var schedulers []string + tu.ReadGetJSON(suite.Require(), testDialClient, urlPrefix, &schedulers) + if !slice.Contains(schedulers, createdName) { + err := tu.CheckPostJSON(testDialClient, urlPrefix, body, tu.StatusOK(re)) + re.NoError(err) + } + suite.assertSchedulerExists(re, urlPrefix, createdName) // wait for scheduler to be synced. // test pause. 
input := make(map[string]interface{}) @@ -655,9 +661,20 @@ func (suite *scheduleTestSuite) testPauseOrResume(urlPrefix string, name, create suite.False(isPaused) } -func (suite *scheduleTestSuite) getSchedulers(urlPrefix string) (resp []string) { - tu.ReadGetJSON(suite.Require(), testDialClient, urlPrefix, &resp) - return +func (suite *scheduleTestSuite) assertSchedulerExists(re *require.Assertions, urlPrefix string, scheduler string) { + var schedulers []string + tu.Eventually(re, func() bool { + tu.ReadGetJSON(suite.Require(), testDialClient, urlPrefix, &schedulers) + return slice.Contains(schedulers, scheduler) + }) +} + +func (suite *scheduleTestSuite) assertNoScheduler(re *require.Assertions, urlPrefix string, scheduler string) { + var schedulers []string + tu.Eventually(re, func() bool { + tu.ReadGetJSON(suite.Require(), testDialClient, urlPrefix, &schedulers) + return !slice.Contains(schedulers, scheduler) + }) } func (suite *scheduleTestSuite) isSchedulerPaused(urlPrefix, name string) bool { diff --git a/tools/pd-ctl/pdctl/command/scheduler.go b/tools/pd-ctl/pdctl/command/scheduler.go index 4349735f06d..526ff2646dc 100644 --- a/tools/pd-ctl/pdctl/command/scheduler.go +++ b/tools/pd-ctl/pdctl/command/scheduler.go @@ -745,11 +745,17 @@ func showShuffleRegionSchedulerRolesCommandFunc(cmd *cobra.Command, args []strin if p == "show-roles" { p = cmd.Parent().Name() } - path := path.Join(schedulerConfigPrefix, p, "roles") - r, err := doRequest(cmd, path, http.MethodGet, http.Header{}) + url := path.Join(schedulerConfigPrefix, p, "list") + r, err := doRequest(cmd, url, http.MethodGet, http.Header{}) if err != nil { - cmd.Println(err) - return + // try to use old api + var err2 error + url := path.Join(schedulerConfigPrefix, p, "roles") + r, err2 = doRequest(cmd, url, http.MethodGet, http.Header{}) + if err2 != nil { + cmd.Println(err, err2) + return + } } cmd.Println(r) } From a1a1eea8dafd7918d583378790a4bb6c39a21f97 Mon Sep 17 00:00:00 2001 From: Yongbo Jiang Date: Wed, 1 Nov 2023 16:42:37 +0800 Subject: [PATCH 014/116] resourcemanager: change param name (#7293) ref tikv/pd#4399 Signed-off-by: Cabinfever_B --- .../resourcemanager/server/resource_group.go | 4 +- .../resourcemanager/server/token_buckets.go | 42 +++++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/pkg/mcs/resourcemanager/server/resource_group.go b/pkg/mcs/resourcemanager/server/resource_group.go index 863cfd19026..fc3a58cab51 100644 --- a/pkg/mcs/resourcemanager/server/resource_group.go +++ b/pkg/mcs/resourcemanager/server/resource_group.go @@ -138,7 +138,7 @@ func FromProtoResourceGroup(group *rmpb.ResourceGroup) *ResourceGroup { // RequestRU requests the RU of the resource group. 
func (rg *ResourceGroup) RequestRU( now time.Time, - neededTokens float64, + requiredToken float64, targetPeriodMs, clientUniqueID uint64, ) *rmpb.GrantedRUTokenBucket { rg.Lock() @@ -147,7 +147,7 @@ func (rg *ResourceGroup) RequestRU( if rg.RUSettings == nil || rg.RUSettings.RU.Settings == nil { return nil } - tb, trickleTimeMs := rg.RUSettings.RU.request(now, neededTokens, targetPeriodMs, clientUniqueID) + tb, trickleTimeMs := rg.RUSettings.RU.request(now, requiredToken, targetPeriodMs, clientUniqueID) return &rmpb.GrantedRUTokenBucket{GrantedTokens: tb, TrickleTimeMs: trickleTimeMs} } diff --git a/pkg/mcs/resourcemanager/server/token_buckets.go b/pkg/mcs/resourcemanager/server/token_buckets.go index 5efab52fe68..a0acba3b54d 100644 --- a/pkg/mcs/resourcemanager/server/token_buckets.go +++ b/pkg/mcs/resourcemanager/server/token_buckets.go @@ -268,7 +268,7 @@ func (gtb *GroupTokenBucket) init(now time.Time, clientID uint64) { } // updateTokens updates the tokens and settings. -func (gtb *GroupTokenBucket) updateTokens(now time.Time, burstLimit int64, clientUniqueID uint64, consumptionToken float64) { +func (gtb *GroupTokenBucket) updateTokens(now time.Time, burstLimit int64, clientUniqueID uint64, requiredToken float64) { var elapseTokens float64 if !gtb.Initialized { gtb.init(now, clientUniqueID) @@ -288,21 +288,21 @@ func (gtb *GroupTokenBucket) updateTokens(now time.Time, burstLimit int64, clien gtb.Tokens = burst } // Balance each slots. - gtb.balanceSlotTokens(clientUniqueID, gtb.Settings, consumptionToken, elapseTokens) + gtb.balanceSlotTokens(clientUniqueID, gtb.Settings, requiredToken, elapseTokens) } // request requests tokens from the corresponding slot. func (gtb *GroupTokenBucket) request(now time.Time, - neededTokens float64, + requiredToken float64, targetPeriodMs, clientUniqueID uint64, ) (*rmpb.TokenBucket, int64) { burstLimit := gtb.Settings.GetBurstLimit() - gtb.updateTokens(now, burstLimit, clientUniqueID, neededTokens) + gtb.updateTokens(now, burstLimit, clientUniqueID, requiredToken) slot, ok := gtb.tokenSlots[clientUniqueID] if !ok { return &rmpb.TokenBucket{Settings: &rmpb.TokenLimitSettings{BurstLimit: burstLimit}}, 0 } - res, trickleDuration := slot.assignSlotTokens(neededTokens, targetPeriodMs) + res, trickleDuration := slot.assignSlotTokens(requiredToken, targetPeriodMs) // Update bucket to record all tokens. gtb.Tokens -= slot.lastTokenCapacity - slot.tokenCapacity slot.lastTokenCapacity = slot.tokenCapacity @@ -310,24 +310,24 @@ func (gtb *GroupTokenBucket) request(now time.Time, return res, trickleDuration } -func (ts *TokenSlot) assignSlotTokens(neededTokens float64, targetPeriodMs uint64) (*rmpb.TokenBucket, int64) { +func (ts *TokenSlot) assignSlotTokens(requiredToken float64, targetPeriodMs uint64) (*rmpb.TokenBucket, int64) { var res rmpb.TokenBucket burstLimit := ts.settings.GetBurstLimit() res.Settings = &rmpb.TokenLimitSettings{BurstLimit: burstLimit} // If BurstLimit < 0, just return. if burstLimit < 0 { - res.Tokens = neededTokens + res.Tokens = requiredToken return &res, 0 } // FillRate is used for the token server unavailable in abnormal situation. - if neededTokens <= 0 { + if requiredToken <= 0 { return &res, 0 } // If the current tokens can directly meet the requirement, returns the need token. 
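// Editor's note (illustrative arithmetic, not part of the patch): on the fast path
// below, a slot with tokenCapacity = 500 asked for requiredToken = 200 grants the
// full 200 at once, leaving 300 in the slot and a trickle duration of 0; the loan
// logic further down only runs when the slot cannot cover the request.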
- if ts.tokenCapacity >= neededTokens { - ts.tokenCapacity -= neededTokens + if ts.tokenCapacity >= requiredToken { + ts.tokenCapacity -= requiredToken // granted the total request tokens - res.Tokens = neededTokens + res.Tokens = requiredToken return &res, 0 } @@ -336,7 +336,7 @@ func (ts *TokenSlot) assignSlotTokens(neededTokens float64, targetPeriodMs uint6 hasRemaining := false if ts.tokenCapacity > 0 { grantedTokens = ts.tokenCapacity - neededTokens -= grantedTokens + requiredToken -= grantedTokens ts.tokenCapacity = 0 hasRemaining = true } @@ -373,36 +373,36 @@ func (ts *TokenSlot) assignSlotTokens(neededTokens float64, targetPeriodMs uint6 for i := 1; i < loanCoefficient; i++ { p[i] = float64(loanCoefficient-i)*float64(fillRate)*targetPeriodTimeSec + p[i-1] } - for i := 0; i < loanCoefficient && neededTokens > 0 && trickleTime < targetPeriodTimeSec; i++ { + for i := 0; i < loanCoefficient && requiredToken > 0 && trickleTime < targetPeriodTimeSec; i++ { loan := -ts.tokenCapacity if loan >= p[i] { continue } roundReserveTokens := p[i] - loan fillRate := float64(loanCoefficient-i) * float64(fillRate) - if roundReserveTokens > neededTokens { - ts.tokenCapacity -= neededTokens - grantedTokens += neededTokens + if roundReserveTokens > requiredToken { + ts.tokenCapacity -= requiredToken + grantedTokens += requiredToken trickleTime += grantedTokens / fillRate - neededTokens = 0 + requiredToken = 0 } else { roundReserveTime := roundReserveTokens / fillRate if roundReserveTime+trickleTime >= targetPeriodTimeSec { roundTokens := (targetPeriodTimeSec - trickleTime) * fillRate - neededTokens -= roundTokens + requiredToken -= roundTokens ts.tokenCapacity -= roundTokens grantedTokens += roundTokens trickleTime = targetPeriodTimeSec } else { grantedTokens += roundReserveTokens - neededTokens -= roundReserveTokens + requiredToken -= roundReserveTokens ts.tokenCapacity -= roundReserveTokens trickleTime += roundReserveTime } } } - if neededTokens > 0 && grantedTokens < defaultReserveRatio*float64(fillRate)*targetPeriodTimeSec { - reservedTokens := math.Min(neededTokens+grantedTokens, defaultReserveRatio*float64(fillRate)*targetPeriodTimeSec) + if requiredToken > 0 && grantedTokens < defaultReserveRatio*float64(fillRate)*targetPeriodTimeSec { + reservedTokens := math.Min(requiredToken+grantedTokens, defaultReserveRatio*float64(fillRate)*targetPeriodTimeSec) ts.tokenCapacity -= reservedTokens - grantedTokens grantedTokens = reservedTokens } From bd1240636d1f79c3b9fe99f4380c8db4d2698374 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Wed, 1 Nov 2023 22:52:13 +0800 Subject: [PATCH 015/116] move and fix test Signed-off-by: lhy1024 --- pkg/mcs/scheduling/server/apis/v1/api.go | 13 +- pkg/mcs/scheduling/server/server.go | 8 + server/api/config.go | 22 +- server/api/config_test.go | 440 --------------- tests/integrations/mcs/scheduling/api_test.go | 76 +-- tests/pdctl/config/config_test.go | 74 +-- tests/pdctl/scheduler/scheduler_test.go | 12 +- tests/server/api/operator_test.go | 33 +- tests/server/api/scheduler_test.go | 25 +- tests/server/config/config_test.go | 528 +++++++++++++++++- 10 files changed, 673 insertions(+), 558 deletions(-) delete mode 100644 server/api/config_test.go diff --git a/pkg/mcs/scheduling/server/apis/v1/api.go b/pkg/mcs/scheduling/server/apis/v1/api.go index 6546bf97b92..7a75d04f483 100644 --- a/pkg/mcs/scheduling/server/apis/v1/api.go +++ b/pkg/mcs/scheduling/server/apis/v1/api.go @@ -196,7 +196,6 @@ func changeLogLevel(c *gin.Context) { c.String(http.StatusOK, "The log level is 
updated.") } -<<<<<<< HEAD // @Tags config // @Summary Get full config. // @Produce json @@ -205,24 +204,26 @@ func changeLogLevel(c *gin.Context) { func getConfig(c *gin.Context) { svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) cfg := svr.GetConfig() + cfg.Schedule.MaxMergeRegionKeys = cfg.Schedule.GetMaxMergeRegionKeys() c.IndentedJSON(http.StatusOK, cfg) } // @Tags config // @Summary Get schedule config. // @Produce json -// @Success 200 {object} sc.ScheduleConfig +// @Success 200 {object} config.ScheduleConfig // @Router /config/schedule [get] func getScheduleConfig(c *gin.Context) { svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) cfg := svr.GetScheduleConfig() + cfg.MaxMergeRegionKeys = cfg.GetMaxMergeRegionKeys() c.IndentedJSON(http.StatusOK, cfg) } // @Tags config // @Summary Get replication config. // @Produce json -// @Success 200 {object} sc.ReplicationConfig +// @Success 200 {object} config.ReplicationConfig // @Router /config/replicate [get] func getReplicationConfig(c *gin.Context) { svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) @@ -233,13 +234,14 @@ func getReplicationConfig(c *gin.Context) { // @Tags config // @Summary Get store config. // @Produce json -// @Success 200 {object} sc.StoreConfig +// @Success 200 {object} config.StoreConfig // @Router /config/store [get] func getStoreConfig(c *gin.Context) { svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) cfg := svr.GetStoreConfig() c.IndentedJSON(http.StatusOK, cfg) -======= +} + // @Tags admin // @Summary Drop all regions from cache. // @Produce json @@ -278,7 +280,6 @@ func deleteRegionCacheByID(c *gin.Context) { } cluster.DropCacheRegion(regionID) c.String(http.StatusOK, "The region is removed from server cache.") ->>>>>>> a1a1eea8dafd7918d583378790a4bb6c39a21f97 } // @Tags operators diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index 2a83082c03b..d692e24e505 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -517,6 +517,14 @@ func (s *Server) GetConfig() *config.Config { cfg.Schedule = *s.persistConfig.GetScheduleConfig().Clone() cfg.Replication = *s.persistConfig.GetReplicationConfig().Clone() cfg.ClusterVersion = *s.persistConfig.GetClusterVersion() + if s.storage == nil { + return cfg + } + sches, configs, err := s.storage.LoadAllSchedulerConfigs() + if err != nil { + return cfg + } + cfg.Schedule.SchedulersPayload = schedulers.ToPayload(sches, configs) return cfg } diff --git a/server/api/config.go b/server/api/config.go index 9947e952582..6b3fbdd9df5 100644 --- a/server/api/config.go +++ b/server/api/config.go @@ -63,32 +63,20 @@ func newConfHandler(svr *server.Server, rd *render.Render) *confHandler { func (h *confHandler) GetConfig(w http.ResponseWriter, r *http.Request) { cfg := h.svr.GetConfig() if h.svr.IsAPIServiceMode() { - b, err := h.GetSchedulingServerConfig("config/schedule") - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return - } - var scheduleCfg sc.ScheduleConfig - err = json.Unmarshal(b, &scheduleCfg) - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return - } - cfg.Schedule = scheduleCfg - b, err = h.GetSchedulingServerConfig("config/replicate") + b, err := h.GetSchedulingServerConfig("config") if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } - var replicationCfg sc.ReplicationConfig - err = json.Unmarshal(b, 
&replicationCfg) + var configSchedulingServer config.Config + err = json.Unmarshal(b, &configSchedulingServer) if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } - cfg.Replication = replicationCfg + cfg.Schedule = configSchedulingServer.Schedule + cfg.Replication = configSchedulingServer.Replication // TODO: will we support config/store? - // TODO: after scheduler-config is supported, we need to merge the config. } else { cfg.Schedule.MaxMergeRegionKeys = cfg.Schedule.GetMaxMergeRegionKeys() } diff --git a/server/api/config_test.go b/server/api/config_test.go deleted file mode 100644 index fbfb3f94518..00000000000 --- a/server/api/config_test.go +++ /dev/null @@ -1,440 +0,0 @@ -// Copyright 2016 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package api - -import ( - "encoding/json" - "fmt" - "testing" - "time" - - "github.com/stretchr/testify/suite" - sc "github.com/tikv/pd/pkg/schedule/config" - tu "github.com/tikv/pd/pkg/utils/testutil" - "github.com/tikv/pd/pkg/utils/typeutil" - "github.com/tikv/pd/pkg/versioninfo" - "github.com/tikv/pd/server" - "github.com/tikv/pd/server/config" -) - -type configTestSuite struct { - suite.Suite - svr *server.Server - cleanup tu.CleanupFunc - urlPrefix string -} - -func TestConfigTestSuite(t *testing.T) { - suite.Run(t, new(configTestSuite)) -} - -func (suite *configTestSuite) SetupSuite() { - re := suite.Require() - suite.svr, suite.cleanup = mustNewServer(re, func(cfg *config.Config) { - cfg.Replication.EnablePlacementRules = false - }) - server.MustWaitLeader(re, []*server.Server{suite.svr}) - - addr := suite.svr.GetAddr() - suite.urlPrefix = fmt.Sprintf("%s%s/api/v1", addr, apiPrefix) -} - -func (suite *configTestSuite) TearDownSuite() { - suite.cleanup() -} - -func (suite *configTestSuite) TestConfigAll() { - re := suite.Require() - addr := fmt.Sprintf("%s/config", suite.urlPrefix) - cfg := &config.Config{} - err := tu.ReadGetJSON(re, testDialClient, addr, cfg) - suite.NoError(err) - - // the original way - r := map[string]int{"max-replicas": 5} - postData, err := json.Marshal(r) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - l := map[string]interface{}{ - "location-labels": "zone,rack", - "region-schedule-limit": 10, - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - l = map[string]interface{}{ - "metric-storage": "http://127.0.0.1:9090", - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - newCfg := &config.Config{} - err = tu.ReadGetJSON(re, testDialClient, addr, newCfg) - suite.NoError(err) - cfg.Replication.MaxReplicas = 5 - cfg.Replication.LocationLabels = []string{"zone", "rack"} - cfg.Schedule.RegionScheduleLimit = 10 - cfg.PDServerCfg.MetricStorage = "http://127.0.0.1:9090" - suite.Equal(newCfg, 
cfg) - - // the new way - l = map[string]interface{}{ - "schedule.tolerant-size-ratio": 2.5, - "schedule.enable-tikv-split-region": "false", - "replication.location-labels": "idc,host", - "pd-server.metric-storage": "http://127.0.0.1:1234", - "log.level": "warn", - "cluster-version": "v4.0.0-beta", - "replication-mode.replication-mode": "dr-auto-sync", - "replication-mode.dr-auto-sync.label-key": "foobar", - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - newCfg1 := &config.Config{} - err = tu.ReadGetJSON(re, testDialClient, addr, newCfg1) - suite.NoError(err) - cfg.Schedule.EnableTiKVSplitRegion = false - cfg.Schedule.TolerantSizeRatio = 2.5 - cfg.Replication.LocationLabels = []string{"idc", "host"} - cfg.PDServerCfg.MetricStorage = "http://127.0.0.1:1234" - cfg.Log.Level = "warn" - cfg.ReplicationMode.DRAutoSync.LabelKey = "foobar" - cfg.ReplicationMode.ReplicationMode = "dr-auto-sync" - v, err := versioninfo.ParseVersion("v4.0.0-beta") - suite.NoError(err) - cfg.ClusterVersion = *v - suite.Equal(cfg, newCfg1) - - // revert this to avoid it affects TestConfigTTL - l["schedule.enable-tikv-split-region"] = "true" - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - // illegal prefix - l = map[string]interface{}{ - "replicate.max-replicas": 1, - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, - tu.StatusNotOK(re), - tu.StringContain(re, "not found")) - suite.NoError(err) - - // update prefix directly - l = map[string]interface{}{ - "replication-mode": nil, - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, - tu.StatusNotOK(re), - tu.StringContain(re, "cannot update config prefix")) - suite.NoError(err) - - // config item not found - l = map[string]interface{}{ - "schedule.region-limit": 10, - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringContain(re, "not found")) - suite.NoError(err) -} - -func (suite *configTestSuite) TestConfigSchedule() { - re := suite.Require() - addr := fmt.Sprintf("%s/config/schedule", suite.urlPrefix) - scheduleConfig := &sc.ScheduleConfig{} - suite.NoError(tu.ReadGetJSON(re, testDialClient, addr, scheduleConfig)) - scheduleConfig.MaxStoreDownTime.Duration = time.Second - postData, err := json.Marshal(scheduleConfig) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - scheduleConfig1 := &sc.ScheduleConfig{} - suite.NoError(tu.ReadGetJSON(re, testDialClient, addr, scheduleConfig1)) - suite.Equal(*scheduleConfig1, *scheduleConfig) -} - -func (suite *configTestSuite) TestConfigReplication() { - re := suite.Require() - addr := fmt.Sprintf("%s/config/replicate", suite.urlPrefix) - rc := &sc.ReplicationConfig{} - err := tu.ReadGetJSON(re, testDialClient, addr, rc) - suite.NoError(err) - - rc.MaxReplicas = 5 - rc1 := map[string]int{"max-replicas": 5} - postData, err := json.Marshal(rc1) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - rc.LocationLabels = []string{"zone", "rack"} - rc2 := map[string]string{"location-labels": "zone,rack"} - postData, err = json.Marshal(rc2) - suite.NoError(err) - err = 
tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - rc.IsolationLevel = "zone" - rc3 := map[string]string{"isolation-level": "zone"} - postData, err = json.Marshal(rc3) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - rc4 := &sc.ReplicationConfig{} - err = tu.ReadGetJSON(re, testDialClient, addr, rc4) - suite.NoError(err) - - suite.Equal(*rc4, *rc) -} - -func (suite *configTestSuite) TestConfigLabelProperty() { - re := suite.Require() - addr := suite.svr.GetAddr() + apiPrefix + "/api/v1/config/label-property" - loadProperties := func() config.LabelPropertyConfig { - var cfg config.LabelPropertyConfig - err := tu.ReadGetJSON(re, testDialClient, addr, &cfg) - suite.NoError(err) - return cfg - } - - cfg := loadProperties() - suite.Empty(cfg) - - cmds := []string{ - `{"type": "foo", "action": "set", "label-key": "zone", "label-value": "cn1"}`, - `{"type": "foo", "action": "set", "label-key": "zone", "label-value": "cn2"}`, - `{"type": "bar", "action": "set", "label-key": "host", "label-value": "h1"}`, - } - for _, cmd := range cmds { - err := tu.CheckPostJSON(testDialClient, addr, []byte(cmd), tu.StatusOK(re)) - suite.NoError(err) - } - - cfg = loadProperties() - suite.Len(cfg, 2) - suite.Equal([]config.StoreLabel{ - {Key: "zone", Value: "cn1"}, - {Key: "zone", Value: "cn2"}, - }, cfg["foo"]) - suite.Equal([]config.StoreLabel{{Key: "host", Value: "h1"}}, cfg["bar"]) - - cmds = []string{ - `{"type": "foo", "action": "delete", "label-key": "zone", "label-value": "cn1"}`, - `{"type": "bar", "action": "delete", "label-key": "host", "label-value": "h1"}`, - } - for _, cmd := range cmds { - err := tu.CheckPostJSON(testDialClient, addr, []byte(cmd), tu.StatusOK(re)) - suite.NoError(err) - } - - cfg = loadProperties() - suite.Len(cfg, 1) - suite.Equal([]config.StoreLabel{{Key: "zone", Value: "cn2"}}, cfg["foo"]) -} - -func (suite *configTestSuite) TestConfigDefault() { - addr := fmt.Sprintf("%s/config", suite.urlPrefix) - - r := map[string]int{"max-replicas": 5} - postData, err := json.Marshal(r) - suite.NoError(err) - re := suite.Require() - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - l := map[string]interface{}{ - "location-labels": "zone,rack", - "region-schedule-limit": 10, - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - l = map[string]interface{}{ - "metric-storage": "http://127.0.0.1:9090", - } - postData, err = json.Marshal(l) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - - addr = fmt.Sprintf("%s/config/default", suite.urlPrefix) - defaultCfg := &config.Config{} - err = tu.ReadGetJSON(re, testDialClient, addr, defaultCfg) - suite.NoError(err) - - suite.Equal(uint64(3), defaultCfg.Replication.MaxReplicas) - suite.Equal(typeutil.StringSlice([]string{}), defaultCfg.Replication.LocationLabels) - suite.Equal(uint64(2048), defaultCfg.Schedule.RegionScheduleLimit) - suite.Equal("", defaultCfg.PDServerCfg.MetricStorage) -} - -func (suite *configTestSuite) TestConfigPDServer() { - re := suite.Require() - addrPost := fmt.Sprintf("%s/config", suite.urlPrefix) - ms := map[string]interface{}{ - "metric-storage": "", - } - postData, err := json.Marshal(ms) - suite.NoError(err) - suite.NoError(tu.CheckPostJSON(testDialClient, addrPost, postData, tu.StatusOK(re))) - addrGet 
:= fmt.Sprintf("%s/config/pd-server", suite.urlPrefix) - sc := &config.PDServerConfig{} - suite.NoError(tu.ReadGetJSON(re, testDialClient, addrGet, sc)) - suite.Equal(bool(true), sc.UseRegionStorage) - suite.Equal("table", sc.KeyType) - suite.Equal(typeutil.StringSlice([]string{}), sc.RuntimeServices) - suite.Equal("", sc.MetricStorage) - suite.Equal("auto", sc.DashboardAddress) - suite.Equal(int(3), sc.FlowRoundByDigit) - suite.Equal(typeutil.NewDuration(time.Second), sc.MinResolvedTSPersistenceInterval) - suite.Equal(24*time.Hour, sc.MaxResetTSGap.Duration) -} - -var ttlConfig = map[string]interface{}{ - "schedule.max-snapshot-count": 999, - "schedule.enable-location-replacement": false, - "schedule.max-merge-region-size": 999, - "schedule.max-merge-region-keys": 999, - "schedule.scheduler-max-waiting-operator": 999, - "schedule.leader-schedule-limit": 999, - "schedule.region-schedule-limit": 999, - "schedule.hot-region-schedule-limit": 999, - "schedule.replica-schedule-limit": 999, - "schedule.merge-schedule-limit": 999, - "schedule.enable-tikv-split-region": false, -} - -var invalidTTLConfig = map[string]interface{}{ - "schedule.invalid-ttl-config": 0, -} - -func assertTTLConfig( - options *config.PersistOptions, - equality func(interface{}, interface{}, ...interface{}) bool, -) { - equality(uint64(999), options.GetMaxSnapshotCount()) - equality(false, options.IsLocationReplacementEnabled()) - equality(uint64(999), options.GetMaxMergeRegionSize()) - equality(uint64(999), options.GetMaxMergeRegionKeys()) - equality(uint64(999), options.GetSchedulerMaxWaitingOperator()) - equality(uint64(999), options.GetLeaderScheduleLimit()) - equality(uint64(999), options.GetRegionScheduleLimit()) - equality(uint64(999), options.GetHotRegionScheduleLimit()) - equality(uint64(999), options.GetReplicaScheduleLimit()) - equality(uint64(999), options.GetMergeScheduleLimit()) - equality(false, options.IsTikvRegionSplitEnabled()) -} - -func createTTLUrl(url string, ttl int) string { - return fmt.Sprintf("%s/config?ttlSecond=%d", url, ttl) -} - -func (suite *configTestSuite) TestConfigTTL() { - postData, err := json.Marshal(ttlConfig) - suite.NoError(err) - - // test no config and cleaning up - re := suite.Require() - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 0), postData, tu.StatusOK(re)) - suite.NoError(err) - assertTTLConfig(suite.svr.GetPersistOptions(), suite.NotEqual) - - // test time goes by - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 1), postData, tu.StatusOK(re)) - suite.NoError(err) - assertTTLConfig(suite.svr.GetPersistOptions(), suite.Equal) - time.Sleep(2 * time.Second) - assertTTLConfig(suite.svr.GetPersistOptions(), suite.NotEqual) - - // test cleaning up - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 1), postData, tu.StatusOK(re)) - suite.NoError(err) - assertTTLConfig(suite.svr.GetPersistOptions(), suite.Equal) - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 0), postData, tu.StatusOK(re)) - suite.NoError(err) - assertTTLConfig(suite.svr.GetPersistOptions(), suite.NotEqual) - - postData, err = json.Marshal(invalidTTLConfig) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 1), postData, - tu.StatusNotOK(re), tu.StringEqual(re, "\"unsupported ttl config schedule.invalid-ttl-config\"\n")) - suite.NoError(err) - - // only set max-merge-region-size - mergeConfig := map[string]interface{}{ - "schedule.max-merge-region-size": 999, - } - postData, err = 
json.Marshal(mergeConfig) - suite.NoError(err) - - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 1), postData, tu.StatusOK(re)) - suite.NoError(err) - suite.Equal(uint64(999), suite.svr.GetPersistOptions().GetMaxMergeRegionSize()) - // max-merge-region-keys should keep consistence with max-merge-region-size. - suite.Equal(uint64(999*10000), suite.svr.GetPersistOptions().GetMaxMergeRegionKeys()) - - // on invalid value, we use default config - mergeConfig = map[string]interface{}{ - "schedule.enable-tikv-split-region": "invalid", - } - postData, err = json.Marshal(mergeConfig) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 1), postData, tu.StatusOK(re)) - suite.NoError(err) - suite.True(suite.svr.GetPersistOptions().IsTikvRegionSplitEnabled()) -} - -func (suite *configTestSuite) TestTTLConflict() { - addr := createTTLUrl(suite.urlPrefix, 1) - postData, err := json.Marshal(ttlConfig) - suite.NoError(err) - re := suite.Require() - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) - assertTTLConfig(suite.svr.GetPersistOptions(), suite.Equal) - - cfg := map[string]interface{}{"max-snapshot-count": 30} - postData, err = json.Marshal(cfg) - suite.NoError(err) - addr = fmt.Sprintf("%s/config", suite.urlPrefix) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringEqual(re, "\"need to clean up TTL first for schedule.max-snapshot-count\"\n")) - suite.NoError(err) - addr = fmt.Sprintf("%s/config/schedule", suite.urlPrefix) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringEqual(re, "\"need to clean up TTL first for schedule.max-snapshot-count\"\n")) - suite.NoError(err) - cfg = map[string]interface{}{"schedule.max-snapshot-count": 30} - postData, err = json.Marshal(cfg) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, createTTLUrl(suite.urlPrefix, 0), postData, tu.StatusOK(re)) - suite.NoError(err) - err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) - suite.NoError(err) -} diff --git a/tests/integrations/mcs/scheduling/api_test.go b/tests/integrations/mcs/scheduling/api_test.go index eb5701d5775..265333ec625 100644 --- a/tests/integrations/mcs/scheduling/api_test.go +++ b/tests/integrations/mcs/scheduling/api_test.go @@ -245,38 +245,50 @@ func (suite *apiTestSuite) TestAPIForward() { } func (suite *apiTestSuite) TestConfig() { - re := suite.Require() - s, cleanup := tests.StartSingleSchedulingTestServer(suite.ctx, re, suite.backendEndpoints, tempurl.Alloc()) - defer cleanup() - testutil.Eventually(re, func() bool { - return s.IsServing() - }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) - addr := s.GetAddr() - urlPrefix := fmt.Sprintf("%s/scheduling/api/v1/config", addr) - - var cfg config.Config - testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - suite.Equal(cfg.GetListenAddr(), s.GetConfig().GetListenAddr()) - suite.Equal(cfg.Schedule.LeaderScheduleLimit, s.GetConfig().Schedule.LeaderScheduleLimit) - suite.Equal(cfg.Schedule.EnableCrossTableMerge, s.GetConfig().Schedule.EnableCrossTableMerge) - suite.Equal(cfg.Replication.MaxReplicas, s.GetConfig().Replication.MaxReplicas) - suite.Equal(cfg.Replication.LocationLabels, s.GetConfig().Replication.LocationLabels) - suite.Equal(cfg.DataDir, s.GetConfig().DataDir) - - var scheduleCfg sc.ScheduleConfig - testutil.ReadGetJSON(re, testDialClient, urlPrefix+"/schedule", &scheduleCfg) - 
suite.Equal(scheduleCfg.LeaderScheduleLimit, s.GetScheduleConfig().LeaderScheduleLimit) - suite.Equal(scheduleCfg.EnableCrossTableMerge, s.GetScheduleConfig().EnableCrossTableMerge) - - var replicationCfg sc.ReplicationConfig - testutil.ReadGetJSON(re, testDialClient, urlPrefix+"/replicate", &replicationCfg) - suite.Equal(replicationCfg.MaxReplicas, s.GetReplicationConfig().MaxReplicas) - suite.Equal(replicationCfg.LocationLabels, s.GetReplicationConfig().LocationLabels) - - var storeCfg sc.StoreConfig - testutil.ReadGetJSON(re, testDialClient, urlPrefix+"/store", &storeCfg) - suite.Equal(storeCfg.Coprocessor.RegionMaxKeys, s.GetStoreConfig().Coprocessor.RegionMaxKeys) - suite.Equal(storeCfg.Coprocessor.RegionSplitKeys, s.GetStoreConfig().Coprocessor.RegionSplitKeys) + checkConfig := func(cluster *tests.TestCluster) { + re := suite.Require() + s := cluster.GetSchedulingPrimaryServer() + testutil.Eventually(re, func() bool { + return s.IsServing() + }, testutil.WithWaitFor(5*time.Second), testutil.WithTickInterval(50*time.Millisecond)) + addr := s.GetAddr() + urlPrefix := fmt.Sprintf("%s/scheduling/api/v1/config", addr) + + var cfg config.Config + testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + suite.Equal(cfg.GetListenAddr(), s.GetConfig().GetListenAddr()) + suite.Equal(cfg.Schedule.LeaderScheduleLimit, s.GetConfig().Schedule.LeaderScheduleLimit) + suite.Equal(cfg.Schedule.EnableCrossTableMerge, s.GetConfig().Schedule.EnableCrossTableMerge) + suite.Equal(cfg.Replication.MaxReplicas, s.GetConfig().Replication.MaxReplicas) + suite.Equal(cfg.Replication.LocationLabels, s.GetConfig().Replication.LocationLabels) + suite.Equal(cfg.DataDir, s.GetConfig().DataDir) + testutil.Eventually(re, func() bool { + // wait for all schedulers to be loaded in scheduling server. 
+ return len(cfg.Schedule.SchedulersPayload) == 5 + }) + suite.Contains(cfg.Schedule.SchedulersPayload, "balance-leader-scheduler") + suite.Contains(cfg.Schedule.SchedulersPayload, "balance-region-scheduler") + suite.Contains(cfg.Schedule.SchedulersPayload, "balance-hot-region-scheduler") + suite.Contains(cfg.Schedule.SchedulersPayload, "balance-witness-scheduler") + suite.Contains(cfg.Schedule.SchedulersPayload, "transfer-witness-leader-scheduler") + + var scheduleCfg sc.ScheduleConfig + testutil.ReadGetJSON(re, testDialClient, urlPrefix+"/schedule", &scheduleCfg) + suite.Equal(scheduleCfg.LeaderScheduleLimit, s.GetScheduleConfig().LeaderScheduleLimit) + suite.Equal(scheduleCfg.EnableCrossTableMerge, s.GetScheduleConfig().EnableCrossTableMerge) + + var replicationCfg sc.ReplicationConfig + testutil.ReadGetJSON(re, testDialClient, urlPrefix+"/replicate", &replicationCfg) + suite.Equal(replicationCfg.MaxReplicas, s.GetReplicationConfig().MaxReplicas) + suite.Equal(replicationCfg.LocationLabels, s.GetReplicationConfig().LocationLabels) + + var storeCfg sc.StoreConfig + testutil.ReadGetJSON(re, testDialClient, urlPrefix+"/store", &storeCfg) + suite.Equal(storeCfg.Coprocessor.RegionMaxKeys, s.GetStoreConfig().Coprocessor.RegionMaxKeys) + suite.Equal(storeCfg.Coprocessor.RegionSplitKeys, s.GetStoreConfig().Coprocessor.RegionSplitKeys) + } + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInAPIMode(checkConfig) } func TestConfigForward(t *testing.T) { diff --git a/tests/pdctl/config/config_test.go b/tests/pdctl/config/config_test.go index 6ed0841bf74..7840b151fb5 100644 --- a/tests/pdctl/config/config_test.go +++ b/tests/pdctl/config/config_test.go @@ -25,6 +25,7 @@ import ( "github.com/coreos/go-semver/semver" "github.com/pingcap/kvproto/pkg/metapb" "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/utils/typeutil" @@ -48,24 +49,29 @@ func (t *testCase) judge(re *require.Assertions, scheduleConfigs ...*sc.Schedule } } -func TestConfig(t *testing.T) { - re := require.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1) - re.NoError(err) - err = cluster.RunInitialServers() - re.NoError(err) - cluster.WaitLeader() - pdAddr := cluster.GetConfig().GetClientURL() +type configTestSuite struct { + suite.Suite +} + +func TestConfigTestSuite(t *testing.T) { + suite.Run(t, new(configTestSuite)) +} + +func (suite *configTestSuite) TestConfig() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkConfig) +} + +func (suite *configTestSuite) checkConfig(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() cmd := pdctlCmd.GetRootCmd() store := &metapb.Store{ Id: 1, State: metapb.StoreState_Up, } - leaderServer := cluster.GetLeaderServer() - re.NoError(leaderServer.BootstrapCluster()) svr := leaderServer.GetServer() tests.MustPutStore(re, cluster, store) defer cluster.Destroy() @@ -648,24 +654,21 @@ func TestReplicationMode(t *testing.T) { check() } -func TestUpdateDefaultReplicaConfig(t *testing.T) { - re := require.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1) - re.NoError(err) - err = cluster.RunInitialServers() - re.NoError(err) - cluster.WaitLeader() - pdAddr := 
cluster.GetConfig().GetClientURL() +func (suite *configTestSuite) TestUpdateDefaultReplicaConfig() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkUpdateDefaultReplicaConfig) +} + +func (suite *configTestSuite) checkUpdateDefaultReplicaConfig(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() cmd := pdctlCmd.GetRootCmd() store := &metapb.Store{ Id: 1, State: metapb.StoreState_Up, } - leaderServer := cluster.GetLeaderServer() - re.NoError(leaderServer.BootstrapCluster()) tests.MustPutStore(re, cluster, store) defer cluster.Destroy() @@ -764,7 +767,7 @@ func TestUpdateDefaultReplicaConfig(t *testing.T) { checkRuleIsolationLevel("host") // update unsuccessfully when many rule exists. - fname := t.TempDir() + fname := suite.T().TempDir() rules := []placement.Rule{ { GroupID: "pd", @@ -791,16 +794,15 @@ func TestUpdateDefaultReplicaConfig(t *testing.T) { checkRuleIsolationLevel("host") } -func TestPDServerConfig(t *testing.T) { - re := require.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1) - re.NoError(err) - err = cluster.RunInitialServers() - re.NoError(err) - cluster.WaitLeader() - pdAddr := cluster.GetConfig().GetClientURL() +func (suite *configTestSuite) TestPDServerConfig() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkPDServerConfig) +} + +func (suite *configTestSuite) checkPDServerConfig(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() cmd := pdctlCmd.GetRootCmd() store := &metapb.Store{ @@ -808,8 +810,6 @@ func TestPDServerConfig(t *testing.T) { State: metapb.StoreState_Up, LastHeartbeat: time.Now().UnixNano(), } - leaderServer := cluster.GetLeaderServer() - re.NoError(leaderServer.BootstrapCluster()) tests.MustPutStore(re, cluster, store) defer cluster.Destroy() diff --git a/tests/pdctl/scheduler/scheduler_test.go b/tests/pdctl/scheduler/scheduler_test.go index 3554b828269..cd599405124 100644 --- a/tests/pdctl/scheduler/scheduler_test.go +++ b/tests/pdctl/scheduler/scheduler_test.go @@ -46,7 +46,6 @@ func TestSchedulerTestSuite(t *testing.T) { func (suite *schedulerTestSuite) TestScheduler() { env := tests.NewSchedulingTestEnvironment(suite.T()) env.RunTestInTwoModes(suite.checkScheduler) - env.RunTestInTwoModes(suite.checkSchedulerDiagnostic) } func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { @@ -414,8 +413,10 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "show"}, &conf) re.Equal(4., conf["batch"]) mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler", "set", "batch", "3"}, nil) - mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler"}, &conf1) - re.Equal(3., conf1["batch"]) + testutil.Eventually(re, func() bool { + mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "config", "balance-leader-scheduler"}, &conf1) + return conf1["batch"] == 3. 
+ }) echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "add", "balance-leader-scheduler"}, nil) re.NotContains(echo, "Success!") echo = mustExec(re, cmd, []string{"-u", pdAddr, "scheduler", "remove", "balance-leader-scheduler"}, nil) @@ -494,6 +495,11 @@ func (suite *schedulerTestSuite) checkScheduler(cluster *tests.TestCluster) { checkSchedulerWithStatusCommand("disabled", nil) } +func (suite *schedulerTestSuite) TestSchedulerDiagnostic() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkSchedulerDiagnostic) +} + func (suite *schedulerTestSuite) checkSchedulerDiagnostic(cluster *tests.TestCluster) { re := suite.Require() pdAddr := cluster.GetConfig().GetClientURL() diff --git a/tests/server/api/operator_test.go b/tests/server/api/operator_test.go index 83ab0f3c7ed..908daf21aac 100644 --- a/tests/server/api/operator_test.go +++ b/tests/server/api/operator_test.go @@ -51,7 +51,7 @@ func TestOperatorTestSuite(t *testing.T) { suite.Run(t, new(operatorTestSuite)) } -func (suite *operatorTestSuite) TestOperator() { +func (suite *operatorTestSuite) TestAddRemovePeer() { opts := []tests.ConfigOption{ func(conf *config.Config, serverName string) { conf.Replication.MaxReplicas = 1 @@ -59,17 +59,6 @@ func (suite *operatorTestSuite) TestOperator() { } env := tests.NewSchedulingTestEnvironment(suite.T(), opts...) env.RunTestInTwoModes(suite.checkAddRemovePeer) - - env = tests.NewSchedulingTestEnvironment(suite.T(), opts...) - env.RunTestInTwoModes(suite.checkMergeRegionOperator) - - opts = []tests.ConfigOption{ - func(conf *config.Config, serverName string) { - conf.Replication.MaxReplicas = 3 - }, - } - env = tests.NewSchedulingTestEnvironment(suite.T(), opts...) - env.RunTestInTwoModes(suite.checkTransferRegionWithPlacementRule) } func (suite *operatorTestSuite) checkAddRemovePeer(cluster *tests.TestCluster) { @@ -178,6 +167,16 @@ func (suite *operatorTestSuite) checkAddRemovePeer(cluster *tests.TestCluster) { suite.NoError(err) } +func (suite *operatorTestSuite) TestMergeRegionOperator() { + opts := []tests.ConfigOption{ + func(conf *config.Config, serverName string) { + conf.Replication.MaxReplicas = 1 + }, + } + env := tests.NewSchedulingTestEnvironment(suite.T(), opts...) + env.RunTestInTwoModes(suite.checkMergeRegionOperator) +} + func (suite *operatorTestSuite) checkMergeRegionOperator(cluster *tests.TestCluster) { re := suite.Require() suite.pauseRuleChecker(cluster) @@ -204,6 +203,16 @@ func (suite *operatorTestSuite) checkMergeRegionOperator(cluster *tests.TestClus suite.NoError(err) } +func (suite *operatorTestSuite) TestTransferRegionWithPlacementRule() { + opts := []tests.ConfigOption{ + func(conf *config.Config, serverName string) { + conf.Replication.MaxReplicas = 3 + }, + } + env := tests.NewSchedulingTestEnvironment(suite.T(), opts...) 
+ env.RunTestInTwoModes(suite.checkTransferRegionWithPlacementRule) +} + func (suite *operatorTestSuite) checkTransferRegionWithPlacementRule(cluster *tests.TestCluster) { re := suite.Require() suite.pauseRuleChecker(cluster) diff --git a/tests/server/api/scheduler_test.go b/tests/server/api/scheduler_test.go index 9db94e8562d..38f691a4eda 100644 --- a/tests/server/api/scheduler_test.go +++ b/tests/server/api/scheduler_test.go @@ -42,13 +42,9 @@ func TestScheduleTestSuite(t *testing.T) { suite.Run(t, new(scheduleTestSuite)) } -func (suite *scheduleTestSuite) TestScheduler() { +func (suite *scheduleTestSuite) TestOriginAPI() { env := tests.NewSchedulingTestEnvironment(suite.T()) env.RunTestInTwoModes(suite.checkOriginAPI) - env = tests.NewSchedulingTestEnvironment(suite.T()) - env.RunTestInTwoModes(suite.checkAPI) - env = tests.NewSchedulingTestEnvironment(suite.T()) - env.RunTestInTwoModes(suite.checkDisable) } func (suite *scheduleTestSuite) checkOriginAPI(cluster *tests.TestCluster) { @@ -115,6 +111,11 @@ func (suite *scheduleTestSuite) checkOriginAPI(cluster *tests.TestCluster) { suite.NoError(err) } +func (suite *scheduleTestSuite) TestAPI() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkAPI) +} + func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { re := suite.Require() leaderAddr := cluster.GetLeaderServer().GetAddr() @@ -153,9 +154,12 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { body, err := json.Marshal(dataMap) suite.NoError(err) suite.NoError(tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re))) - resp = make(map[string]interface{}) - suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) - suite.Equal(3.0, resp["batch"]) + tu.Eventually(re, func() bool { // wait for scheduling server to be synced. + resp = make(map[string]interface{}) + suite.NoError(tu.ReadGetJSON(re, testDialClient, listURL, &resp)) + return resp["batch"] == 3.0 + }) + // update again err = tu.CheckPostJSON(testDialClient, updateURL, body, tu.StatusOK(re), @@ -556,6 +560,11 @@ func (suite *scheduleTestSuite) checkAPI(cluster *tests.TestCluster) { } } +func (suite *scheduleTestSuite) TestDisable() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkDisable) +} + func (suite *scheduleTestSuite) checkDisable(cluster *tests.TestCluster) { re := suite.Require() leaderAddr := cluster.GetLeaderServer().GetAddr() diff --git a/tests/server/config/config_test.go b/tests/server/config/config_test.go index 1b2178bde33..62b5134d22d 100644 --- a/tests/server/config/config_test.go +++ b/tests/server/config/config_test.go @@ -18,17 +18,25 @@ import ( "bytes" "context" "encoding/json" + "fmt" "net/http" "testing" + "time" "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" "github.com/tikv/pd/pkg/ratelimit" + sc "github.com/tikv/pd/pkg/schedule/config" + tu "github.com/tikv/pd/pkg/utils/testutil" + "github.com/tikv/pd/pkg/utils/typeutil" + "github.com/tikv/pd/pkg/versioninfo" "github.com/tikv/pd/server" + "github.com/tikv/pd/server/config" "github.com/tikv/pd/tests" ) -// dialClient used to dial http request. -var dialClient = &http.Client{ +// testDialClient used to dial http request. 
+var testDialClient = &http.Client{ Transport: &http.Transport{ DisableKeepAlives: true, }, @@ -56,7 +64,7 @@ func TestRateLimitConfigReload(t *testing.T) { data, err := json.Marshal(input) re.NoError(err) req, _ := http.NewRequest(http.MethodPost, leader.GetAddr()+"/pd/api/v1/service-middleware/config", bytes.NewBuffer(data)) - resp, err := dialClient.Do(req) + resp, err := testDialClient.Do(req) re.NoError(err) resp.Body.Close() re.True(leader.GetServer().GetServiceMiddlewarePersistOptions().IsRateLimitEnabled()) @@ -74,3 +82,517 @@ func TestRateLimitConfigReload(t *testing.T) { re.True(leader.GetServer().GetServiceMiddlewarePersistOptions().IsRateLimitEnabled()) re.Len(leader.GetServer().GetServiceMiddlewarePersistOptions().GetRateLimitConfig().LimiterConfig, 1) } + +type configTestSuite struct { + suite.Suite +} + +func TestConfigTestSuite(t *testing.T) { + suite.Run(t, new(configTestSuite)) +} + +func (suite *configTestSuite) TestConfigAll() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkConfigAll) +} + +func (suite *configTestSuite) checkConfigAll(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + + addr := fmt.Sprintf("%s/pd/api/v1/config", urlPrefix) + cfg := &config.Config{} + err := tu.ReadGetJSON(re, testDialClient, addr, cfg) + suite.NoError(err) + + // the original way + r := map[string]int{"max-replicas": 5} + postData, err := json.Marshal(r) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + l := map[string]interface{}{ + "location-labels": "zone,rack", + "region-schedule-limit": 10, + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + l = map[string]interface{}{ + "metric-storage": "http://127.0.0.1:9090", + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + newCfg := &config.Config{} + err = tu.ReadGetJSON(re, testDialClient, addr, newCfg) + suite.NoError(err) + cfg.Replication.MaxReplicas = 5 + cfg.Replication.LocationLabels = []string{"zone", "rack"} + cfg.Schedule.RegionScheduleLimit = 10 + cfg.PDServerCfg.MetricStorage = "http://127.0.0.1:9090" + suite.Equal(newCfg, cfg) + + // the new way + l = map[string]interface{}{ + "schedule.tolerant-size-ratio": 2.5, + "schedule.enable-tikv-split-region": "false", + "replication.location-labels": "idc,host", + "pd-server.metric-storage": "http://127.0.0.1:1234", + "log.level": "warn", + "cluster-version": "v4.0.0-beta", + "replication-mode.replication-mode": "dr-auto-sync", + "replication-mode.dr-auto-sync.label-key": "foobar", + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + newCfg1 := &config.Config{} + err = tu.ReadGetJSON(re, testDialClient, addr, newCfg1) + suite.NoError(err) + cfg.Schedule.EnableTiKVSplitRegion = false + cfg.Schedule.TolerantSizeRatio = 2.5 + cfg.Replication.LocationLabels = []string{"idc", "host"} + cfg.PDServerCfg.MetricStorage = "http://127.0.0.1:1234" + cfg.Log.Level = "warn" + cfg.ReplicationMode.DRAutoSync.LabelKey = "foobar" + cfg.ReplicationMode.ReplicationMode = "dr-auto-sync" + v, err := versioninfo.ParseVersion("v4.0.0-beta") + suite.NoError(err) + cfg.ClusterVersion = *v + 
suite.Equal(cfg, newCfg1) + + // revert this to avoid it affects TestConfigTTL + l["schedule.enable-tikv-split-region"] = "true" + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + // illegal prefix + l = map[string]interface{}{ + "replicate.max-replicas": 1, + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, + tu.StatusNotOK(re), + tu.StringContain(re, "not found")) + suite.NoError(err) + + // update prefix directly + l = map[string]interface{}{ + "replication-mode": nil, + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, + tu.StatusNotOK(re), + tu.StringContain(re, "cannot update config prefix")) + suite.NoError(err) + + // config item not found + l = map[string]interface{}{ + "schedule.region-limit": 10, + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringContain(re, "not found")) + suite.NoError(err) +} + +func (suite *configTestSuite) TestConfigSchedule() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkConfigSchedule) +} + +func (suite *configTestSuite) checkConfigSchedule(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + + addr := fmt.Sprintf("%s/pd/api/v1/config/schedule", urlPrefix) + + scheduleConfig := &sc.ScheduleConfig{} + suite.NoError(tu.ReadGetJSON(re, testDialClient, addr, scheduleConfig)) + scheduleConfig.MaxStoreDownTime.Duration = time.Second + postData, err := json.Marshal(scheduleConfig) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + scheduleConfig1 := &sc.ScheduleConfig{} + suite.NoError(tu.ReadGetJSON(re, testDialClient, addr, scheduleConfig1)) + suite.Equal(*scheduleConfig1, *scheduleConfig) +} + +func (suite *configTestSuite) TestConfigReplication() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkConfigReplication) +} + +func (suite *configTestSuite) checkConfigReplication(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + + addr := fmt.Sprintf("%s/pd/api/v1/config/replicate", urlPrefix) + rc := &sc.ReplicationConfig{} + err := tu.ReadGetJSON(re, testDialClient, addr, rc) + suite.NoError(err) + + rc.MaxReplicas = 5 + rc1 := map[string]int{"max-replicas": 5} + postData, err := json.Marshal(rc1) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + rc.LocationLabels = []string{"zone", "rack"} + rc2 := map[string]string{"location-labels": "zone,rack"} + postData, err = json.Marshal(rc2) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + rc.IsolationLevel = "zone" + rc3 := map[string]string{"isolation-level": "zone"} + postData, err = json.Marshal(rc3) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + rc4 := &sc.ReplicationConfig{} + err = tu.ReadGetJSON(re, testDialClient, addr, rc4) + suite.NoError(err) + + suite.Equal(*rc4, *rc) +} + +func (suite *configTestSuite) TestConfigLabelProperty() { + env := 
tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkConfigLabelProperty) +} + +func (suite *configTestSuite) checkConfigLabelProperty(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + + addr := urlPrefix + "/pd/api/v1/config/label-property" + loadProperties := func() config.LabelPropertyConfig { + var cfg config.LabelPropertyConfig + err := tu.ReadGetJSON(re, testDialClient, addr, &cfg) + suite.NoError(err) + return cfg + } + + cfg := loadProperties() + suite.Empty(cfg) + + cmds := []string{ + `{"type": "foo", "action": "set", "label-key": "zone", "label-value": "cn1"}`, + `{"type": "foo", "action": "set", "label-key": "zone", "label-value": "cn2"}`, + `{"type": "bar", "action": "set", "label-key": "host", "label-value": "h1"}`, + } + for _, cmd := range cmds { + err := tu.CheckPostJSON(testDialClient, addr, []byte(cmd), tu.StatusOK(re)) + suite.NoError(err) + } + + cfg = loadProperties() + suite.Len(cfg, 2) + suite.Equal([]config.StoreLabel{ + {Key: "zone", Value: "cn1"}, + {Key: "zone", Value: "cn2"}, + }, cfg["foo"]) + suite.Equal([]config.StoreLabel{{Key: "host", Value: "h1"}}, cfg["bar"]) + + cmds = []string{ + `{"type": "foo", "action": "delete", "label-key": "zone", "label-value": "cn1"}`, + `{"type": "bar", "action": "delete", "label-key": "host", "label-value": "h1"}`, + } + for _, cmd := range cmds { + err := tu.CheckPostJSON(testDialClient, addr, []byte(cmd), tu.StatusOK(re)) + suite.NoError(err) + } + + cfg = loadProperties() + suite.Len(cfg, 1) + suite.Equal([]config.StoreLabel{{Key: "zone", Value: "cn2"}}, cfg["foo"]) +} + +func (suite *configTestSuite) TestConfigDefault() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkConfigDefault) +} + +func (suite *configTestSuite) checkConfigDefault(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + + addr := urlPrefix + "/pd/api/v1/config" + + r := map[string]int{"max-replicas": 5} + postData, err := json.Marshal(r) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + l := map[string]interface{}{ + "location-labels": "zone,rack", + "region-schedule-limit": 10, + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + l = map[string]interface{}{ + "metric-storage": "http://127.0.0.1:9090", + } + postData, err = json.Marshal(l) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + + addr = fmt.Sprintf("%s/pd/api/v1/config/default", urlPrefix) + defaultCfg := &config.Config{} + err = tu.ReadGetJSON(re, testDialClient, addr, defaultCfg) + suite.NoError(err) + + suite.Equal(uint64(3), defaultCfg.Replication.MaxReplicas) + suite.Equal(typeutil.StringSlice([]string{}), defaultCfg.Replication.LocationLabels) + suite.Equal(uint64(2048), defaultCfg.Schedule.RegionScheduleLimit) + suite.Equal("", defaultCfg.PDServerCfg.MetricStorage) +} + +func (suite *configTestSuite) TestConfigPDServer() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkConfigPDServer) +} + +func (suite *configTestSuite) checkConfigPDServer(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := 
leaderServer.GetAddr() + + addrPost := urlPrefix + "/pd/api/v1/config" + ms := map[string]interface{}{ + "metric-storage": "", + } + postData, err := json.Marshal(ms) + suite.NoError(err) + suite.NoError(tu.CheckPostJSON(testDialClient, addrPost, postData, tu.StatusOK(re))) + addrGet := fmt.Sprintf("%s/pd/api/v1/config/pd-server", urlPrefix) + sc := &config.PDServerConfig{} + suite.NoError(tu.ReadGetJSON(re, testDialClient, addrGet, sc)) + suite.Equal(bool(true), sc.UseRegionStorage) + suite.Equal("table", sc.KeyType) + suite.Equal(typeutil.StringSlice([]string{}), sc.RuntimeServices) + suite.Equal("", sc.MetricStorage) + suite.Equal("auto", sc.DashboardAddress) + suite.Equal(int(3), sc.FlowRoundByDigit) + suite.Equal(typeutil.NewDuration(time.Second), sc.MinResolvedTSPersistenceInterval) + suite.Equal(24*time.Hour, sc.MaxResetTSGap.Duration) +} + +var ttlConfig = map[string]interface{}{ + "schedule.max-snapshot-count": 999, + "schedule.enable-location-replacement": false, + "schedule.max-merge-region-size": 999, + "schedule.max-merge-region-keys": 999, + "schedule.scheduler-max-waiting-operator": 999, + "schedule.leader-schedule-limit": 999, + "schedule.region-schedule-limit": 999, + "schedule.hot-region-schedule-limit": 999, + "schedule.replica-schedule-limit": 999, + "schedule.merge-schedule-limit": 999, + "schedule.enable-tikv-split-region": false, +} + +var invalidTTLConfig = map[string]interface{}{ + "schedule.invalid-ttl-config": 0, +} + +type ttlConfigInterface interface { + GetMaxSnapshotCount() uint64 + IsLocationReplacementEnabled() bool + GetMaxMergeRegionSize() uint64 + GetMaxMergeRegionKeys() uint64 + GetSchedulerMaxWaitingOperator() uint64 + GetLeaderScheduleLimit() uint64 + GetRegionScheduleLimit() uint64 + GetHotRegionScheduleLimit() uint64 + GetReplicaScheduleLimit() uint64 + GetMergeScheduleLimit() uint64 + IsTikvRegionSplitEnabled() bool +} + +func (suite *configTestSuite) assertTTLConfig( + cluster *tests.TestCluster, + expectedEqual bool, +) { + equality := suite.Equal + if !expectedEqual { + equality = suite.NotEqual + } + checkfunc := func(options ttlConfigInterface) { + equality(uint64(999), options.GetMaxSnapshotCount()) + equality(false, options.IsLocationReplacementEnabled()) + equality(uint64(999), options.GetMaxMergeRegionSize()) + equality(uint64(999), options.GetMaxMergeRegionKeys()) + equality(uint64(999), options.GetSchedulerMaxWaitingOperator()) + equality(uint64(999), options.GetLeaderScheduleLimit()) + equality(uint64(999), options.GetRegionScheduleLimit()) + equality(uint64(999), options.GetHotRegionScheduleLimit()) + equality(uint64(999), options.GetReplicaScheduleLimit()) + equality(uint64(999), options.GetMergeScheduleLimit()) + equality(false, options.IsTikvRegionSplitEnabled()) + } + checkfunc(cluster.GetLeaderServer().GetServer().GetPersistOptions()) + if cluster.GetSchedulingPrimaryServer() != nil { + // wait for the scheduling primary server to be synced + options := cluster.GetSchedulingPrimaryServer().GetPersistConfig() + tu.Eventually(suite.Require(), func() bool { + if expectedEqual { + return uint64(999) == options.GetMaxSnapshotCount() + } + return uint64(999) != options.GetMaxSnapshotCount() + }) + checkfunc(options) + } +} + +func (suite *configTestSuite) assertTTLConfigItemEqaul( + cluster *tests.TestCluster, + item string, + expectedValue interface{}, +) { + checkfunc := func(options ttlConfigInterface) bool { + switch item { + case "max-merge-region-size": + return expectedValue.(uint64) == options.GetMaxMergeRegionSize() + case 
"max-merge-region-keys": + return expectedValue.(uint64) == options.GetMaxMergeRegionKeys() + case "enable-tikv-split-region": + return expectedValue.(bool) == options.IsTikvRegionSplitEnabled() + } + return false + } + suite.True(checkfunc(cluster.GetLeaderServer().GetServer().GetPersistOptions())) + if cluster.GetSchedulingPrimaryServer() != nil { + // wait for the scheduling primary server to be synced + tu.Eventually(suite.Require(), func() bool { + return checkfunc(cluster.GetSchedulingPrimaryServer().GetPersistConfig()) + }) + } +} + +func createTTLUrl(url string, ttl int) string { + return fmt.Sprintf("%s/pd/api/v1/config?ttlSecond=%d", url, ttl) +} + +func (suite *configTestSuite) TestConfigTTL() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + // FIXME: enable this test in two modes after ttl config is supported. + env.RunTestInPDMode(suite.checkConfigTTL) +} + +func (suite *configTestSuite) checkConfigTTL(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + postData, err := json.Marshal(ttlConfig) + suite.NoError(err) + + // test no config and cleaning up + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 0), postData, tu.StatusOK(re)) + suite.NoError(err) + suite.assertTTLConfig(cluster, false) + + // test time goes by + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 1), postData, tu.StatusOK(re)) + suite.NoError(err) + suite.assertTTLConfig(cluster, true) + time.Sleep(2 * time.Second) + suite.assertTTLConfig(cluster, false) + + // test cleaning up + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 1), postData, tu.StatusOK(re)) + suite.NoError(err) + suite.assertTTLConfig(cluster, true) + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 0), postData, tu.StatusOK(re)) + suite.NoError(err) + suite.assertTTLConfig(cluster, false) + + postData, err = json.Marshal(invalidTTLConfig) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 1), postData, + tu.StatusNotOK(re), tu.StringEqual(re, "\"unsupported ttl config schedule.invalid-ttl-config\"\n")) + suite.NoError(err) + + // only set max-merge-region-size + mergeConfig := map[string]interface{}{ + "schedule.max-merge-region-size": 999, + } + postData, err = json.Marshal(mergeConfig) + suite.NoError(err) + + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 1), postData, tu.StatusOK(re)) + suite.NoError(err) + suite.assertTTLConfigItemEqaul(cluster, "max-merge-region-size", uint64(999)) + // max-merge-region-keys should keep consistence with max-merge-region-size. + suite.assertTTLConfigItemEqaul(cluster, "max-merge-region-keys", uint64(999*10000)) + + // on invalid value, we use default config + mergeConfig = map[string]interface{}{ + "schedule.enable-tikv-split-region": "invalid", + } + postData, err = json.Marshal(mergeConfig) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 10), postData, tu.StatusOK(re)) + suite.NoError(err) + suite.assertTTLConfigItemEqaul(cluster, "enable-tikv-split-region", true) +} + +func (suite *configTestSuite) TestTTLConflict() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + // FIXME: enable this test in two modes after ttl config is supported. 
+ env.RunTestInPDMode(suite.checkTTLConflict) +} + +func (suite *configTestSuite) checkTTLConflict(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + urlPrefix := leaderServer.GetAddr() + addr := createTTLUrl(urlPrefix, 1) + postData, err := json.Marshal(ttlConfig) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) + suite.assertTTLConfig(cluster, true) + + cfg := map[string]interface{}{"max-snapshot-count": 30} + postData, err = json.Marshal(cfg) + suite.NoError(err) + addr = fmt.Sprintf("%s/pd/api/v1/config", urlPrefix) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringEqual(re, "\"need to clean up TTL first for schedule.max-snapshot-count\"\n")) + suite.NoError(err) + addr = fmt.Sprintf("%s/pd/api/v1/config/schedule", urlPrefix) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusNotOK(re), tu.StringEqual(re, "\"need to clean up TTL first for schedule.max-snapshot-count\"\n")) + suite.NoError(err) + cfg = map[string]interface{}{"schedule.max-snapshot-count": 30} + postData, err = json.Marshal(cfg) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, createTTLUrl(urlPrefix, 0), postData, tu.StatusOK(re)) + suite.NoError(err) + err = tu.CheckPostJSON(testDialClient, addr, postData, tu.StatusOK(re)) + suite.NoError(err) +} From 6d28cd35dedb530cfb4315333366227767e1dc2d Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 01:57:30 +0800 Subject: [PATCH 016/116] test_1 From 4baba6dbc6c7075d73b970bf651a3c87f402603e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 02:16:06 +0800 Subject: [PATCH 017/116] add more test Signed-off-by: lhy1024 --- tests/pdctl/config/config_test.go | 63 +++++++++++++------------------ 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/tests/pdctl/config/config_test.go b/tests/pdctl/config/config_test.go index 7840b151fb5..ef6688bc925 100644 --- a/tests/pdctl/config/config_test.go +++ b/tests/pdctl/config/config_test.go @@ -289,16 +289,15 @@ func (suite *configTestSuite) checkConfig(cluster *tests.TestCluster) { re.Contains(string(output), "is invalid") } -func TestPlacementRules(t *testing.T) { - re := require.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1) - re.NoError(err) - err = cluster.RunInitialServers() - re.NoError(err) - cluster.WaitLeader() - pdAddr := cluster.GetConfig().GetClientURL() +func (suite *configTestSuite) TestPlacementRules() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkPlacementRules) +} + +func (suite *configTestSuite) checkPlacementRules(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() cmd := pdctlCmd.GetRootCmd() store := &metapb.Store{ @@ -306,8 +305,6 @@ func TestPlacementRules(t *testing.T) { State: metapb.StoreState_Up, LastHeartbeat: time.Now().UnixNano(), } - leaderServer := cluster.GetLeaderServer() - re.NoError(leaderServer.BootstrapCluster()) tests.MustPutStore(re, cluster, store) defer cluster.Destroy() @@ -386,16 +383,15 @@ func TestPlacementRules(t *testing.T) { re.Equal([2]string{"pd", "test1"}, rules[0].Key()) } -func TestPlacementRuleGroups(t *testing.T) { - re := require.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1) - re.NoError(err) 
- err = cluster.RunInitialServers() - re.NoError(err) - cluster.WaitLeader() - pdAddr := cluster.GetConfig().GetClientURL() +func (suite *configTestSuite) TestPlacementRuleGroups() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkPlacementRuleGroups) +} + +func (suite *configTestSuite) checkPlacementRuleGroups(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() cmd := pdctlCmd.GetRootCmd() store := &metapb.Store{ @@ -403,8 +399,6 @@ func TestPlacementRuleGroups(t *testing.T) { State: metapb.StoreState_Up, LastHeartbeat: time.Now().UnixNano(), } - leaderServer := cluster.GetLeaderServer() - re.NoError(leaderServer.BootstrapCluster()) tests.MustPutStore(re, cluster, store) defer cluster.Destroy() @@ -460,16 +454,15 @@ func TestPlacementRuleGroups(t *testing.T) { re.Contains(string(output), "404") } -func TestPlacementRuleBundle(t *testing.T) { - re := require.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - cluster, err := tests.NewTestCluster(ctx, 1) - re.NoError(err) - err = cluster.RunInitialServers() - re.NoError(err) - cluster.WaitLeader() - pdAddr := cluster.GetConfig().GetClientURL() +func (suite *configTestSuite) TestPlacementRuleBundle() { + env := tests.NewSchedulingTestEnvironment(suite.T()) + env.RunTestInTwoModes(suite.checkPlacementRuleBundle) +} + +func (suite *configTestSuite) checkPlacementRuleBundle(cluster *tests.TestCluster) { + re := suite.Require() + leaderServer := cluster.GetLeaderServer() + pdAddr := leaderServer.GetAddr() cmd := pdctlCmd.GetRootCmd() store := &metapb.Store{ @@ -477,8 +470,6 @@ func TestPlacementRuleBundle(t *testing.T) { State: metapb.StoreState_Up, LastHeartbeat: time.Now().UnixNano(), } - leaderServer := cluster.GetLeaderServer() - re.NoError(leaderServer.BootstrapCluster()) tests.MustPutStore(re, cluster, store) defer cluster.Destroy() From d83da7147cbc683967e8da326d06c26db9d1a502 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 02:17:35 +0800 Subject: [PATCH 018/116] test_2 From 5709e983ae183f5566ef84b68a34affe79698839 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 02:37:39 +0800 Subject: [PATCH 019/116] test_3 From eac4ea09547e2b25e242842fcb3e934b39270f28 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 02:57:43 +0800 Subject: [PATCH 020/116] test_4 From 64d83b24c93cc524fe753c6101e4cc5f784f46a8 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 03:17:48 +0800 Subject: [PATCH 021/116] test_5 From 366c4db858c880bddf5cf641284a951506709d0e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 03:37:52 +0800 Subject: [PATCH 022/116] test_6 From 09bed12aeb686984d0f23caddd0ee855c19c8dda Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 03:57:56 +0800 Subject: [PATCH 023/116] test_7 From 27139ccc3b4e52f2ddb298f919fa6c43377eb2f4 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 04:18:00 +0800 Subject: [PATCH 024/116] test_8 From acc43bb1ffe6fe099aabc5b16fd1ac00cd7561d9 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 04:38:04 +0800 Subject: [PATCH 025/116] test_9 From 5217e9a6a94f3a4654a6a38fe96f7abdfa998bd0 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 04:58:09 +0800 Subject: [PATCH 026/116] test_10 From 88771aea7947cdae86d4936157396974df57f1c0 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 05:18:13 +0800 Subject: [PATCH 027/116] test_11 From 
1de5e12421c2a369ddb893fd8d524c87e00f32a4 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 05:38:17 +0800 Subject: [PATCH 028/116] test_12 From 4ea8f4cd7a99ccced8e0360666479735cef8ce16 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 05:58:22 +0800 Subject: [PATCH 029/116] test_13 From ccb02dba5f658991cb2fb1ac80c864f940d57590 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 06:18:26 +0800 Subject: [PATCH 030/116] test_14 From e8f0185751d9cd03b964c7940d7e3e610d330d24 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 06:38:30 +0800 Subject: [PATCH 031/116] test_15 From 1bf89d0c16125a34e4cd710e47cbf4279b5885ee Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 06:58:34 +0800 Subject: [PATCH 032/116] test_16 From 5ef862e1870f7b57173113668ea6224450dfb20c Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 07:18:39 +0800 Subject: [PATCH 033/116] test_17 From 89c8be7098292e6f39cce38f0299a371ba36fa9d Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 07:38:43 +0800 Subject: [PATCH 034/116] test_18 From 2aeb902db137ebd4b1ab916f9eccaef4a12762ed Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 07:58:47 +0800 Subject: [PATCH 035/116] test_19 From 78b51f7abc76870c87e6d391015c999546eec0e1 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 08:18:51 +0800 Subject: [PATCH 036/116] test_20 From d965706368fad99f32a89e7f3745d14d2eb02cbd Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 08:38:55 +0800 Subject: [PATCH 037/116] test_21 From e12a9cee31697953e209c8afc623e6801c879f3b Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 08:59:00 +0800 Subject: [PATCH 038/116] test_22 From dbc2e48490e379e8dd029729241d0c86c073b382 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 09:19:04 +0800 Subject: [PATCH 039/116] test_23 From ccaf68b3288cc6836d193488b8944ba9b7d6c227 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 09:39:08 +0800 Subject: [PATCH 040/116] test_24 From 5504cdab5ad9bb399fa94119bb36db38e53766e6 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 09:59:13 +0800 Subject: [PATCH 041/116] test_25 From ff4241dacf2db079b9fbad51cd9d84d2c5362eea Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 10:19:17 +0800 Subject: [PATCH 042/116] test_26 From f2605e79f074d6d8a2de1811eb0543493dede02e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 10:39:21 +0800 Subject: [PATCH 043/116] test_27 From 779133f61a4056ec4e3daa46c1799dad31037d17 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 10:59:26 +0800 Subject: [PATCH 044/116] test_28 From e3bf609179b826799405afede5e1d1d16cf375c0 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 11:16:51 +0800 Subject: [PATCH 045/116] make test stable Signed-off-by: lhy1024 --- tests/integrations/mcs/scheduling/api_test.go | 6 ++--- tests/pdctl/config/config_test.go | 25 ++++++++++++++----- tests/server/config/config_test.go | 3 +++ 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/tests/integrations/mcs/scheduling/api_test.go b/tests/integrations/mcs/scheduling/api_test.go index 265333ec625..e4378ecdf59 100644 --- a/tests/integrations/mcs/scheduling/api_test.go +++ b/tests/integrations/mcs/scheduling/api_test.go @@ -306,7 +306,7 @@ func TestConfigForward(t *testing.T) { sche.GetPersistConfig().GetScheduleConfig().LeaderScheduleLimit = 100 re.Equal(100, int(sche.GetScheduleConfig().LeaderScheduleLimit)) testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - 
re.Equal(cfg["schedule"].(map[string]interface{})["leader-schedule-limit"], float64(sche.GetScheduleConfig().LeaderScheduleLimit)) + re.Equal(100., cfg["schedule"].(map[string]interface{})["leader-schedule-limit"]) // Test schedule urlPrefix = fmt.Sprintf("%s/pd/api/v1/config/schedule", addr) @@ -316,7 +316,7 @@ func TestConfigForward(t *testing.T) { sche.GetPersistConfig().GetScheduleConfig().LeaderScheduleLimit = 4 re.Equal(4, int(sche.GetScheduleConfig().LeaderScheduleLimit)) testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - re.Equal(cfg["leader-schedule-limit"], float64(sche.GetScheduleConfig().LeaderScheduleLimit)) + re.Equal(4., cfg["leader-schedule-limit"]) // Test replicate urlPrefix = fmt.Sprintf("%s/pd/api/v1/config/replicate", addr) @@ -326,7 +326,7 @@ func TestConfigForward(t *testing.T) { sche.GetPersistConfig().GetReplicationConfig().MaxReplicas = 5 re.Equal(5, int(sche.GetReplicationConfig().MaxReplicas)) testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - re.Equal(cfg["max-replicas"], float64(sche.GetReplicationConfig().MaxReplicas)) + re.Equal(5., cfg["max-replicas"]) } env := tests.NewSchedulingTestEnvironment(t) env.RunTestInAPIMode(checkConfigForward) diff --git a/tests/pdctl/config/config_test.go b/tests/pdctl/config/config_test.go index ef6688bc925..26d70bb955f 100644 --- a/tests/pdctl/config/config_test.go +++ b/tests/pdctl/config/config_test.go @@ -28,6 +28,7 @@ import ( "github.com/stretchr/testify/suite" sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/schedule/placement" + "github.com/tikv/pd/pkg/utils/testutil" "github.com/tikv/pd/pkg/utils/typeutil" "github.com/tikv/pd/server/config" "github.com/tikv/pd/tests" @@ -669,7 +670,9 @@ func (suite *configTestSuite) checkUpdateDefaultReplicaConfig(cluster *tests.Tes re.NoError(err) replicationCfg := sc.ReplicationConfig{} re.NoError(json.Unmarshal(output, &replicationCfg)) - re.Equal(expect, replicationCfg.MaxReplicas) + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + return replicationCfg.MaxReplicas == expect + }) } checkLocationLabels := func(expect int) { @@ -678,7 +681,9 @@ func (suite *configTestSuite) checkUpdateDefaultReplicaConfig(cluster *tests.Tes re.NoError(err) replicationCfg := sc.ReplicationConfig{} re.NoError(json.Unmarshal(output, &replicationCfg)) - re.Len(replicationCfg.LocationLabels, expect) + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + return len(replicationCfg.LocationLabels) == expect + }) } checkIsolationLevel := func(expect string) { @@ -687,7 +692,9 @@ func (suite *configTestSuite) checkUpdateDefaultReplicaConfig(cluster *tests.Tes re.NoError(err) replicationCfg := sc.ReplicationConfig{} re.NoError(json.Unmarshal(output, &replicationCfg)) - re.Equal(replicationCfg.IsolationLevel, expect) + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + return replicationCfg.IsolationLevel == expect + }) } checkRuleCount := func(expect int) { @@ -696,7 +703,9 @@ func (suite *configTestSuite) checkUpdateDefaultReplicaConfig(cluster *tests.Tes re.NoError(err) rule := placement.Rule{} re.NoError(json.Unmarshal(output, &rule)) - re.Equal(expect, rule.Count) + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + return rule.Count == expect + }) } checkRuleLocationLabels := func(expect int) { @@ -705,7 +714,9 @@ func (suite *configTestSuite) 
checkUpdateDefaultReplicaConfig(cluster *tests.Tes re.NoError(err) rule := placement.Rule{} re.NoError(json.Unmarshal(output, &rule)) - re.Len(rule.LocationLabels, expect) + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + return len(rule.LocationLabels) == expect + }) } checkRuleIsolationLevel := func(expect string) { @@ -714,7 +725,9 @@ func (suite *configTestSuite) checkUpdateDefaultReplicaConfig(cluster *tests.Tes re.NoError(err) rule := placement.Rule{} re.NoError(json.Unmarshal(output, &rule)) - re.Equal(rule.IsolationLevel, expect) + testutil.Eventually(re, func() bool { // wait for the config to be synced to the scheduling server + return rule.IsolationLevel == expect + }) } // update successfully when placement rules is not enabled. diff --git a/tests/server/config/config_test.go b/tests/server/config/config_test.go index 62b5134d22d..a84091d9741 100644 --- a/tests/server/config/config_test.go +++ b/tests/server/config/config_test.go @@ -163,6 +163,9 @@ func (suite *configTestSuite) checkConfigAll(cluster *tests.TestCluster) { cfg.Log.Level = "warn" cfg.ReplicationMode.DRAutoSync.LabelKey = "foobar" cfg.ReplicationMode.ReplicationMode = "dr-auto-sync" + if cfg.PDServerCfg.DashboardAddress == "auto" { + cfg.PDServerCfg.DashboardAddress = newCfg1.PDServerCfg.DashboardAddress + } v, err := versioninfo.ParseVersion("v4.0.0-beta") suite.NoError(err) cfg.ClusterVersion = *v From 4005db121a9cf466749a29f42e7a541733fd89fb Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 11:19:30 +0800 Subject: [PATCH 046/116] test_29 From 4648f6fc404c5fcbf03ca3b3a30743c6ef9fc162 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 12:22:34 +0800 Subject: [PATCH 047/116] test_1 From 9191326adddc6228973da9135e41f4d8068e0353 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 12:42:39 +0800 Subject: [PATCH 048/116] test_2 From 90b93e9aaf943cc3fe291cf5990eb19491d92252 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 13:02:43 +0800 Subject: [PATCH 049/116] test_3 From 5280c6e0e95347321b83fa6a2c8d6144e2635b05 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 13:22:48 +0800 Subject: [PATCH 050/116] test_4 From 9b08be03b3e768400627659dbf964c76e3cde337 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 13:42:52 +0800 Subject: [PATCH 051/116] test_5 From a7d051c71e0d767df9dd13ebeeb16db78438f017 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 14:02:56 +0800 Subject: [PATCH 052/116] test_6 From 802c2362b7faf590e91528dfadcea66be7422ab6 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 14:23:00 +0800 Subject: [PATCH 053/116] test_7 From d6c89cd7a1ec8a064ab8aa6ceed430316a5177fd Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 14:40:37 +0800 Subject: [PATCH 054/116] remove uncommon interfaces Signed-off-by: lhy1024 --- pkg/mcs/scheduling/server/apis/v1/api.go | 37 -------------- pkg/mcs/scheduling/server/server.go | 16 ------ server/api/config.go | 47 +++++++---------- tests/integrations/mcs/scheduling/api_test.go | 50 +++++-------------- 4 files changed, 31 insertions(+), 119 deletions(-) diff --git a/pkg/mcs/scheduling/server/apis/v1/api.go b/pkg/mcs/scheduling/server/apis/v1/api.go index 7a75d04f483..9581c227741 100644 --- a/pkg/mcs/scheduling/server/apis/v1/api.go +++ b/pkg/mcs/scheduling/server/apis/v1/api.go @@ -131,9 +131,6 @@ func (s *Service) RegisterAdminRouter() { func (s *Service) RegisterConfigRouter() { router := s.root.Group("config") router.GET("", getConfig) 
- router.GET("/schedule", getScheduleConfig) - router.GET("/replicate", getReplicationConfig) - router.GET("/store", getStoreConfig) } // RegisterSchedulersRouter registers the router of the schedulers handler. @@ -208,40 +205,6 @@ func getConfig(c *gin.Context) { c.IndentedJSON(http.StatusOK, cfg) } -// @Tags config -// @Summary Get schedule config. -// @Produce json -// @Success 200 {object} config.ScheduleConfig -// @Router /config/schedule [get] -func getScheduleConfig(c *gin.Context) { - svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) - cfg := svr.GetScheduleConfig() - cfg.MaxMergeRegionKeys = cfg.GetMaxMergeRegionKeys() - c.IndentedJSON(http.StatusOK, cfg) -} - -// @Tags config -// @Summary Get replication config. -// @Produce json -// @Success 200 {object} config.ReplicationConfig -// @Router /config/replicate [get] -func getReplicationConfig(c *gin.Context) { - svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) - cfg := svr.GetReplicationConfig() - c.IndentedJSON(http.StatusOK, cfg) -} - -// @Tags config -// @Summary Get store config. -// @Produce json -// @Success 200 {object} config.StoreConfig -// @Router /config/store [get] -func getStoreConfig(c *gin.Context) { - svr := c.MustGet(multiservicesapi.ServiceContextKey).(*scheserver.Server) - cfg := svr.GetStoreConfig() - c.IndentedJSON(http.StatusOK, cfg) -} - // @Tags admin // @Summary Drop all regions from cache. // @Produce json diff --git a/pkg/mcs/scheduling/server/server.go b/pkg/mcs/scheduling/server/server.go index d692e24e505..1790cb2b4be 100644 --- a/pkg/mcs/scheduling/server/server.go +++ b/pkg/mcs/scheduling/server/server.go @@ -46,7 +46,6 @@ import ( "github.com/tikv/pd/pkg/mcs/utils" "github.com/tikv/pd/pkg/member" "github.com/tikv/pd/pkg/schedule" - sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/schedule/hbstream" "github.com/tikv/pd/pkg/schedule/schedulers" "github.com/tikv/pd/pkg/storage/endpoint" @@ -528,21 +527,6 @@ func (s *Server) GetConfig() *config.Config { return cfg } -// GetScheduleConfig gets the schedule config. -func (s *Server) GetScheduleConfig() *sc.ScheduleConfig { - return s.persistConfig.GetScheduleConfig().Clone() -} - -// GetReplicationConfig gets the replication config. -func (s *Server) GetReplicationConfig() *sc.ReplicationConfig { - return s.persistConfig.GetReplicationConfig().Clone() -} - -// GetStoreConfig gets the store config. 
-func (s *Server) GetStoreConfig() *sc.StoreConfig { - return s.persistConfig.GetStoreConfig().Clone() -} - // CreateServer creates the Server func CreateServer(ctx context.Context, cfg *config.Config) *Server { svr := &Server{ diff --git a/server/api/config.go b/server/api/config.go index 6b3fbdd9df5..b26da146651 100644 --- a/server/api/config.go +++ b/server/api/config.go @@ -63,20 +63,13 @@ func newConfHandler(svr *server.Server, rd *render.Render) *confHandler { func (h *confHandler) GetConfig(w http.ResponseWriter, r *http.Request) { cfg := h.svr.GetConfig() if h.svr.IsAPIServiceMode() { - b, err := h.GetSchedulingServerConfig("config") + schedulingServerConfig, err := h.GetSchedulingServerConfig() if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } - var configSchedulingServer config.Config - err = json.Unmarshal(b, &configSchedulingServer) - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return - } - cfg.Schedule = configSchedulingServer.Schedule - cfg.Replication = configSchedulingServer.Replication - // TODO: will we support config/store? + cfg.Schedule = schedulingServerConfig.Schedule + cfg.Replication = schedulingServerConfig.Replication } else { cfg.Schedule.MaxMergeRegionKeys = cfg.Schedule.GetMaxMergeRegionKeys() } @@ -321,18 +314,12 @@ func getConfigMap(cfg map[string]interface{}, key []string, value interface{}) m // @Router /config/schedule [get] func (h *confHandler) GetScheduleConfig(w http.ResponseWriter, r *http.Request) { if h.svr.IsAPIServiceMode() { - b, err := h.GetSchedulingServerConfig("config/schedule") - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return - } - var cfg sc.ScheduleConfig - err = json.Unmarshal(b, &cfg) + cfg, err := h.GetSchedulingServerConfig() if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } - h.rd.JSON(w, http.StatusOK, cfg) + h.rd.JSON(w, http.StatusOK, cfg.Schedule) return } cfg := h.svr.GetScheduleConfig() @@ -399,13 +386,7 @@ func (h *confHandler) SetScheduleConfig(w http.ResponseWriter, r *http.Request) // @Router /config/replicate [get] func (h *confHandler) GetReplicationConfig(w http.ResponseWriter, r *http.Request) { if h.svr.IsAPIServiceMode() { - b, err := h.GetSchedulingServerConfig("config/replicate") - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return - } - var cfg sc.ReplicationConfig - err = json.Unmarshal(b, &cfg) + cfg, err := h.GetSchedulingServerConfig() if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return @@ -555,12 +536,12 @@ func (h *confHandler) GetPDServerConfig(w http.ResponseWriter, r *http.Request) h.rd.JSON(w, http.StatusOK, h.svr.GetPDServerConfig()) } -func (h *confHandler) GetSchedulingServerConfig(path string) ([]byte, error) { +func (h *confHandler) GetSchedulingServerConfig() (*config.Config, error) { addr, ok := h.svr.GetServicePrimaryAddr(h.svr.Context(), utils.SchedulingServiceName) if !ok { return nil, errs.ErrNotFoundSchedulingAddr.FastGenByArgs() } - url := fmt.Sprintf("%s/scheduling/api/v1/%s", addr, path) + url := fmt.Sprintf("%s/scheduling/api/v1/config", addr) req, err := http.NewRequest(http.MethodGet, url, nil) if err != nil { return nil, err @@ -573,5 +554,15 @@ func (h *confHandler) GetSchedulingServerConfig(path string) ([]byte, error) { if resp.StatusCode != http.StatusOK { return nil, errs.ErrSchedulingServer.FastGenByArgs(resp.StatusCode) } - return io.ReadAll(resp.Body) + b, err := 
io.ReadAll(resp.Body) + if err != nil { + + return nil, err + } + var schedulingServerConfig config.Config + err = json.Unmarshal(b, &schedulingServerConfig) + if err != nil { + return nil, err + } + return &schedulingServerConfig, nil } diff --git a/tests/integrations/mcs/scheduling/api_test.go b/tests/integrations/mcs/scheduling/api_test.go index e4378ecdf59..d26cec04f2c 100644 --- a/tests/integrations/mcs/scheduling/api_test.go +++ b/tests/integrations/mcs/scheduling/api_test.go @@ -14,7 +14,6 @@ import ( "github.com/tikv/pd/pkg/core" _ "github.com/tikv/pd/pkg/mcs/scheduling/server/apis/v1" "github.com/tikv/pd/pkg/mcs/scheduling/server/config" - sc "github.com/tikv/pd/pkg/schedule/config" "github.com/tikv/pd/pkg/schedule/handler" "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/storage" @@ -271,21 +270,6 @@ func (suite *apiTestSuite) TestConfig() { suite.Contains(cfg.Schedule.SchedulersPayload, "balance-hot-region-scheduler") suite.Contains(cfg.Schedule.SchedulersPayload, "balance-witness-scheduler") suite.Contains(cfg.Schedule.SchedulersPayload, "transfer-witness-leader-scheduler") - - var scheduleCfg sc.ScheduleConfig - testutil.ReadGetJSON(re, testDialClient, urlPrefix+"/schedule", &scheduleCfg) - suite.Equal(scheduleCfg.LeaderScheduleLimit, s.GetScheduleConfig().LeaderScheduleLimit) - suite.Equal(scheduleCfg.EnableCrossTableMerge, s.GetScheduleConfig().EnableCrossTableMerge) - - var replicationCfg sc.ReplicationConfig - testutil.ReadGetJSON(re, testDialClient, urlPrefix+"/replicate", &replicationCfg) - suite.Equal(replicationCfg.MaxReplicas, s.GetReplicationConfig().MaxReplicas) - suite.Equal(replicationCfg.LocationLabels, s.GetReplicationConfig().LocationLabels) - - var storeCfg sc.StoreConfig - testutil.ReadGetJSON(re, testDialClient, urlPrefix+"/store", &storeCfg) - suite.Equal(storeCfg.Coprocessor.RegionMaxKeys, s.GetStoreConfig().Coprocessor.RegionMaxKeys) - suite.Equal(storeCfg.Coprocessor.RegionSplitKeys, s.GetStoreConfig().Coprocessor.RegionSplitKeys) } env := tests.NewSchedulingTestEnvironment(suite.T()) env.RunTestInAPIMode(checkConfig) @@ -295,38 +279,28 @@ func TestConfigForward(t *testing.T) { re := require.New(t) checkConfigForward := func(cluster *tests.TestCluster) { sche := cluster.GetSchedulingPrimaryServer() + opts := sche.GetPersistConfig() var cfg map[string]interface{} addr := cluster.GetLeaderServer().GetAddr() // Test config urlPrefix := fmt.Sprintf("%s/pd/api/v1/config", addr) testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - re.Equal(cfg["schedule"].(map[string]interface{})["leader-schedule-limit"], float64(sche.GetScheduleConfig().LeaderScheduleLimit)) - // Test to change config - sche.GetPersistConfig().GetScheduleConfig().LeaderScheduleLimit = 100 - re.Equal(100, int(sche.GetScheduleConfig().LeaderScheduleLimit)) - testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - re.Equal(100., cfg["schedule"].(map[string]interface{})["leader-schedule-limit"]) + re.Equal(cfg["schedule"].(map[string]interface{})["leader-schedule-limit"], + float64(opts.GetLeaderScheduleLimit())) - // Test schedule - urlPrefix = fmt.Sprintf("%s/pd/api/v1/config/schedule", addr) - testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - re.Equal(cfg["leader-schedule-limit"], float64(sche.GetScheduleConfig().LeaderScheduleLimit)) - // Test to change config - sche.GetPersistConfig().GetScheduleConfig().LeaderScheduleLimit = 4 - re.Equal(4, int(sche.GetScheduleConfig().LeaderScheduleLimit)) - testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - 
re.Equal(4., cfg["leader-schedule-limit"]) + // Test to change config only in scheduling server + // Expect to get new config in scheduling server but not old config in api server - // Test replicate - urlPrefix = fmt.Sprintf("%s/pd/api/v1/config/replicate", addr) + opts.GetScheduleConfig().LeaderScheduleLimit = 100 + re.Equal(100, int(opts.GetLeaderScheduleLimit())) testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - re.Equal(cfg["max-replicas"], float64(sche.GetReplicationConfig().MaxReplicas)) - // Test to change config - sche.GetPersistConfig().GetReplicationConfig().MaxReplicas = 5 - re.Equal(5, int(sche.GetReplicationConfig().MaxReplicas)) + re.Equal(100., cfg["schedule"].(map[string]interface{})["leader-schedule-limit"]) + + opts.GetReplicationConfig().MaxReplicas = 5 + re.Equal(5, int(opts.GetReplicationConfig().MaxReplicas)) testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - re.Equal(5., cfg["max-replicas"]) + re.Equal(5., cfg["replication"].(map[string]interface{})["max-replicas"]) } env := tests.NewSchedulingTestEnvironment(t) env.RunTestInAPIMode(checkConfigForward) From d64d3114c203127fc2dc9829f8e7ef93cd3983d5 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 14:43:04 +0800 Subject: [PATCH 055/116] test_8 From ab8bc53fd0edb71b61c375ba53029c5264639440 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 14:53:22 +0800 Subject: [PATCH 056/116] fix Signed-off-by: lhy1024 --- server/api/config.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/api/config.go b/server/api/config.go index b26da146651..91d3d4ac8c0 100644 --- a/server/api/config.go +++ b/server/api/config.go @@ -319,6 +319,7 @@ func (h *confHandler) GetScheduleConfig(w http.ResponseWriter, r *http.Request) h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } + cfg.Schedule.SchedulersPayload = nil h.rd.JSON(w, http.StatusOK, cfg.Schedule) return } @@ -391,7 +392,7 @@ func (h *confHandler) GetReplicationConfig(w http.ResponseWriter, r *http.Reques h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } - h.rd.JSON(w, http.StatusOK, cfg) + h.rd.JSON(w, http.StatusOK, cfg.Replication) return } h.rd.JSON(w, http.StatusOK, h.svr.GetReplicationConfig()) From ab06849a61f8cff9adccd5421ae8feba39e2b27c Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 14:57:31 +0800 Subject: [PATCH 057/116] fix lint Signed-off-by: lhy1024 --- server/api/config.go | 1 - 1 file changed, 1 deletion(-) diff --git a/server/api/config.go b/server/api/config.go index 91d3d4ac8c0..746b1119a73 100644 --- a/server/api/config.go +++ b/server/api/config.go @@ -557,7 +557,6 @@ func (h *confHandler) GetSchedulingServerConfig() (*config.Config, error) { } b, err := io.ReadAll(resp.Body) if err != nil { - return nil, err } var schedulingServerConfig config.Config From ccd37e1727b1e21e235fcb63d3f0436b243c613e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 15:03:09 +0800 Subject: [PATCH 058/116] test_9 From 589cd8080d1987fff03755220d845b8350390c6d Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 15:23:13 +0800 Subject: [PATCH 059/116] test_10 From dd2e3e53c1b204fbad43763ac6485d962a336fd4 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 15:43:17 +0800 Subject: [PATCH 060/116] test_11 From c39f61cafc274f1ba080892ac7345a4792bc697b Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 16:03:22 +0800 Subject: [PATCH 061/116] test_12 From d1788f623bacb410a3748f0b47f32b32a14e1773 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 
2 Nov 2023 16:23:26 +0800 Subject: [PATCH 062/116] test_13 From 343411fef22b223e1acc8260b4a196413010e02d Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 16:43:30 +0800 Subject: [PATCH 063/116] test_14 From b9cb9f1d05a4136cbf4ac6e97a15713733de4821 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 17:03:35 +0800 Subject: [PATCH 064/116] test_15 From 37dd60fefd4950beeff8900939f40efee3770176 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 17:23:39 +0800 Subject: [PATCH 065/116] test_16 From d7987c6539e94d878e61387380233ec7044cf832 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 17:43:44 +0800 Subject: [PATCH 066/116] test_17 From 9b7f06fb524c61b13555f64946ad33af526d6523 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 18:03:48 +0800 Subject: [PATCH 067/116] test_18 From e770961beeac7125e5f3b908ff13c972f45c0360 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 18:23:53 +0800 Subject: [PATCH 068/116] test_19 From 5ecb8624789fe49573909292d0dc9a931f91b6a2 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 18:43:57 +0800 Subject: [PATCH 069/116] test_20 From e5bd353ec1116911f911c4614491938a8a4c5120 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 19:04:01 +0800 Subject: [PATCH 070/116] test_21 From b7480514a96495b92c5385e2e7a9edb695fa050f Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 19:24:06 +0800 Subject: [PATCH 071/116] test_22 From 4418735bd3348fff5a6d7e8cc33431a6e5c17bc7 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 19:44:10 +0800 Subject: [PATCH 072/116] test_23 From 42b0ca2b5c15a6da093b5f36b88b5a1936b9a67e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 20:04:14 +0800 Subject: [PATCH 073/116] test_24 From 9f9d2a7c2ce4da679142e5309472e34c73c005b4 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 20:24:19 +0800 Subject: [PATCH 074/116] test_25 From 41f35c2ea0e520e7b0b94a6993ed2a44ad59150e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 20:37:07 +0800 Subject: [PATCH 075/116] make test stable Signed-off-by: lhy1024 --- tests/integrations/mcs/scheduling/api_test.go | 17 ++++++++++++----- tests/server/config/config_test.go | 10 +++++----- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tests/integrations/mcs/scheduling/api_test.go b/tests/integrations/mcs/scheduling/api_test.go index d26cec04f2c..fe6ae325056 100644 --- a/tests/integrations/mcs/scheduling/api_test.go +++ b/tests/integrations/mcs/scheduling/api_test.go @@ -282,12 +282,19 @@ func TestConfigForward(t *testing.T) { opts := sche.GetPersistConfig() var cfg map[string]interface{} addr := cluster.GetLeaderServer().GetAddr() - - // Test config urlPrefix := fmt.Sprintf("%s/pd/api/v1/config", addr) - testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) - re.Equal(cfg["schedule"].(map[string]interface{})["leader-schedule-limit"], - float64(opts.GetLeaderScheduleLimit())) + + // Test config forward + // Expect to get same config in scheduling server and api server + testutil.Eventually(re, func() bool { + testutil.ReadGetJSON(re, testDialClient, urlPrefix, &cfg) + re.Equal(cfg["schedule"].(map[string]interface{})["leader-schedule-limit"], + float64(opts.GetLeaderScheduleLimit())) + re.Equal(cfg["replication"].(map[string]interface{})["max-replicas"], + float64(opts.GetReplicationConfig().MaxReplicas)) + schedulers := cfg["schedule"].(map[string]interface{})["schedulers-payload"].(map[string]interface{}) + return len(schedulers) == 5 + }) // Test to change config 
only in scheduling server // Expect to get new config in scheduling server but not old config in api server diff --git a/tests/server/config/config_test.go b/tests/server/config/config_test.go index a84091d9741..8d8cf40e692 100644 --- a/tests/server/config/config_test.go +++ b/tests/server/config/config_test.go @@ -103,8 +103,11 @@ func (suite *configTestSuite) checkConfigAll(cluster *tests.TestCluster) { addr := fmt.Sprintf("%s/pd/api/v1/config", urlPrefix) cfg := &config.Config{} - err := tu.ReadGetJSON(re, testDialClient, addr, cfg) - suite.NoError(err) + tu.Eventually(re, func() bool { + err := tu.ReadGetJSON(re, testDialClient, addr, cfg) + suite.NoError(err) + return cfg.PDServerCfg.DashboardAddress != "auto" + }) // the original way r := map[string]int{"max-replicas": 5} @@ -163,9 +166,6 @@ func (suite *configTestSuite) checkConfigAll(cluster *tests.TestCluster) { cfg.Log.Level = "warn" cfg.ReplicationMode.DRAutoSync.LabelKey = "foobar" cfg.ReplicationMode.ReplicationMode = "dr-auto-sync" - if cfg.PDServerCfg.DashboardAddress == "auto" { - cfg.PDServerCfg.DashboardAddress = newCfg1.PDServerCfg.DashboardAddress - } v, err := versioninfo.ParseVersion("v4.0.0-beta") suite.NoError(err) cfg.ClusterVersion = *v From cbfeb9948e1cd6de251df58df9bf1d71de9c3b3e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 20:44:23 +0800 Subject: [PATCH 076/116] test_26 From 55537eaac03ee0083fec96b22c526f1198f09e9c Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 21:04:28 +0800 Subject: [PATCH 077/116] test_27 From df6fb93fb8d7a216e9b50764563f643f9c7f5e42 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 21:24:32 +0800 Subject: [PATCH 078/116] test_28 From 49bebcf37008c7f3e50a3e8134d80f6d46ea59fd Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 21:44:37 +0800 Subject: [PATCH 079/116] test_29 From 24bd8f86b8d9fdbe3958bf6f0a1f620a8e498703 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 22:04:41 +0800 Subject: [PATCH 080/116] test_30 From f9dfa89cd4cc1246b180da43f07be436dc9883d0 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 22:24:45 +0800 Subject: [PATCH 081/116] test_31 From d9266d625286aab6cfe0c9b59de4e2520f2f1e6e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 22:44:50 +0800 Subject: [PATCH 082/116] test_32 From 795ecd6bbbf5e27e43813499852dc31959a4d7ec Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 23:04:54 +0800 Subject: [PATCH 083/116] test_33 From 2f3501577cff27f8a2734e7add9828b2c5fdd8a0 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 23:24:58 +0800 Subject: [PATCH 084/116] test_34 From 20a7057aa51f855b8957776d567175c615dbfa67 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Thu, 2 Nov 2023 23:45:03 +0800 Subject: [PATCH 085/116] test_35 From 587dcf4797666f9fa124bfe194e29dc5b880b57e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 00:05:08 +0800 Subject: [PATCH 086/116] test_36 From 58d42ff54600fd7c6dca11d08879c0b67a193857 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 00:25:12 +0800 Subject: [PATCH 087/116] test_37 From d75e0c8d6b825f16a61a875df66dd71da8a760a0 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 00:45:16 +0800 Subject: [PATCH 088/116] test_38 From 9828981ad460205a1d9a84a44d515e74a01b07ea Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 01:05:20 +0800 Subject: [PATCH 089/116] test_39 From c31b0d9e9068f8c8284e909ab5981320111c22b5 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 01:25:25 +0800 Subject: [PATCH 090/116] 
test_40 From d8ef237ac95913f4744d050a27b36baff598751e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 01:45:29 +0800 Subject: [PATCH 091/116] test_41 From 1c070527a490311409fd6e58a047e98f22ce68be Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 02:05:33 +0800 Subject: [PATCH 092/116] test_42 From 97bf7aa87d5ef08d58f7c3bf5242ee6179b4935d Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 02:25:38 +0800 Subject: [PATCH 093/116] test_43 From f56f78c609e33c787bf8ba7a4a66c063a0022a57 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 02:45:42 +0800 Subject: [PATCH 094/116] test_44 From 9cbbe37f86aacb07595e43b8425d7627c569bc30 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 03:05:46 +0800 Subject: [PATCH 095/116] test_45 From dea7a6b74542236340976ffad479233b53f3870e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 03:25:50 +0800 Subject: [PATCH 096/116] test_46 From ae99160137d28264546f8a647099a4a12a37a345 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 03:45:55 +0800 Subject: [PATCH 097/116] test_47 From 077c0f9e99a3cd87aebe9ede984430168520dd42 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 04:05:59 +0800 Subject: [PATCH 098/116] test_48 From 48feff3c3360018b0ac33c6a04f7962c8a6288c7 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 04:26:03 +0800 Subject: [PATCH 099/116] test_49 From 23e883b0e5f03eba02f8cd99e243dadff20b55df Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 04:46:08 +0800 Subject: [PATCH 100/116] test_50 From a7c1b73d31d869391e3a0da50655ca2211b6f630 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 05:06:12 +0800 Subject: [PATCH 101/116] test_51 From f08e5405b194e41c6b25e8e0f0ca5fd9af8304d3 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 05:26:16 +0800 Subject: [PATCH 102/116] test_52 From 98840ac7ca4e7ceb8c8e6b51f548a6a90cf7745e Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 05:46:20 +0800 Subject: [PATCH 103/116] test_53 From b976d4942dee77a3c15e694e5eb9461b8179cdde Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 06:06:25 +0800 Subject: [PATCH 104/116] test_54 From 53982c66d97aedc4b523e7a92b1653c66bdab270 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 06:26:29 +0800 Subject: [PATCH 105/116] test_55 From 118da11e8b7a0f44aaff9ad14641b3fab52b938c Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 06:46:33 +0800 Subject: [PATCH 106/116] test_56 From 9786aa34fb70ee625b53ba179d11c261d7b512da Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 07:06:37 +0800 Subject: [PATCH 107/116] test_57 From e0202fae9225097102557cca9c8980e704687a74 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 07:26:42 +0800 Subject: [PATCH 108/116] test_58 From 4798d63b34c069989af1b3c4e7a819dad16e2497 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 07:46:46 +0800 Subject: [PATCH 109/116] test_59 From fa07113438e9b809cfe6fd55afc20f9f61251059 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 08:06:50 +0800 Subject: [PATCH 110/116] test_60 From 30e09fa159849d722c606b4d9182cf7cf360022c Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 08:26:54 +0800 Subject: [PATCH 111/116] test_61 From 39c25a713c93be11fd8fe7e211c9ba17b9252a6b Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 08:46:58 +0800 Subject: [PATCH 112/116] test_62 From 580ba585cabd77aaa07d8c249e562495a41e3b16 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 09:07:02 +0800 Subject: 
[PATCH 113/116] test_63 From 09f5f67b1659fa2dd211e6eb5c17756d857b7a96 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 09:27:07 +0800 Subject: [PATCH 114/116] test_64 From c37afc673696f5730c93c0d3c62b2f644eeda06b Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 09:47:11 +0800 Subject: [PATCH 115/116] test_65 From 87b4463e9bd462017f39dc758f0c58ce21bceae7 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Fri, 3 Nov 2023 10:07:16 +0800 Subject: [PATCH 116/116] test_66
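
The "make test stable" patches ([PATCH 045/116] and [PATCH 075/116]) share one idea: the scheduling microservice receives configuration from the API server asynchronously, so a test that reads a value immediately after changing it can observe a stale copy. The fix is to poll until the observed value matches the expectation instead of asserting once. Below is a minimal, self-contained sketch of that retry-until-synced pattern in plain Go; it is not the repository's testutil.Eventually helper, and the fetchMaxReplicas function, endpoint URL, and timeouts are illustrative assumptions.

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// eventually polls condition until it returns true or the timeout expires.
// It mirrors the retry-until-synced shape used by the "make test stable"
// patches; the real tests rely on the repository's own polling helper.
func eventually(condition func() bool, timeout, interval time.Duration) bool {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if condition() {
			return true
		}
		time.Sleep(interval)
	}
	return false
}

// fetchMaxReplicas is an illustrative helper (not part of pd): it reads
// max-replicas from a /pd/api/v1/config/replicate-style endpoint.
func fetchMaxReplicas(url string) (uint64, error) {
	resp, err := http.Get(url)
	if err != nil {
		return 0, err
	}
	defer resp.Body.Close()
	var cfg struct {
		MaxReplicas uint64 `json:"max-replicas"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&cfg); err != nil {
		return 0, err
	}
	return cfg.MaxReplicas, nil
}

func main() {
	// Hypothetical leader address; in the tests this would be the PD leader URL.
	url := "http://127.0.0.1:2379/pd/api/v1/config/replicate"
	synced := eventually(func() bool {
		replicas, err := fetchMaxReplicas(url)
		return err == nil && replicas == 5
	}, 10*time.Second, 100*time.Millisecond)
	fmt.Println("config synced:", synced)
}

The same pattern shows up in tests/server/config/config_test.go above, where the assertion waits until PDServerCfg.DashboardAddress has been resolved away from "auto" before comparing full configs.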
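
[PATCH 054/116] "remove uncommon interfaces" deletes the scheduling server's /config/schedule, /config/replicate, and /config/store routes; in API-service mode the PD API server instead fetches the single /scheduling/api/v1/config payload from the scheduling primary, unmarshals it once, and serves only the section the caller asked for ([PATCH 056/116] then clears SchedulersPayload and returns cfg.Replication for /config/replicate). The sketch below shows that fetch-once-and-slice shape under simplified assumptions: schedulingConfig is a trimmed stand-in for the real config type, the address and timeout are hypothetical, and error handling is condensed.

package main

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"time"
)

// schedulingConfig is a trimmed-down stand-in for the scheduling server's
// config payload; the real handler unmarshals into the scheduling server's
// config.Config type.
type schedulingConfig struct {
	Schedule    map[string]interface{} `json:"schedule"`
	Replication map[string]interface{} `json:"replication"`
}

// getSchedulingServerConfig fetches the whole config from the scheduling
// primary once; callers then slice out Schedule or Replication as needed.
func getSchedulingServerConfig(addr string) (*schedulingConfig, error) {
	url := fmt.Sprintf("%s/scheduling/api/v1/config", addr)
	client := &http.Client{Timeout: 3 * time.Second}
	resp, err := client.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("scheduling server returned status %d", resp.StatusCode)
	}
	b, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	var cfg schedulingConfig
	if err := json.Unmarshal(b, &cfg); err != nil {
		return nil, err
	}
	return &cfg, nil
}

func main() {
	// Hypothetical scheduling primary address.
	cfg, err := getSchedulingServerConfig("http://127.0.0.1:3379")
	if err != nil {
		fmt.Println("forward failed:", err)
		return
	}
	// /config/replicate serves cfg.Replication, /config/schedule serves cfg.Schedule.
	fmt.Println("max-replicas:", cfg.Replication["max-replicas"])
}

Because the payload travels as JSON, numeric fields decode into float64 on the API-server side, which is why the forwarding tests above compare against literals such as 100. and 5. rather than integers.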