From 11b752e0bfdf0167029667a33bfc5397f311db38 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Thu, 14 Sep 2023 02:15:08 +0800 Subject: [PATCH] scatter: fix the unexpected error code (#7075) close tikv/pd#7073 scatter: fix the unexpected error code Signed-off-by: nolouch Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com> --- errors.toml | 5 ++++ pkg/errs/errno.go | 1 + pkg/schedule/scatter/region_scatterer.go | 14 +++++----- pkg/schedule/scatter/region_scatterer_test.go | 26 ++++++++++++++----- server/grpc_service.go | 19 +++++--------- 5 files changed, 39 insertions(+), 26 deletions(-) diff --git a/errors.toml b/errors.toml index 95324df1888..a0040195e5d 100644 --- a/errors.toml +++ b/errors.toml @@ -111,6 +111,11 @@ error = ''' failed to get the source store ''' +["PD:common:ErrGetTargetStore"] +error = ''' +failed to get the target store +''' + ["PD:common:ErrIncorrectSystemTime"] error = ''' incorrect system time diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index 91cd4a78c4f..a077751f561 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -34,6 +34,7 @@ const ( // common error in multiple packages var ( ErrGetSourceStore = errors.Normalize("failed to get the source store", errors.RFCCodeText("PD:common:ErrGetSourceStore")) + ErrGetTargetStore = errors.Normalize("failed to get the target store", errors.RFCCodeText("PD:common:ErrGetTargetStore")) ErrIncorrectSystemTime = errors.Normalize("incorrect system time", errors.RFCCodeText("PD:common:ErrIncorrectSystemTime")) ) diff --git a/pkg/schedule/scatter/region_scatterer.go b/pkg/schedule/scatter/region_scatterer.go index a676afca6cf..c47bcd27e91 100644 --- a/pkg/schedule/scatter/region_scatterer.go +++ b/pkg/schedule/scatter/region_scatterer.go @@ -284,10 +284,10 @@ func (r *RegionScatterer) Scatter(region *core.RegionInfo, group string, skipSto return nil, errors.Errorf("region %d is hot", region.GetID()) } - return r.scatterRegion(region, group, skipStoreLimit), nil + return r.scatterRegion(region, group, skipStoreLimit) } -func (r *RegionScatterer) scatterRegion(region *core.RegionInfo, group string, skipStoreLimit bool) *operator.Operator { +func (r *RegionScatterer) scatterRegion(region *core.RegionInfo, group string, skipStoreLimit bool) (*operator.Operator, error) { engineFilter := filter.NewEngineFilter(r.name, filter.NotSpecialEngines) ordinaryPeers := make(map[uint64]*metapb.Peer, len(region.GetPeers())) specialPeers := make(map[string]map[uint64]*metapb.Peer) @@ -296,7 +296,7 @@ func (r *RegionScatterer) scatterRegion(region *core.RegionInfo, group string, s for _, peer := range region.GetPeers() { store := r.cluster.GetStore(peer.GetStoreId()) if store == nil { - return nil + return nil, errs.ErrGetSourceStore.FastGenByArgs(fmt.Sprintf("store not found, peer: %v, region id: %d", peer, region.GetID())) } if engineFilter.Target(r.cluster.GetSharedConfig(), store).IsOK() { ordinaryPeers[peer.GetStoreId()] = peer @@ -358,7 +358,7 @@ func (r *RegionScatterer) scatterRegion(region *core.RegionInfo, group string, s targetLeader, leaderStorePickedCount := r.selectAvailableLeaderStore(group, region, leaderCandidateStores, r.ordinaryEngine) if targetLeader == 0 { scatterSkipNoLeaderCounter.Inc() - return nil + return nil, errs.ErrGetTargetStore.FastGenByArgs(fmt.Sprintf("no target leader store found, region: %v", region)) } for engine, peers := range specialPeers { @@ -375,7 +375,7 @@ func (r *RegionScatterer) scatterRegion(region *core.RegionInfo, group string, s if isSameDistribution(region, targetPeers, targetLeader) { scatterUnnecessaryCounter.Inc() r.Put(targetPeers, targetLeader, group) - return nil + return nil, nil } op, err := operator.CreateScatterRegionOperator("scatter-region", r.cluster, region, targetPeers, targetLeader, skipStoreLimit) if err != nil { @@ -385,7 +385,7 @@ func (r *RegionScatterer) scatterRegion(region *core.RegionInfo, group string, s } r.Put(targetPeers, region.GetLeader().GetStoreId(), group) log.Debug("fail to create scatter region operator", errs.ZapError(err)) - return nil + return nil, errs.ErrCreateOperator.FastGenByArgs(fmt.Sprintf("failed to create scatter region operator for region %v", region.GetID())) } if op != nil { scatterSuccessCounter.Inc() @@ -394,7 +394,7 @@ func (r *RegionScatterer) scatterRegion(region *core.RegionInfo, group string, s op.AdditionalInfos["leader-picked-count"] = strconv.FormatUint(leaderStorePickedCount, 10) op.SetPriorityLevel(constant.High) } - return op + return op, nil } func allowLeader(fit *placement.RegionFit, peer *metapb.Peer) bool { diff --git a/pkg/schedule/scatter/region_scatterer_test.go b/pkg/schedule/scatter/region_scatterer_test.go index 5bdbdcbf159..0fc7f0967d7 100644 --- a/pkg/schedule/scatter/region_scatterer_test.go +++ b/pkg/schedule/scatter/region_scatterer_test.go @@ -105,12 +105,18 @@ func scatter(re *require.Assertions, numStores, numRegions uint64, useRules bool tc.AddLeaderRegion(i, 1, 2, 3) } scatterer := NewRegionScatterer(ctx, tc, oc, tc.AddSuspectRegions) - + noNeedMoveNum := 0 for i := uint64(1); i <= numRegions; i++ { region := tc.GetRegion(i) - if op, _ := scatterer.Scatter(region, "", false); op != nil { + if op, err := scatterer.Scatter(region, "", false); err == nil { + if op == nil { + noNeedMoveNum++ + continue + } checkOperator(re, op) operator.ApplyOperator(tc, op) + } else { + re.Nil(op) } } @@ -140,6 +146,7 @@ func scatter(re *require.Assertions, numStores, numRegions uint64, useRules bool re.LessOrEqual(float64(count), 1.1*float64(numRegions)/float64(numStores)) re.GreaterOrEqual(float64(count), 0.9*float64(numRegions)/float64(numStores)) } + re.GreaterOrEqual(noNeedMoveNum, 0) } func scatterSpecial(re *require.Assertions, numOrdinaryStores, numSpecialStores, numRegions uint64) { @@ -651,7 +658,8 @@ func TestSelectedStoresTooFewPeers(t *testing.T) { // Try to scatter a region with peer store id 2/3/4 for i := uint64(1); i < 20; i++ { region := tc.AddLeaderRegion(i+200, i%3+2, (i+1)%3+2, (i+2)%3+2) - op := scatterer.scatterRegion(region, group, false) + op, err := scatterer.scatterRegion(region, group, false) + re.NoError(err) re.False(isPeerCountChanged(op)) if op != nil { re.Equal(group, op.AdditionalInfos["group"]) @@ -691,7 +699,8 @@ func TestSelectedStoresTooManyPeers(t *testing.T) { // test region with peer 1 2 3 for i := uint64(1); i < 20; i++ { region := tc.AddLeaderRegion(i+200, i%3+1, (i+1)%3+1, (i+2)%3+1) - op := scatterer.scatterRegion(region, group, false) + op, err := scatterer.scatterRegion(region, group, false) + re.NoError(err) re.False(isPeerCountChanged(op)) } } @@ -715,7 +724,8 @@ func TestBalanceLeader(t *testing.T) { scatterer := NewRegionScatterer(ctx, tc, oc, tc.AddSuspectRegions) for i := uint64(1001); i <= 1300; i++ { region := tc.AddLeaderRegion(i, 2, 3, 4) - op := scatterer.scatterRegion(region, group, false) + op, err := scatterer.scatterRegion(region, group, false) + re.NoError(err) re.False(isPeerCountChanged(op)) } // all leader will be balanced in three stores. @@ -745,7 +755,8 @@ func TestBalanceRegion(t *testing.T) { scatterer := NewRegionScatterer(ctx, tc, oc, tc.AddSuspectRegions) for i := uint64(1001); i <= 1300; i++ { region := tc.AddLeaderRegion(i, 2, 4, 6) - op := scatterer.scatterRegion(region, group, false) + op, err := scatterer.scatterRegion(region, group, false) + re.NoError(err) re.False(isPeerCountChanged(op)) } for i := uint64(2); i <= 7; i++ { @@ -754,7 +765,8 @@ func TestBalanceRegion(t *testing.T) { // Test for unhealthy region // ref https://github.com/tikv/pd/issues/6099 region := tc.AddLeaderRegion(1500, 2, 3, 4, 6) - op := scatterer.scatterRegion(region, group, false) + op, err := scatterer.scatterRegion(region, group, false) + re.NoError(err) re.False(isPeerCountChanged(op)) } diff --git a/server/grpc_service.go b/server/grpc_service.go index 973c45a622f..1d20bb22d4d 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -1709,18 +1709,13 @@ func (s *GrpcServer) ScatterRegion(ctx context.Context, request *pdpb.ScatterReg return nil, err } - if op == nil { - return &pdpb.ScatterRegionResponse{ - Header: s.wrapErrorToHeader(pdpb.ErrorType_UNKNOWN, - "operator could not be allocated"), - }, nil - } - - if !rc.GetOperatorController().AddOperator(op) { - return &pdpb.ScatterRegionResponse{ - Header: s.wrapErrorToHeader(pdpb.ErrorType_UNKNOWN, - "operator cancelled because store limit exceeded"), - }, nil + if op != nil { + if !rc.GetOperatorController().AddOperator(op) { + return &pdpb.ScatterRegionResponse{ + Header: s.wrapErrorToHeader(pdpb.ErrorType_UNKNOWN, + "operator canceled because cannot add an operator to the execute queue"), + }, nil + } } return &pdpb.ScatterRegionResponse{