diff --git a/Makefile b/Makefile
index f6a1c9a809a..99d6d680a7a 100644
--- a/Makefile
+++ b/Makefile
@@ -148,7 +148,7 @@ static: install-tools
 	@ echo "gofmt ..."
 	@ gofmt -s -l -d $(PACKAGE_DIRECTORIES) 2>&1 | awk '{ print } END { if (NR > 0) { exit 1 } }'
 	@ echo "golangci-lint ..."
-	@ golangci-lint run --verbose $(PACKAGE_DIRECTORIES)
+	@ golangci-lint run --verbose $(PACKAGE_DIRECTORIES) --allow-parallel-runners
 	@ echo "revive ..."
 	@ revive -formatter friendly -config revive.toml $(PACKAGES)
 
diff --git a/client/Makefile b/client/Makefile
index b55b6fc8d9d..93c7f247b36 100644
--- a/client/Makefile
+++ b/client/Makefile
@@ -32,8 +32,15 @@ install-tools:
 
 static: install-tools
 	@ gofmt -s -l -d . 2>&1 | awk '{ print } END { if (NR > 0) { exit 1 } }'
+<<<<<<< HEAD
 	@ golangci-lint run -c ../.golangci.yml ./...
 	@ revive -formatter friendly -config ../revive.toml .
+=======
+	@ echo "golangci-lint ..."
+	@ golangci-lint run -c ../.golangci.yml --verbose ./... --allow-parallel-runners
+	@ echo "revive ..."
+	@ revive -formatter friendly -config ../revive.toml ./...
+>>>>>>> f916e90eb (rule_checker: can replace unhealthPeer with orphanPeer (#6831))
 
 tidy:
 	@ go mod tidy
diff --git a/server/api/operator_test.go b/server/api/operator_test.go
index 4b30f9f5e16..973f679f098 100644
--- a/server/api/operator_test.go
+++ b/server/api/operator_test.go
@@ -217,7 +217,13 @@ func (suite *transferRegionOperatorTestSuite) TestTransferRegionWithPlacementRul
 	regionURL := fmt.Sprintf("%s/operators/%d", suite.urlPrefix, region.GetId())
 	operator := mustReadURL(re, regionURL)
 	suite.Contains(operator, "operator not found")
-
+	convertStepsToStr := func(steps []string) string {
+		stepStrs := make([]string, len(steps))
+		for i := range steps {
+			stepStrs[i] = fmt.Sprintf("%d:{%s}", i, steps[i])
+		}
+		return strings.Join(stepStrs, ", ")
+	}
 	testCases := []struct {
 		name                string
 		placementRuleEnable bool
@@ -231,25 +237,25 @@ func (suite *transferRegionOperatorTestSuite) TestTransferRegionWithPlacementRul
 			placementRuleEnable: false,
 			input:               []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3]}`),
 			expectedError:       nil,
-			expectSteps: strings.Join([]string{
+			expectSteps: convertStepsToStr([]string{
 				pdoperator.AddLearner{ToStore: 3, PeerID: 1}.String(),
 				pdoperator.PromoteLearner{ToStore: 3, PeerID: 1}.String(),
 				pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(),
 				pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(),
-			}, ", "),
+			}),
 		},
 		{
 			name:                "placement rule disable with peer role",
 			placementRuleEnable: false,
 			input:               []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["follower", "leader"]}`),
 			expectedError:       nil,
-			expectSteps: strings.Join([]string{
+			expectSteps: convertStepsToStr([]string{
 				pdoperator.AddLearner{ToStore: 3, PeerID: 2}.String(),
 				pdoperator.PromoteLearner{ToStore: 3, PeerID: 2}.String(),
 				pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(),
 				pdoperator.RemovePeer{FromStore: 1, PeerID: 2}.String(),
 				pdoperator.TransferLeader{FromStore: 2, ToStore: 3}.String(),
-			}, ", "),
+			}),
 		},
 		{
 			name:                "default placement rule without peer role",
@@ -262,13 +268,13 @@ func (suite *transferRegionOperatorTestSuite) TestTransferRegionWithPlacementRul
 			name:                "default placement rule with peer role",
 			placementRuleEnable: true,
 			input:               []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["follower", "leader"]}`),
-			expectSteps: strings.Join([]string{
+			expectSteps: convertStepsToStr([]string{
 				pdoperator.AddLearner{ToStore: 3, PeerID: 3}.String(),
 				pdoperator.PromoteLearner{ToStore: 3, PeerID: 3}.String(),
 				pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(),
 				pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(),
 				pdoperator.TransferLeader{FromStore: 2, ToStore: 3}.String(),
-			}, ", "),
+			}),
 		},
 		{
 			name:                "default placement rule with invalid input",
@@ -323,12 +329,12 @@ func (suite *transferRegionOperatorTestSuite) TestTransferRegionWithPlacementRul
 			},
 			input:         []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["follower", "leader"]}`),
 			expectedError: nil,
-			expectSteps: strings.Join([]string{
+			expectSteps: convertStepsToStr([]string{
 				pdoperator.AddLearner{ToStore: 3, PeerID: 5}.String(),
 				pdoperator.PromoteLearner{ToStore: 3, PeerID: 5}.String(),
 				pdoperator.TransferLeader{FromStore: 1, ToStore: 3}.String(),
 				pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(),
-			}, ", "),
+			}),
 		},
 		{
 			name:                "customized placement rule with valid peer role2",
@@ -363,12 +369,12 @@ func (suite *transferRegionOperatorTestSuite) TestTransferRegionWithPlacementRul
 			},
 			input:         []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["leader", "follower"]}`),
 			expectedError: nil,
-			expectSteps: strings.Join([]string{
+			expectSteps: convertStepsToStr([]string{
 				pdoperator.AddLearner{ToStore: 3, PeerID: 6}.String(),
 				pdoperator.PromoteLearner{ToStore: 3, PeerID: 6}.String(),
 				pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(),
 				pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(),
-			}, ", "),
+			}),
 		},
 	}
 	for _, testCase := range testCases {
diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go
index 501de81e46d..ad6d434055f 100644
--- a/server/schedule/checker/rule_checker.go
+++ b/server/schedule/checker/rule_checker.go
@@ -42,6 +42,38 @@ var (
 	errPeerCannotBeWitness = errors.New("peer cannot be witness")
 	errNoNewLeader         = errors.New("no new leader")
 	errRegionNoLeader      = errors.New("region no leader")
+<<<<<<< HEAD:server/schedule/checker/rule_checker.go
+=======
+	// WithLabelValues is a heavy operation, so define variables to avoid calling it every time.
+	ruleCheckerCounter = checkerCounter.WithLabelValues(ruleChecker, "check")
+	ruleCheckerPausedCounter = checkerCounter.WithLabelValues(ruleChecker, "paused")
+	ruleCheckerRegionNoLeaderCounter = checkerCounter.WithLabelValues(ruleChecker, "region-no-leader")
+	ruleCheckerGetCacheCounter = checkerCounter.WithLabelValues(ruleChecker, "get-cache")
+	ruleCheckerNeedSplitCounter = checkerCounter.WithLabelValues(ruleChecker, "need-split")
+	ruleCheckerSetCacheCounter = checkerCounter.WithLabelValues(ruleChecker, "set-cache")
+	ruleCheckerReplaceDownCounter = checkerCounter.WithLabelValues(ruleChecker, "replace-down")
+	ruleCheckerPromoteWitnessCounter = checkerCounter.WithLabelValues(ruleChecker, "promote-witness")
+	ruleCheckerReplaceOfflineCounter = checkerCounter.WithLabelValues(ruleChecker, "replace-offline")
+	ruleCheckerAddRulePeerCounter = checkerCounter.WithLabelValues(ruleChecker, "add-rule-peer")
+	ruleCheckerNoStoreAddCounter = checkerCounter.WithLabelValues(ruleChecker, "no-store-add")
+	ruleCheckerNoStoreReplaceCounter = checkerCounter.WithLabelValues(ruleChecker, "no-store-replace")
+	ruleCheckerFixPeerRoleCounter = checkerCounter.WithLabelValues(ruleChecker, "fix-peer-role")
+	ruleCheckerFixLeaderRoleCounter = checkerCounter.WithLabelValues(ruleChecker, "fix-leader-role")
+	ruleCheckerNotAllowLeaderCounter = checkerCounter.WithLabelValues(ruleChecker, "not-allow-leader")
+	ruleCheckerFixFollowerRoleCounter = checkerCounter.WithLabelValues(ruleChecker, "fix-follower-role")
+	ruleCheckerNoNewLeaderCounter = checkerCounter.WithLabelValues(ruleChecker, "no-new-leader")
+	ruleCheckerDemoteVoterRoleCounter = checkerCounter.WithLabelValues(ruleChecker, "demote-voter-role")
+	ruleCheckerRecentlyPromoteToNonWitnessCounter = checkerCounter.WithLabelValues(ruleChecker, "recently-promote-to-non-witness")
+	ruleCheckerCancelSwitchToWitnessCounter = checkerCounter.WithLabelValues(ruleChecker, "cancel-switch-to-witness")
+	ruleCheckerSetVoterWitnessCounter = checkerCounter.WithLabelValues(ruleChecker, "set-voter-witness")
+	ruleCheckerSetLearnerWitnessCounter = checkerCounter.WithLabelValues(ruleChecker, "set-learner-witness")
+	ruleCheckerSetVoterNonWitnessCounter = checkerCounter.WithLabelValues(ruleChecker, "set-voter-non-witness")
+	ruleCheckerSetLearnerNonWitnessCounter = checkerCounter.WithLabelValues(ruleChecker, "set-learner-non-witness")
+	ruleCheckerMoveToBetterLocationCounter = checkerCounter.WithLabelValues(ruleChecker, "move-to-better-location")
+	ruleCheckerSkipRemoveOrphanPeerCounter = checkerCounter.WithLabelValues(ruleChecker, "skip-remove-orphan-peer")
+	ruleCheckerRemoveOrphanPeerCounter = checkerCounter.WithLabelValues(ruleChecker, "remove-orphan-peer")
+	ruleCheckerReplaceOrphanPeerCounter = checkerCounter.WithLabelValues(ruleChecker, "replace-orphan-peer")
+>>>>>>> f916e90eb (rule_checker: can replace unhealthPeer with orphanPeer (#6831)):pkg/schedule/checker/rule_checker.go
 )
 
 const maxPendingListLen = 100000
@@ -390,14 +422,15 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg
 	if len(fit.OrphanPeers) == 0 {
 		return nil, nil
 	}
+	var pinDownPeer *metapb.Peer
 	isUnhealthyPeer := func(id uint64) bool {
-		for _, pendingPeer := range region.GetPendingPeers() {
-			if pendingPeer.GetId() == id {
+		for _, downPeer := range region.GetDownPeers() {
+			if downPeer.Peer.GetId() == id {
 				return true
 			}
 		}
-		for _, downPeer := range region.GetDownPeers() {
-			if downPeer.Peer.GetId() == id {
+		for _, pendingPeer := range region.GetPendingPeers() {
+			if pendingPeer.GetId() == id {
 				return true
 			}
 		}
@@ -414,16 +447,56 @@ loopFits:
 		}
 		for _, p := range rf.Peers {
 			if isUnhealthyPeer(p.GetId()) {
+				// make sure it is a down peer.
+				if region.GetDownPeer(p.GetId()) != nil {
+					pinDownPeer = p
+				}
 				hasUnhealthyFit = true
 				break loopFits
 			}
 		}
 	}
+	// If hasUnhealthyFit is false, it is safe to delete the OrphanPeer.
 	if !hasUnhealthyFit {
 		checkerCounter.WithLabelValues("rule_checker", "remove-orphan-peer").Inc()
 		return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, fit.OrphanPeers[0].StoreId)
 	}
+
+	// try to use orphan peers to replace unhealthy down peers.
+	for _, orphanPeer := range fit.OrphanPeers {
+		if pinDownPeer != nil {
+			// make sure the orphan peer is healthy.
+			if isUnhealthyPeer(orphanPeer.GetId()) {
+				continue
+			}
+			// witness peers are not considered in this path.
+			if pinDownPeer.GetIsWitness() || orphanPeer.GetIsWitness() {
+				continue
+			}
+			// the down peer's store should be down.
+			if !c.isStoreDownTimeHitMaxDownTime(pinDownPeer.GetStoreId()) {
+				continue
+			}
+			// check whether the down peer can be replaced with the orphan peer.
+			dstStore := c.cluster.GetStore(orphanPeer.GetStoreId())
+			if fit.Replace(pinDownPeer.GetStoreId(), dstStore) {
+				destRole := pinDownPeer.GetRole()
+				orphanPeerRole := orphanPeer.GetRole()
+				ruleCheckerReplaceOrphanPeerCounter.Inc()
+				switch {
+				case orphanPeerRole == metapb.PeerRole_Learner && destRole == metapb.PeerRole_Voter:
+					return operator.CreatePromoteLearnerOperatorAndRemovePeer("replace-down-peer-with-orphan-peer", c.cluster, region, orphanPeer, pinDownPeer)
+				case orphanPeerRole == metapb.PeerRole_Voter && destRole == metapb.PeerRole_Learner:
+					return operator.CreateDemoteLearnerOperatorAndRemovePeer("replace-down-peer-with-orphan-peer", c.cluster, region, orphanPeer, pinDownPeer)
+				default:
+					// destRole should not be the same as orphanPeerRole; if the roles were the same, the fit with the orphan peer would already be better than the current one.
+					// destRole can never be leader, so it is not considered here.
+				}
+			}
+		}
+	}
+	// If hasUnhealthyFit is true, try to remove unhealthy orphan peers only if the number of OrphanPeers is >= 2.
 	// Ref https://github.com/tikv/pd/issues/4045
 	if len(fit.OrphanPeers) >= 2 {
@@ -462,7 +535,15 @@ func (c *RuleChecker) isDownPeer(region *core.RegionInfo, peer *metapb.Peer) boo
 
 func (c *RuleChecker) isStoreDownTimeHitMaxDownTime(storeID uint64) bool {
 	store := c.cluster.GetStore(storeID)
+<<<<<<< HEAD:server/schedule/checker/rule_checker.go
 	return store.DownTime() >= c.cluster.GetOpts().GetMaxStoreDownTime()
+=======
+	if store == nil {
+		log.Warn("lost the store, maybe you are recovering the PD cluster", zap.Uint64("store-id", storeID))
+		return false
+	}
+	return store.DownTime() >= c.cluster.GetCheckerConfig().GetMaxStoreDownTime()
+>>>>>>> f916e90eb (rule_checker: can replace unhealthPeer with orphanPeer (#6831)):pkg/schedule/checker/rule_checker.go
 }
 
 func (c *RuleChecker) isOfflinePeer(peer *metapb.Peer) bool {
diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go
index f1fe4babbf1..6451d2d5de6 100644
--- a/server/schedule/checker/rule_checker_test.go
+++ b/server/schedule/checker/rule_checker_test.go
@@ -361,7 +361,6 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness() {
 	op := suite.rc.Check(suite.cluster.GetRegion(1))
 	suite.NotNil(op)
 	suite.Equal("add-rule-peer", op.Desc())
-	fmt.Println(op)
 	suite.Equal(uint64(3), op.Step(0).(operator.AddLearner).ToStore)
 	suite.True(op.Step(0).(operator.AddLearner).IsWitness)
 }
@@ -685,6 +684,132 @@ func (suite *ruleCheckerTestSuite) TestPriorityFixOrphanPeer() {
 	suite.Equal("remove-orphan-peer", op.Desc())
 }
 
+func (suite *ruleCheckerTestSuite) TestPriorityFitHealthWithDifferentRole1() {
+	suite.cluster.SetEnableUseJointConsensus(true)
+	suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"})
+	suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2"})
+	suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3"})
+	suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"})
+	suite.cluster.AddRegionWithLearner(1, 1, []uint64{2, 3}, []uint64{4})
+	r1 := suite.cluster.GetRegion(1)
+	suite.cluster.GetStore(3).GetMeta().LastHeartbeat = time.Now().Add(-31 * time.Minute).UnixNano()
+
+	// set peer3 to pending and down
+	r1 = r1.Clone(core.WithPendingPeers([]*metapb.Peer{r1.GetPeer(3)}))
+	r1 = r1.Clone(core.WithDownPeers([]*pdpb.PeerStats{
+		{
+			Peer:        r1.GetStorePeer(3),
+			DownSeconds: 30000,
+		},
+	}))
+	suite.cluster.PutRegion(r1)
+
+	op := suite.rc.Check(suite.cluster.GetRegion(1))
+	suite.Equal(uint64(3), op.Step(0).(operator.ChangePeerV2Enter).DemoteVoters[0].ToStore)
+	suite.Equal(uint64(4), op.Step(0).(operator.ChangePeerV2Enter).PromoteLearners[0].ToStore)
+	suite.Equal(uint64(3), op.Step(1).(operator.ChangePeerV2Leave).DemoteVoters[0].ToStore)
+	suite.Equal(uint64(4), op.Step(1).(operator.ChangePeerV2Leave).PromoteLearners[0].ToStore)
+	suite.Equal("replace-down-peer-with-orphan-peer", op.Desc())
+
+	// set peer3 only pending
+	r1 = r1.Clone(core.WithDownPeers(nil))
+	suite.cluster.PutRegion(r1)
+	op = suite.rc.Check(suite.cluster.GetRegion(1))
+	suite.Nil(op)
+}
+
+func (suite *ruleCheckerTestSuite) TestPriorityFitHealthWithDifferentRole2() {
+	suite.cluster.SetEnableUseJointConsensus(true)
+	suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"})
+	suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2"})
+	suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3"})
+	suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"})
+	suite.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5"})
+	suite.cluster.AddLeaderRegion(1, 1, 2, 3, 4, 5)
+	r1 := suite.cluster.GetRegion(1)
+
+	// set peer3 to pending and down, and peer 3 to learner, and store 3 is down
+	suite.cluster.GetStore(3).GetMeta().LastHeartbeat = time.Now().Add(-31 * time.Minute).UnixNano()
+	r1 = r1.Clone(core.WithLearners([]*metapb.Peer{r1.GetPeer(3)}))
+	r1 = r1.Clone(
+		core.WithPendingPeers([]*metapb.Peer{r1.GetPeer(3)}),
+		core.WithDownPeers([]*pdpb.PeerStats{
+			{
+				Peer:        r1.GetStorePeer(3),
+				DownSeconds: 30000,
+			},
+		}),
+	)
+	suite.cluster.PutRegion(r1)
+
+	// default and test group => 3 voter + 1 learner
+	err := suite.ruleManager.SetRule(&placement.Rule{
+		GroupID: "test",
+		ID:      "10",
+		Role:    placement.Learner,
+		Count:   1,
+	})
+	suite.NoError(err)
+
+	op := suite.rc.Check(suite.cluster.GetRegion(1))
+	suite.Equal(uint64(5), op.Step(0).(operator.ChangePeerV2Enter).DemoteVoters[0].ToStore)
+	suite.Equal(uint64(3), op.Step(1).(operator.RemovePeer).FromStore)
+	suite.Equal("replace-down-peer-with-orphan-peer", op.Desc())
+}
+
+func (suite *ruleCheckerTestSuite) TestPriorityFitHealthPeersAndTiFlash() {
+	suite.cluster.SetEnableUseJointConsensus(true)
+	suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"})
+	suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2"})
+	suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3"})
+	suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4", "engine": "tiflash"})
+	suite.cluster.AddRegionWithLearner(1, 1, []uint64{2, 3}, []uint64{4})
+	rule := &placement.Rule{
+		GroupID: "pd",
+		ID:      "test",
+		Role:    placement.Voter,
+		Count:   3,
+	}
+	rule2 := &placement.Rule{
+		GroupID: "pd",
+		ID:      "test2",
+		Role:    placement.Learner,
+		Count:   1,
+		LabelConstraints: []placement.LabelConstraint{
+			{
+				Key:    "engine",
+				Op:     placement.In,
+				Values: []string{"tiflash"},
+			},
+		},
+	}
+	suite.ruleManager.SetRule(rule)
+	suite.ruleManager.SetRule(rule2)
+	suite.ruleManager.DeleteRule("pd", "default")
+
+	r1 := suite.cluster.GetRegion(1)
+	// set peer3 to pending and down
+	r1 = r1.Clone(core.WithPendingPeers([]*metapb.Peer{r1.GetPeer(3)}))
+	r1 = r1.Clone(core.WithDownPeers([]*pdpb.PeerStats{
+		{
+			Peer:        r1.GetStorePeer(3),
+			DownSeconds: 30000,
+		},
+	}))
+	suite.cluster.PutRegion(r1)
+	suite.cluster.GetStore(3).GetMeta().LastHeartbeat = time.Now().Add(-31 * time.Minute).UnixNano()
+
+	op := suite.rc.Check(suite.cluster.GetRegion(1))
+	// should not promote tiflash peer
+	suite.Nil(op)
+
+	// scale a node, can replace the down peer
+	suite.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5"})
+	op = suite.rc.Check(suite.cluster.GetRegion(1))
+	suite.NotNil(op)
+	suite.Equal("fast-replace-rule-down-peer", op.Desc())
+}
+
 func (suite *ruleCheckerTestSuite) TestIssue3293() {
 	suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"})
 	suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host1"})
diff --git a/server/schedule/operator/builder.go b/server/schedule/operator/builder.go
index 4dfb98324e5..93c4048e79d 100644
--- a/server/schedule/operator/builder.go
+++ b/server/schedule/operator/builder.go
@@ -403,7 +403,6 @@ func (b *Builder) Build(kind OpKind) (*Operator, error) {
 	if brief, b.err = b.prepareBuild(); b.err != nil {
 		return nil, b.err
 	}
-
 	if b.useJointConsensus {
 		kind, b.err = b.buildStepsWithJointConsensus(kind)
 	} else {
@@ -549,6 +548,10 @@ func (b *Builder) brief() string {
 		return fmt.Sprintf("%s: store %s to %s", op, b.toRemove, b.toAdd)
 	case len(b.toAdd) > 0:
 		return fmt.Sprintf("add peer: store %s", b.toAdd)
+	case len(b.toRemove) > 0 && len(b.toPromote) > 0:
+		return fmt.Sprintf("promote peer: store %s, rm peer: store %s", b.toPromote, b.toRemove)
+	case len(b.toRemove) > 0 && len(b.toDemote) > 0:
+		return fmt.Sprintf("demote peer: store %s, rm peer: store %s", b.toDemote, b.toRemove)
 	case len(b.toRemove) > 0:
 		return fmt.Sprintf("rm peer: store %s", b.toRemove)
 	case len(b.toPromote) > 0:
diff --git a/server/schedule/operator/create_operator.go b/server/schedule/operator/create_operator.go
index 206d839ab28..0f299839a8d 100644
--- a/server/schedule/operator/create_operator.go
+++ b/server/schedule/operator/create_operator.go
@@ -50,6 +50,25 @@ func CreatePromoteLearnerOperator(desc string, ci ClusterInformer, region *core.
 		Build(0)
 }
 
+// CreatePromoteLearnerOperatorAndRemovePeer creates an operator that promotes a learner and removes a peer.
+func CreatePromoteLearnerOperatorAndRemovePeer(desc string, ci sche.SharedCluster, region *core.RegionInfo, toPromote *metapb.Peer, toRemove *metapb.Peer) (*Operator, error) {
+	return NewBuilder(desc, ci, region).
+		PromoteLearner(toPromote.GetStoreId()).
+		RemovePeer(toRemove.GetStoreId()).
+		Build(0)
+}
+
+// CreateDemoteLearnerOperatorAndRemovePeer creates an operator that demotes a learner and removes a peer.
+func CreateDemoteLearnerOperatorAndRemovePeer(desc string, ci sche.SharedCluster, region *core.RegionInfo, toDemote *metapb.Peer, toRemove *metapb.Peer) (*Operator, error) {
+	if !ci.GetSharedConfig().IsUseJointConsensus() {
+		return nil, errors.Errorf("cannot build demote learner operator due to disabling using joint state")
+	}
+	return NewBuilder(desc, ci, region).
+		DemoteVoter(toDemote.GetStoreId()).
+		RemovePeer(toRemove.GetStoreId()).
+		Build(0)
+}
+
 // CreateRemovePeerOperator creates an operator that removes a peer from region.
 func CreateRemovePeerOperator(desc string, ci ClusterInformer, kind OpKind, region *core.RegionInfo, storeID uint64) (*Operator, error) {
 	return NewBuilder(desc, ci, region).
@@ -238,7 +257,7 @@ func CreateLeaveJointStateOperator(desc string, ci ClusterInformer, origin *core
 	b := NewBuilder(desc, ci, origin, SkipOriginJointStateCheck, SkipPlacementRulesCheck)
 
 	if b.err == nil && !core.IsInJointState(origin.GetPeers()...) {
-		b.err = errors.Errorf("cannot build leave joint state operator for region which is not in joint state")
+		b.err = errors.Errorf("cannot build leave joint state operator due to disabling using joint state")
 	}
 
 	if b.err != nil {
diff --git a/server/schedule/operator/operator.go b/server/schedule/operator/operator.go
index 3fae9d86eea..b38f4555d6c 100644
--- a/server/schedule/operator/operator.go
+++ b/server/schedule/operator/operator.go
@@ -87,7 +87,7 @@ func (o *Operator) Sync(other *Operator) {
 func (o *Operator) String() string {
 	stepStrs := make([]string, len(o.steps))
 	for i := range o.steps {
-		stepStrs[i] = o.steps[i].String()
+		stepStrs[i] = fmt.Sprintf("%d:{%s}", i, o.steps[i].String())
 	}
 	s := fmt.Sprintf("%s {%s} (kind:%s, region:%v(%v, %v), createAt:%s, startAt:%s, currentStep:%v, size:%d, steps:[%s],timeout:[%s])",
 		o.desc, o.brief, o.kind, o.regionID, o.regionEpoch.GetVersion(), o.regionEpoch.GetConfVer(), o.GetCreateTime(),
diff --git a/server/schedule/placement/fit.go b/server/schedule/placement/fit.go
index 82af3c17d11..454715cdc8e 100644
--- a/server/schedule/placement/fit.go
+++ b/server/schedule/placement/fit.go
@@ -56,6 +56,9 @@ func (f *RegionFit) IsCached() bool {
 
 // Replace return true if the replacement store is fit all constraints and isolation score is not less than the origin.
 func (f *RegionFit) Replace(srcStoreID uint64, dstStore *core.StoreInfo) bool {
+	if dstStore == nil {
+		return false
+	}
 	fit := f.getRuleFitByStoreID(srcStoreID)
 	// check the target store is fit all constraints.
 	if fit == nil {
diff --git a/tests/client/Makefile b/tests/client/Makefile
index 91c9e708073..745cbfd1c47 100644
--- a/tests/client/Makefile
+++ b/tests/client/Makefile
@@ -18,8 +18,15 @@ SHELL := env PATH='$(PATH)' GOBIN='$(GO_TOOLS_BIN_PATH)' $(shell which bash)
 
 static: install-tools
 	@ gofmt -s -l -d . 2>&1 | awk '{ print } END { if (NR > 0) { exit 1 } }'
+<<<<<<< HEAD:tests/client/Makefile
 	@ golangci-lint run ./...
 	@ revive -formatter friendly -config ../../revive.toml .
+=======
+	@ echo "golangci-lint ..."
+	@ golangci-lint run -c $(ROOT_PATH)/.golangci.yml --verbose ./... --allow-parallel-runners
+	@ echo "revive ..."
+	@ revive -formatter friendly -config $(ROOT_PATH)/revive.toml ./...
+>>>>>>> f916e90eb (rule_checker: can replace unhealthPeer with orphanPeer (#6831)):tests/integrations/client/Makefile
 
 tidy:
 	@ go mod tidy
diff --git a/tests/integrations/mcs/Makefile b/tests/integrations/mcs/Makefile
new file mode 100644
index 00000000000..01e63b5baec
--- /dev/null
+++ b/tests/integrations/mcs/Makefile
@@ -0,0 +1,49 @@
+# Copyright 2023 TiKV Project Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ROOT_PATH := ../../..
+GO_TOOLS_BIN_PATH := $(ROOT_PATH)/.tools/bin
+PATH := $(GO_TOOLS_BIN_PATH):$(PATH)
+SHELL := env PATH='$(PATH)' GOBIN='$(GO_TOOLS_BIN_PATH)' $(shell which bash)
+
+static: install-tools
+	@ echo "gofmt ..."
+	@ gofmt -s -l -d . 2>&1 | awk '{ print } END { if (NR > 0) { exit 1 } }'
+	@ echo "golangci-lint ..."
+	@ golangci-lint run -c $(ROOT_PATH)/.golangci.yml --verbose ./... --allow-parallel-runners
+	@ echo "revive ..."
+	@ revive -formatter friendly -config $(ROOT_PATH)/revive.toml ./...
+
+tidy:
+	@ go mod tidy
+	git diff go.mod go.sum | cat
+	git diff --quiet go.mod go.sum
+
+test: failpoint-enable
+	CGO_ENABLED=1 go test ./... -v -tags deadlock -race -cover || { $(MAKE) failpoint-disable && exit 1; }
+	$(MAKE) failpoint-disable
+
+ci-test-job:
+	CGO_ENABLED=1 go test ./... -v -timeout=15m -tags deadlock -race -covermode=atomic -coverprofile=covprofile -coverpkg=$(ROOT_PATH)/... github.com/tikv/pd/tests/integrations/mcs
+
+install-tools:
+	cd $(ROOT_PATH) && $(MAKE) install-tools
+
+failpoint-enable:
+	cd $(ROOT_PATH) && $(MAKE) failpoint-enable
+	go mod tidy
+
+failpoint-disable:
+	cd $(ROOT_PATH) && $(MAKE) failpoint-disable
+	go mod tidy
diff --git a/tests/integrations/tso/Makefile b/tests/integrations/tso/Makefile
new file mode 100644
index 00000000000..e353f686fe7
--- /dev/null
+++ b/tests/integrations/tso/Makefile
@@ -0,0 +1,49 @@
+# Copyright 2023 TiKV Project Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ROOT_PATH := ../../..
+GO_TOOLS_BIN_PATH := $(ROOT_PATH)/.tools/bin
+PATH := $(GO_TOOLS_BIN_PATH):$(PATH)
+SHELL := env PATH='$(PATH)' GOBIN='$(GO_TOOLS_BIN_PATH)' $(shell which bash)
+
+static: install-tools
+	@ echo "gofmt ..."
+	@ gofmt -s -l -d . 2>&1 | awk '{ print } END { if (NR > 0) { exit 1 } }'
+	@ echo "golangci-lint ..."
+	@ golangci-lint run -c $(ROOT_PATH)/.golangci.yml --verbose ./... --allow-parallel-runners
+	@ echo "revive ..."
+	@ revive -formatter friendly -config $(ROOT_PATH)/revive.toml ./...
+
+tidy:
+	@ go mod tidy
+	git diff go.mod go.sum | cat
+	git diff --quiet go.mod go.sum
+
+test: failpoint-enable
+	CGO_ENABLED=1 go test ./... -v -tags deadlock -race -cover || { $(MAKE) failpoint-disable && exit 1; }
+	$(MAKE) failpoint-disable
+
+ci-test-job:
+	CGO_ENABLED=1 go test ./... -v -tags deadlock -race -covermode=atomic -coverprofile=covprofile -coverpkg=$(ROOT_PATH)/... github.com/tikv/pd/tests/integrations/tso
+
+install-tools:
+	cd $(ROOT_PATH) && $(MAKE) install-tools
+
+failpoint-enable:
+	cd $(ROOT_PATH) && $(MAKE) failpoint-enable
+	go mod tidy
+
+failpoint-disable:
+	cd $(ROOT_PATH) && $(MAKE) failpoint-disable
+	go mod tidy