From 1bbec47bda2740fba22d0bf369d9a1dc6fa059da Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 4 Sep 2024 21:37:49 +0800 Subject: [PATCH] schedule: fix datarace in `operator.check` (#8264) (#8579) close tikv/pd#8263 Signed-off-by: husharp Co-authored-by: husharp Co-authored-by: Hu# --- server/schedule/operator/operator.go | 5 +++-- server/schedule/operator/operator_test.go | 24 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/server/schedule/operator/operator.go b/server/schedule/operator/operator.go index 0e760846e64..d4dfb5f929e 100644 --- a/server/schedule/operator/operator.go +++ b/server/schedule/operator/operator.go @@ -299,10 +299,11 @@ func (o *Operator) Check(region *core.RegionInfo) OpStep { defer func() { _ = o.CheckTimeout() }() for step := atomic.LoadInt32(&o.currentStep); int(step) < len(o.steps); step++ { if o.steps[int(step)].IsFinish(region) { - if atomic.CompareAndSwapInt64(&(o.stepsTime[step]), 0, time.Now().UnixNano()) { + current := time.Now() + if atomic.CompareAndSwapInt64(&(o.stepsTime[step]), 0, current.UnixNano()) { startTime, _ := o.getCurrentTimeAndStep() operatorStepDuration.WithLabelValues(reflect.TypeOf(o.steps[int(step)]).Name()). - Observe(time.Unix(0, o.stepsTime[step]).Sub(startTime).Seconds()) + Observe(current.Sub(startTime).Seconds()) } atomic.StoreInt32(&o.currentStep, step+1) } else { diff --git a/server/schedule/operator/operator_test.go b/server/schedule/operator/operator_test.go index 87e44e7a6c6..71fb5e43a9a 100644 --- a/server/schedule/operator/operator_test.go +++ b/server/schedule/operator/operator_test.go @@ -18,6 +18,7 @@ import ( "context" "encoding/json" "fmt" + "sync" "sync/atomic" "testing" "time" @@ -487,3 +488,26 @@ func (suite *operatorTestSuite) TestRecord() { suite.Equal(now, ob.FinishTime) suite.Greater(ob.duration.Seconds(), time.Second.Seconds()) } + +func (suite *operatorTestSuite) TestOperatorCheckConcurrently() { + region := suite.newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2}) + // addPeer1, transferLeader1, removePeer3 + steps := []OpStep{ + AddPeer{ToStore: 1, PeerID: 1}, + TransferLeader{FromStore: 3, ToStore: 1}, + RemovePeer{FromStore: 3}, + } + op := NewTestOperator(1, &metapb.RegionEpoch{}, OpAdmin|OpLeader|OpRegion, steps...) + suite.Equal(core.Urgent, op.GetPriorityLevel()) + suite.checkSteps(op, steps) + op.Start() + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + defer wg.Done() + suite.Nil(op.Check(region)) + }() + } + wg.Wait() +}