Skip to content

Commit

Permalink
schedule: fix datarace in operator.check (#8264)
Browse files Browse the repository at this point in the history
close #8263

Signed-off-by: lhy1024 <[email protected]>
  • Loading branch information
lhy1024 authored Jun 7, 2024
1 parent f69d600 commit e767c01
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 3 deletions.
5 changes: 3 additions & 2 deletions pkg/schedule/operator/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,10 +376,11 @@ func (o *Operator) Check(region *core.RegionInfo) OpStep {
defer func() { _ = o.CheckTimeout() }()
for step := atomic.LoadInt32(&o.currentStep); int(step) < len(o.steps); step++ {
if o.steps[int(step)].IsFinish(region) {
if atomic.CompareAndSwapInt64(&(o.stepsTime[step]), 0, time.Now().UnixNano()) {
current := time.Now()
if atomic.CompareAndSwapInt64(&(o.stepsTime[step]), 0, current.UnixNano()) {
startTime, _ := o.getCurrentTimeAndStep()
operatorStepDuration.WithLabelValues(reflect.TypeOf(o.steps[int(step)]).Name()).
Observe(time.Unix(0, o.stepsTime[step]).Sub(startTime).Seconds())
Observe(current.Sub(startTime).Seconds())
}
atomic.StoreInt32(&o.currentStep, step+1)
} else {
Expand Down
2 changes: 1 addition & 1 deletion pkg/schedule/operator/operator_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ func (oc *Controller) checkAddOperator(isPromoting bool, ops ...*Operator) (bool
return false, NotInCreateStatus
}
if !isPromoting && oc.wopStatus.getCount(op.Desc()) >= oc.config.GetSchedulerMaxWaitingOperator() {
log.Debug("exceed max return false", zap.Uint64("waiting", oc.wopStatus.ops[op.Desc()]), zap.String("desc", op.Desc()), zap.Uint64("max", oc.config.GetSchedulerMaxWaitingOperator()))
log.Debug("exceed max return false", zap.Uint64("waiting", oc.wopStatus.getCount(op.Desc())), zap.String("desc", op.Desc()), zap.Uint64("max", oc.config.GetSchedulerMaxWaitingOperator()))
operatorCounter.WithLabelValues(op.Desc(), "exceed-max-waiting").Inc()
return false, ExceedWaitLimit
}
Expand Down
25 changes: 25 additions & 0 deletions pkg/schedule/operator/operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package operator
import (
"context"
"encoding/json"
"sync"
"sync/atomic"
"testing"
"time"
Expand Down Expand Up @@ -570,3 +571,27 @@ func (suite *operatorTestSuite) TestToJSONObject() {
obj = op.ToJSONObject()
suite.Equal(TIMEOUT, obj.Status)
}

// TestOperatorCheckConcurrently calls Operator.Check on the same operator from
// several goroutines at once. It is meant to be run under the race detector
// (go test -race) so that unsynchronized access inside Check is reported.
//
// Each worker only records the step returned by Check; the assertions are made
// on the test goroutine after all workers have finished, because require's
// failure path calls t.FailNow, which must not be invoked from goroutines
// other than the one running the test.
func TestOperatorCheckConcurrently(t *testing.T) {
	re := require.New(t)
	region := newTestRegion(1, 1, [2]uint64{1, 1}, [2]uint64{2, 2})
	// addPeer1, transferLeader1, removePeer3
	steps := []OpStep{
		AddPeer{ToStore: 1, PeerID: 1},
		TransferLeader{FromStore: 3, ToStore: 1},
		RemovePeer{FromStore: 3},
	}
	op := NewTestOperator(1, &metapb.RegionEpoch{}, OpAdmin|OpLeader|OpRegion, steps...)
	re.Equal(constant.Urgent, op.GetPriorityLevel())
	checkSteps(re, op, steps)
	op.Start()

	const workers = 10
	// One slot per worker: every goroutine writes a distinct element, so the
	// slice itself needs no additional synchronization beyond wg.Wait().
	results := make([]OpStep, workers)
	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		wg.Add(1)
		// Pass the index explicitly so the closure does not depend on
		// per-iteration loop variable semantics.
		go func(idx int) {
			defer wg.Done()
			results[idx] = op.Check(region)
		}(i)
	}
	wg.Wait()

	// Every concurrent Check call is expected to report no remaining step.
	for _, step := range results {
		re.Nil(step)
	}
}

0 comments on commit e767c01

Please sign in to comment.