-
Notifications
You must be signed in to change notification settings - Fork 720
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
schedule: support patrol region concurrency #8094
Changes from 39 commits
d1f4b8a
6d0fbd4
9b43d61
b2b4f39
cb285f6
2e14405
351ef5c
c198b08
a0ec33d
ab9ef1e
97a40a9
438efce
c59b47c
9f57397
bbc1362
b0eab80
5c442a3
0d02d8b
9896228
a638cce
6147373
2a86197
82785c2
a21ef83
cd1cd8b
5668d98
78e3ba5
cf01076
cc51a2e
9f7406a
bd4ca79
ae0778f
ecb8d8b
8259ad0
18db300
5ead31e
6bdb436
40a2e02
8cd8825
bcd5018
acb4244
457da3d
64abc3c
e570e3c
74d4fdc
aeabc52
7e5813d
240f902
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
"bytes" | ||
"context" | ||
"strconv" | ||
"sync" | ||
"time" | ||
|
||
"github.com/pingcap/failpoint" | ||
|
@@ -31,6 +32,7 @@ | |
"github.com/tikv/pd/pkg/schedule/operator" | ||
"github.com/tikv/pd/pkg/schedule/placement" | ||
"github.com/tikv/pd/pkg/utils/keyutil" | ||
"github.com/tikv/pd/pkg/utils/logutil" | ||
"github.com/tikv/pd/pkg/utils/syncutil" | ||
"go.uber.org/zap" | ||
) | ||
|
@@ -47,13 +49,14 @@ | |
// MaxPatrolScanRegionLimit is the max limit of regions to scan for a batch. | ||
MaxPatrolScanRegionLimit = 8192 | ||
patrolRegionPartition = 1024 | ||
patrolRegionChanLen = MaxPatrolScanRegionLimit | ||
) | ||
|
||
var ( | ||
denyCheckersByLabelerCounter = labeler.LabelerEventCounter.WithLabelValues("checkers", "deny") | ||
// WithLabelValues is a heavy operation, define variable to avoid call it every time. | ||
pendingProcessedRegionsGauge = regionListGauge.WithLabelValues("pending_processed_regions") | ||
priorityListGauge = regionListGauge.WithLabelValues("priority_list") | ||
denyCheckersByLabelerCounter = labeler.LabelerEventCounter.WithLabelValues("checkers", "deny") | ||
) | ||
|
||
// Controller is used to manage all checkers. | ||
|
@@ -71,6 +74,7 @@ | |
priorityInspector *PriorityInspector | ||
pendingProcessedRegions *cache.TTLUint64 | ||
suspectKeyRanges *cache.TTLString // suspect key-range regions that may need fix | ||
patrolRegionContext *PatrolRegionContext | ||
|
||
// duration is the duration of the last patrol round. | ||
// It's exported, so it should be protected by a mutex. | ||
|
@@ -81,7 +85,8 @@ | |
// interval is the config interval of patrol regions. | ||
// It's used to update the ticker, so we need to | ||
// record it to avoid updating the ticker frequently. | ||
interval time.Duration | ||
interval time.Duration | ||
workerCount int | ||
// patrolRegionScanLimit is the limit of regions to scan. | ||
// It is calculated by the number of regions. | ||
patrolRegionScanLimit int | ||
|
@@ -104,6 +109,7 @@ | |
priorityInspector: NewPriorityInspector(cluster, conf), | ||
pendingProcessedRegions: pendingProcessedRegions, | ||
suspectKeyRanges: cache.NewStringTTL(ctx, time.Minute, 3*time.Minute), | ||
patrolRegionContext: &PatrolRegionContext{}, | ||
interval: cluster.GetCheckerConfig().GetPatrolRegionInterval(), | ||
patrolRegionScanLimit: calculateScanLimit(cluster), | ||
} | ||
|
@@ -112,6 +118,9 @@ | |
// PatrolRegions is used to scan regions. | ||
// The checkers will check these regions to decide if they need to do some operations. | ||
func (c *Controller) PatrolRegions() { | ||
c.patrolRegionContext.init(c.ctx) | ||
c.patrolRegionContext.startPatrolRegionWorkers(c) | ||
defer c.patrolRegionContext.stop() | ||
ticker := time.NewTicker(c.interval) | ||
defer ticker.Stop() | ||
start := time.Now() | ||
|
@@ -123,11 +132,20 @@ | |
select { | ||
case <-ticker.C: | ||
c.updateTickerIfNeeded(ticker) | ||
c.updatePatrolWorkersIfNeeded() | ||
if c.cluster.IsSchedulingHalted() { | ||
for len(c.patrolRegionContext.regionChan) > 0 { | ||
<-c.patrolRegionContext.regionChan | ||
} | ||
log.Debug("skip patrol regions due to scheduling is halted") | ||
continue | ||
} | ||
|
||
// wait for the regionChan to be drained | ||
if len(c.patrolRegionContext.regionChan) > 0 { | ||
continue | ||
} | ||
|
||
// Check priority regions first. | ||
c.checkPriorityRegions() | ||
// Check pending processed regions first. | ||
|
@@ -150,6 +168,7 @@ | |
start = time.Now() | ||
} | ||
failpoint.Inject("breakPatrol", func() { | ||
time.Sleep(100 * time.Millisecond) // ensure the regions are handled by the workers | ||
failpoint.Return() | ||
}) | ||
case <-c.ctx.Done(): | ||
|
@@ -160,6 +179,32 @@ | |
} | ||
} | ||
|
||
func (c *Controller) updateTickerIfNeeded(ticker *time.Ticker) { | ||
// Note: we reset the ticker here to support updating configuration dynamically. | ||
newInterval := c.cluster.GetCheckerConfig().GetPatrolRegionInterval() | ||
if c.interval != newInterval { | ||
c.interval = newInterval | ||
ticker.Reset(newInterval) | ||
log.Info("checkers starts patrol regions with new interval", zap.Duration("interval", newInterval)) | ||
} | ||
} | ||
|
||
func (c *Controller) updatePatrolWorkersIfNeeded() { | ||
newWorkersCount := c.cluster.GetCheckerConfig().GetPatrolRegionWorkerCount() | ||
if c.workerCount != newWorkersCount { | ||
oldWorkersCount := c.workerCount | ||
c.workerCount = newWorkersCount | ||
// Stop the old workers and start the new workers. | ||
c.patrolRegionContext.workersCancel() | ||
c.patrolRegionContext.wg.Wait() | ||
c.patrolRegionContext.workersCtx, c.patrolRegionContext.workersCancel = context.WithCancel(c.ctx) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we adjust the workers more gracefully? For example, if the new worker count is greater than the current one, we can just scale out with additional workers, with no need to rebuild all of them. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure it's necessary; generally speaking, we don't change this configuration very often. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm afraid that stopping all the workers and then starting them all again may take some time. |
||
c.patrolRegionContext.startPatrolRegionWorkers(c) | ||
log.Info("checkers starts patrol regions with new workers count", | ||
zap.Int("old-workers-count", oldWorkersCount), | ||
zap.Int("new-workers-count", newWorkersCount)) | ||
} | ||
} | ||
|
||
// GetPatrolRegionsDuration returns the duration of the last patrol region round. | ||
func (c *Controller) GetPatrolRegionsDuration() time.Duration { | ||
c.mu.RLock() | ||
|
@@ -182,7 +227,7 @@ | |
} | ||
|
||
for _, region := range regions { | ||
c.tryAddOperators(region) | ||
c.patrolRegionContext.regionChan <- region | ||
key = region.GetEndKey() | ||
} | ||
return | ||
|
@@ -443,13 +488,46 @@ | |
} | ||
} | ||
|
||
func (c *Controller) updateTickerIfNeeded(ticker *time.Ticker) { | ||
// Note: we reset the ticker here to support updating configuration dynamically. | ||
newInterval := c.cluster.GetCheckerConfig().GetPatrolRegionInterval() | ||
if c.interval != newInterval { | ||
c.interval = newInterval | ||
ticker.Reset(newInterval) | ||
log.Info("checkers starts patrol regions with new interval", zap.Duration("interval", newInterval)) | ||
// PatrolRegionContext is used to store the context of patrol regions. | ||
type PatrolRegionContext struct { | ||
// workers | ||
workersCtx context.Context | ||
workersCancel context.CancelFunc | ||
regionChan chan *core.RegionInfo | ||
wg sync.WaitGroup | ||
} | ||
|
||
func (p *PatrolRegionContext) init(ctx context.Context) { | ||
p.regionChan = make(chan *core.RegionInfo, patrolRegionChanLen) | ||
p.workersCtx, p.workersCancel = context.WithCancel(ctx) | ||
} | ||
|
||
func (p *PatrolRegionContext) stop() { | ||
close(p.regionChan) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It is better to add a failpoint to wait for all the remaining regions to be consumed. If possible, we can always wait for it, even when not testing. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
For example, if we enable this failpoint, it will wait 100 ms for the goroutines to consume the regions. And it will check There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I understand the role of L171, but I think the sleep is a destabilizing factor. It's fine to wait for the workers to finish consuming, or to actively consume all regions before exiting. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removed the sleep, PTAL.
lhy1024 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
p.workersCancel() | ||
p.wg.Wait() | ||
} | ||
|
||
func (p *PatrolRegionContext) startPatrolRegionWorkers(c *Controller) { | ||
for i := 0; i < c.workerCount; i++ { | ||
p.wg.Add(1) | ||
go func(i int) { | ||
defer logutil.LogPanic() | ||
defer p.wg.Done() | ||
for { | ||
select { | ||
case region, ok := <-p.regionChan: | ||
if !ok { | ||
log.Debug("region channel is closed", zap.Int("worker-id", i)) | ||
return | ||
} | ||
c.tryAddOperators(region) | ||
case <-p.workersCtx.Done(): | ||
log.Debug("region worker is closed", zap.Int("worker-id", i)) | ||
return | ||
} | ||
} | ||
}(i) | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -67,6 +67,9 @@ const ( | |
defaultRegionScoreFormulaVersion = "v2" | ||
defaultLeaderSchedulePolicy = "count" | ||
defaultStoreLimitVersion = "v1" | ||
defaultPatrolRegionWorkerCount = 1 | ||
maxPatrolRegionWorkerCount = 8 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe it is too small and cannot be changed? How about using the number of cores as the max limit? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Current tests show that 8 is enough; if needed in the future, I think it can be increased, or the number of cores can be used. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK |
||
|
||
// DefaultSplitMergeInterval is the default value of config split merge interval. | ||
DefaultSplitMergeInterval = time.Hour | ||
defaultSwitchWitnessInterval = time.Hour | ||
|
@@ -306,6 +309,9 @@ type ScheduleConfig struct { | |
// HaltScheduling is the option to halt the scheduling. Once it's on, PD will halt the scheduling, | ||
// and any other scheduling configs will be ignored. | ||
HaltScheduling bool `toml:"halt-scheduling" json:"halt-scheduling,string,omitempty"` | ||
|
||
// PatrolRegionWorkerCount is the number of workers to patrol region. | ||
PatrolRegionWorkerCount int `toml:"patrol-region-worker-count" json:"patrol-region-worker-count"` | ||
} | ||
|
||
// Clone returns a cloned scheduling configuration. | ||
|
@@ -374,6 +380,9 @@ func (c *ScheduleConfig) Adjust(meta *configutil.ConfigMetaData, reloading bool) | |
if !meta.IsDefined("store-limit-version") { | ||
configutil.AdjustString(&c.StoreLimitVersion, defaultStoreLimitVersion) | ||
} | ||
if !meta.IsDefined("patrol-region-worker-count") { | ||
configutil.AdjustInt(&c.PatrolRegionWorkerCount, defaultPatrolRegionWorkerCount) | ||
} | ||
|
||
if !meta.IsDefined("enable-joint-consensus") { | ||
c.EnableJointConsensus = defaultEnableJointConsensus | ||
|
@@ -518,6 +527,9 @@ func (c *ScheduleConfig) Validate() error { | |
if c.SlowStoreEvictingAffectedStoreRatioThreshold == 0 { | ||
return errors.Errorf("slow-store-evicting-affected-store-ratio-threshold is not set") | ||
} | ||
if c.PatrolRegionWorkerCount > maxPatrolRegionWorkerCount || c.PatrolRegionWorkerCount < 1 { | ||
return errors.Errorf("patrol-region-worker-count should be between 1 and %d", maxPatrolRegionWorkerCount) | ||
} | ||
return nil | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to wait here? If the `regionChan` is full, it will be blocked.