-
Notifications
You must be signed in to change notification settings - Fork 719
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
*: use a separate runner for updating subtree #8158
Changes from 10 commits
c0f2c94
43c60cb
d458613
835257a
1c4336a
1b64ad0
0f349a0
2004b2d
61951f2
62d98f4
ed08415
eae4f6a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,7 +54,8 @@ | |
clusterID uint64 | ||
running atomic.Bool | ||
|
||
heartbeatRunnner ratelimit.Runner | ||
heartbeatRunner ratelimit.Runner | ||
statisticsRunner ratelimit.Runner | ||
logRunner ratelimit.Runner | ||
} | ||
|
||
|
@@ -64,8 +65,9 @@ | |
collectWaitTime = time.Minute | ||
|
||
// heartbeat related constants | ||
heartbeatTaskRunner = "heartbeat-task-runner" | ||
logTaskRunner = "log-task-runner" | ||
heartbeatTaskRunner = "heartbeat-task-runner" | ||
statisticsTaskRunner = "statistics-task-runner" | ||
logTaskRunner = "log-task-runner" | ||
) | ||
|
||
var syncRunner = ratelimit.NewSyncRunner() | ||
|
@@ -93,7 +95,8 @@ | |
clusterID: clusterID, | ||
checkMembershipCh: checkMembershipCh, | ||
|
||
heartbeatRunnner: ratelimit.NewConcurrentRunner(heartbeatTaskRunner, ratelimit.NewConcurrencyLimiter(uint64(runtime.NumCPU()*2)), time.Minute), | ||
heartbeatRunner: ratelimit.NewConcurrentRunner(heartbeatTaskRunner, ratelimit.NewConcurrencyLimiter(uint64(runtime.NumCPU()*2)), time.Minute), | ||
statisticsRunner: ratelimit.NewConcurrentRunner(statisticsTaskRunner, ratelimit.NewConcurrencyLimiter(uint64(runtime.NumCPU()*2)), time.Minute), | ||
logRunner: ratelimit.NewConcurrentRunner(logTaskRunner, ratelimit.NewConcurrencyLimiter(uint64(runtime.NumCPU()*2)), time.Minute), | ||
} | ||
c.coordinator = schedule.NewCoordinator(ctx, c, hbStreams) | ||
|
@@ -531,7 +534,8 @@ | |
go c.runUpdateStoreStats() | ||
go c.runCoordinator() | ||
go c.runMetricsCollectionJob() | ||
c.heartbeatRunnner.Start() | ||
c.heartbeatRunner.Start() | ||
c.statisticsRunner.Start() | ||
c.logRunner.Start() | ||
c.running.Store(true) | ||
} | ||
|
@@ -543,7 +547,8 @@ | |
} | ||
c.running.Store(false) | ||
c.coordinator.Stop() | ||
c.heartbeatRunnner.Stop() | ||
c.heartbeatRunner.Stop() | ||
c.statisticsRunner.Stop() | ||
c.logRunner.Stop() | ||
c.cancel() | ||
c.wg.Wait() | ||
|
@@ -560,17 +565,19 @@ | |
if c.persistConfig.GetScheduleConfig().EnableHeartbeatBreakdownMetrics { | ||
tracer = core.NewHeartbeatProcessTracer() | ||
} | ||
var taskRunner, logRunner ratelimit.Runner | ||
taskRunner, logRunner = syncRunner, syncRunner | ||
var taskRunner, statisticsRunner, logRunner ratelimit.Runner | ||
taskRunner, statisticsRunner, logRunner = syncRunner, syncRunner, syncRunner | ||
if c.persistConfig.GetScheduleConfig().EnableHeartbeatConcurrentRunner { | ||
taskRunner = c.heartbeatRunnner | ||
taskRunner = c.heartbeatRunner | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need to assign the statisticsRunner here as well?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice catch. |
||
statisticsRunner = c.statisticsRunner | ||
logRunner = c.logRunner | ||
} | ||
ctx := &core.MetaProcessContext{ | ||
Context: c.ctx, | ||
Tracer: tracer, | ||
TaskRunner: taskRunner, | ||
LogRunner: logRunner, | ||
Context: c.ctx, | ||
Tracer: tracer, | ||
TaskRunner: taskRunner, | ||
StatisticsRunner: statisticsRunner, | ||
LogRunner: logRunner, | ||
} | ||
tracer.Begin() | ||
if err := c.processRegionHeartbeat(ctx, region); err != nil { | ||
|
@@ -591,19 +598,12 @@ | |
return err | ||
} | ||
region.Inherit(origin, c.GetStoreConfig().IsEnableRegionBucket()) | ||
|
||
ctx.TaskRunner.RunTask( | ||
ctx, | ||
ratelimit.HandleStatsAsync, | ||
func(_ context.Context) { | ||
cluster.HandleStatsAsync(c, region) | ||
}, | ||
) | ||
cluster.HandleStatsAsync(c, region) | ||
tracer.OnAsyncHotStatsFinished() | ||
hasRegionStats := c.regionStats != nil | ||
// Save to storage if meta is updated, except for flashback. | ||
// Save to cache if meta or leader is updated, or contains any down/pending peer. | ||
_, saveCache, _ := core.GenerateRegionGuideFunc(true)(ctx, region, origin) | ||
_, saveCache, _, retained := core.GenerateRegionGuideFunc(true)(ctx, region, origin) | ||
|
||
if !saveCache { | ||
// Due to some config changes need to update the region stats as well, | ||
|
@@ -627,6 +627,7 @@ | |
func(_ context.Context) { | ||
c.CheckAndPutSubTree(region) | ||
}, | ||
ratelimit.WithRetained(true), | ||
) | ||
} | ||
return nil | ||
|
@@ -650,6 +651,7 @@ | |
func(_ context.Context) { | ||
c.CheckAndPutSubTree(region) | ||
}, | ||
ratelimit.WithRetained(retained), | ||
) | ||
tracer.OnUpdateSubTreeFinished() | ||
ctx.TaskRunner.RunTask( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,25 +31,41 @@ var ( | |
Name: "runner_task_max_waiting_duration_seconds", | ||
Help: "The duration of tasks waiting in the runner.", | ||
}, []string{nameStr}) | ||
|
||
RunnerTaskPendingTasks = prometheus.NewGaugeVec( | ||
RunnerPendingTasks = prometheus.NewGaugeVec( | ||
prometheus.GaugeOpts{ | ||
Namespace: "pd", | ||
Subsystem: "ratelimit", | ||
Name: "runner_task_pending_tasks", | ||
Name: "runner_pending_tasks", | ||
Help: "The number of pending tasks in the runner.", | ||
}, []string{nameStr, taskStr}) | ||
RunnerTaskFailedTasks = prometheus.NewCounterVec( | ||
RunnerFailedTasks = prometheus.NewCounterVec( | ||
prometheus.CounterOpts{ | ||
Namespace: "pd", | ||
Subsystem: "ratelimit", | ||
Name: "runner_task_failed_tasks_total", | ||
Name: "runner_failed_tasks_total", | ||
nolouch marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Help: "The number of failed tasks in the runner.", | ||
}, []string{nameStr}) | ||
}, []string{nameStr, taskStr}) | ||
RunnerSucceededTasks = prometheus.NewCounterVec( | ||
prometheus.CounterOpts{ | ||
Namespace: "pd", | ||
Subsystem: "ratelimit", | ||
Name: "runner_success_tasks_total", | ||
Help: "The number of tasks in the runner.", | ||
}, []string{nameStr, taskStr}) | ||
RunnerTaskExecutionDuration = prometheus.NewHistogramVec( | ||
prometheus.HistogramOpts{ | ||
Namespace: "pd", | ||
Subsystem: "ratelimit", | ||
Name: "runner_task_execution_duration_seconds", | ||
Help: "Bucketed histogram of processing time (s) of finished tasks.", | ||
Buckets: prometheus.ExponentialBuckets(0.0005, 2, 13), | ||
}, []string{nameStr, taskStr}) | ||
) | ||
|
||
func init() { | ||
prometheus.MustRegister(RunnerTaskMaxWaitingDuration) | ||
prometheus.MustRegister(RunnerTaskPendingTasks) | ||
prometheus.MustRegister(RunnerTaskFailedTasks) | ||
prometheus.MustRegister(RunnerPendingTasks) | ||
prometheus.MustRegister(RunnerFailedTasks) | ||
prometheus.MustRegister(RunnerTaskExecutionDuration) | ||
prometheus.MustRegister(RunnerSucceededTasks) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we also add metrics that report the number of tasks at each priority level? |
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please help add some comments here to explain the different responsibilities of these runners?