Skip to content

Commit 5c50c2f

Browse files
committed
scx_layered: Add per-layer LLC stickiness control
Add a new configuration option `llc_sticky_runs` that controls how many times a task must run on its current LLC before being allowed to migrate to a different LLC. This provides finer-grained control over LLC locality and complements the existing `xllc_mig_min_us` option. The implementation tracks the number of consecutive runs on the current LLC in the task context (`taskc->llc_runs`). When the scheduler attempts to find an idle CPU and no idle CPUs are available on the current LLC, it checks if the task has run fewer times than the configured threshold. If so, cross-LLC migration is prevented, keeping the task sticky to its current LLC to preserve cache locality. Key changes: - Add `llc_runs` counter to `task_ctx` to track consecutive runs on current LLC - Add `llc_sticky_runs` configuration field to `layer` struct and `LayerConfig` in config.rs - Increment `llc_runs` in `layered_running()` when stickiness is enabled - Reset `llc_runs` to 0 in `maybe_update_task_llc()` on LLC migration - Add stickiness check in `pick_idle_cpu()` to prevent cross-LLC migration when task hasn't run enough times on current LLC - Add `LSTAT_LLC_STICKY_SKIP` statistic to track prevented migrations - Initialize `llc_runs` to 0 in `layered_init_task()` The feature is disabled by default (llc_sticky_runs = 0) and can be configured per-layer. Example configuration: { "name": "batch", "kind": { "Confined": { "llc_sticky_runs": 20, "xllc_mig_min_us": 1000.0 } } } This would keep tasks on their current LLC for 20 runs before allowing cross-LLC migration, providing stronger LLC affinity for workloads that benefit from cache locality. The new statistic appears in the output as: xllc_mig/skip={xllc_migration%}/{xllc_skip%}/{llc_sticky_skip%} Signed-off-by: Daniel Hodges <[email protected]>
1 parent 733c7ed commit 5c50c2f

File tree

5 files changed: +31 additions, −1 deletion

scheds/rust/scx_layered/src/bpf/intf.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ enum layer_stat_id {
148148
LSTAT_XNUMA_MIGRATION,
149149
LSTAT_XLLC_MIGRATION,
150150
LSTAT_XLLC_MIGRATION_SKIP,
151+
LSTAT_LLC_STICKY_SKIP,
151152
LSTAT_XLAYER_WAKE,
152153
LSTAT_XLAYER_REWAKE,
153154
LSTAT_LLC_DRAIN_TRY,
@@ -348,6 +349,7 @@ struct layer {
348349
u64 disallow_open_after_ns;
349350
u64 disallow_preempt_after_ns;
350351
u64 xllc_mig_min_ns;
352+
u32 llc_sticky_runs;
351353

352354
int kind;
353355
bool preempt;

scheds/rust/scx_layered/src/bpf/main.bpf.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,7 @@ struct task_ctx {
611611
u64 runtime_avg;
612612
u64 dsq_id;
613613
u32 llc_id;
614+
u32 llc_runs;
614615

615616
/* for llcc->queue_runtime */
616617
u32 qrt_layer_id;
@@ -1281,6 +1282,14 @@ s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu,
12811282
if ((cpu = pick_idle_cpu_from(cpumask, prev_cpu, idle_smtmask, layer)) >= 0)
12821283
goto out_put;
12831284

1285+
/* Check if task is sticky to current LLC */
1286+
if (layer->llc_sticky_runs > 0 &&
1287+
taskc->llc_runs < layer->llc_sticky_runs) {
1288+
lstat_inc(LSTAT_LLC_STICKY_SKIP, layer, cpuc);
1289+
cpu = -1;
1290+
goto out_put;
1291+
}
1292+
12841293
if (!(prev_llcc = lookup_llc_ctx(prev_cpuc->llc_id)) ||
12851294
prev_llcc->queued_runtime[layer_id] < layer->xllc_mig_min_ns) {
12861295
lstat_inc(LSTAT_XLLC_MIGRATION_SKIP, layer, cpuc);
@@ -1365,6 +1374,7 @@ bool maybe_update_task_llc(struct task_struct *p, struct task_ctx *taskc, s32 ne
13651374
p->scx.dsq_vtime = new_llcc->vtime_now[layer_id] + vtime_delta;
13661375

13671376
taskc->llc_id = new_llc_id;
1377+
taskc->llc_runs = 0;
13681378
return true;
13691379
}
13701380

@@ -3085,6 +3095,10 @@ void BPF_STRUCT_OPS(layered_running, struct task_struct *p)
30853095
if (time_before(llcc->vtime_now[layer_id], p->scx.dsq_vtime))
30863096
llcc->vtime_now[layer_id] = p->scx.dsq_vtime;
30873097

3098+
/* Increment LLC run counter if stickiness is enabled */
3099+
if (layer->llc_sticky_runs > 0)
3100+
taskc->llc_runs++;
3101+
30883102
cpuc->current_preempt = layer->preempt ||
30893103
(is_percpu_kthread(p) && is_percpu_kthread_preempting(p));
30903104
cpuc->used_at = now;
@@ -3439,6 +3453,7 @@ s32 BPF_STRUCT_OPS(layered_init_task, struct task_struct *p,
34393453
taskc->layer_id = MAX_LAYERS;
34403454
taskc->refresh_layer = true;
34413455
taskc->llc_id = MAX_LLCS;
3456+
taskc->llc_runs = 0;
34423457
taskc->qrt_layer_id = MAX_LLCS;
34433458
taskc->qrt_llc_id = MAX_LLCS;
34443459

scheds/rust/scx_layered/src/config.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ pub struct LayerCommon {
132132
pub disallow_preempt_after_us: Option<u64>,
133133
#[serde(default)]
134134
pub xllc_mig_min_us: f64,
135+
#[serde(default)]
136+
pub llc_sticky_runs: u32,
135137
#[serde(default, skip_serializing)]
136138
pub idle_smt: Option<bool>,
137139
#[serde(default)]

scheds/rust/scx_layered/src/main.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ lazy_static! {
152152
disallow_open_after_us: None,
153153
disallow_preempt_after_us: None,
154154
xllc_mig_min_us: 1000.0,
155+
llc_sticky_runs: 0,
155156
growth_algo: LayerGrowthAlgo::Sticky,
156157
idle_resume_us: None,
157158
perf: 1024,
@@ -188,6 +189,7 @@ lazy_static! {
188189
disallow_open_after_us: None,
189190
disallow_preempt_after_us: None,
190191
xllc_mig_min_us: 0.0,
192+
llc_sticky_runs: 0,
191193
growth_algo: LayerGrowthAlgo::Sticky,
192194
perf: 1024,
193195
idle_resume_us: None,
@@ -229,6 +231,7 @@ lazy_static! {
229231
disallow_open_after_us: None,
230232
disallow_preempt_after_us: None,
231233
xllc_mig_min_us: 0.0,
234+
llc_sticky_runs: 0,
232235
growth_algo: LayerGrowthAlgo::Topo,
233236
perf: 1024,
234237
idle_resume_us: None,
@@ -268,6 +271,7 @@ lazy_static! {
268271
disallow_open_after_us: None,
269272
disallow_preempt_after_us: None,
270273
xllc_mig_min_us: 100.0,
274+
llc_sticky_runs: 0,
271275
growth_algo: LayerGrowthAlgo::Linear,
272276
perf: 1024,
273277
idle_resume_us: None,
@@ -1908,6 +1912,7 @@ impl<'a> Scheduler<'a> {
19081912
disallow_open_after_us,
19091913
disallow_preempt_after_us,
19101914
xllc_mig_min_us,
1915+
llc_sticky_runs,
19111916
placement,
19121917
member_expire_ms,
19131918
..
@@ -1944,6 +1949,7 @@ impl<'a> Scheduler<'a> {
19441949
v => v * 1000,
19451950
};
19461951
layer.xllc_mig_min_ns = (xllc_mig_min_us * 1000.0) as u64;
1952+
layer.llc_sticky_runs = *llc_sticky_runs;
19471953
layer_weights.push(layer.weight.try_into().unwrap());
19481954
layer.perf = u32::try_from(*perf)?;
19491955
layer.node_mask = nodemask_from_nodes(nodes) as u64;

scheds/rust/scx_layered/src/stats.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ const LSTAT_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_MIGRATION as usize;
7171
const LSTAT_XNUMA_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_XNUMA_MIGRATION as usize;
7272
const LSTAT_XLLC_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_XLLC_MIGRATION as usize;
7373
const LSTAT_XLLC_MIGRATION_SKIP: usize = bpf_intf::layer_stat_id_LSTAT_XLLC_MIGRATION_SKIP as usize;
74+
const LSTAT_LLC_STICKY_SKIP: usize = bpf_intf::layer_stat_id_LSTAT_LLC_STICKY_SKIP as usize;
7475
const LSTAT_XLAYER_WAKE: usize = bpf_intf::layer_stat_id_LSTAT_XLAYER_WAKE as usize;
7576
const LSTAT_XLAYER_REWAKE: usize = bpf_intf::layer_stat_id_LSTAT_XLAYER_REWAKE as usize;
7677
const LSTAT_LLC_DRAIN_TRY: usize = bpf_intf::layer_stat_id_LSTAT_LLC_DRAIN_TRY as usize;
@@ -184,6 +185,8 @@ pub struct LayerStats {
184185
pub xllc_migration: f64,
185186
#[stat(desc = "% migration skipped across LLCs due to xllc_mig_min_us")]
186187
pub xllc_migration_skip: f64,
188+
#[stat(desc = "% migration skipped across LLCs due to llc_sticky_runs")]
189+
pub llc_sticky_skip: f64,
187190
#[stat(desc = "% wakers across layers")]
188191
pub xlayer_wake: f64,
189192
#[stat(desc = "% rewakers across layers where waker has waken the task previously")]
@@ -306,6 +309,7 @@ impl LayerStats {
306309
xlayer_rewake: lstat_pct(LSTAT_XLAYER_REWAKE),
307310
xllc_migration: lstat_pct(LSTAT_XLLC_MIGRATION),
308311
xllc_migration_skip: lstat_pct(LSTAT_XLLC_MIGRATION_SKIP),
312+
llc_sticky_skip: lstat_pct(LSTAT_LLC_STICKY_SKIP),
309313
llc_drain_try: lstat_pct(LSTAT_LLC_DRAIN_TRY),
310314
llc_drain: lstat_pct(LSTAT_LLC_DRAIN),
311315
skip_remote_node: lstat_pct(LSTAT_SKIP_REMOTE_NODE),
@@ -378,13 +382,14 @@ impl LayerStats {
378382

379383
writeln!(
380384
w,
381-
" {:<width$} open_idle={} mig={} xnuma_mig={} xllc_mig/skip={}/{} affn_viol={}",
385+
" {:<width$} open_idle={} mig={} xnuma_mig={} xllc_mig/skip={}/{}/{} affn_viol={}",
382386
"",
383387
fmt_pct(self.open_idle),
384388
fmt_pct(self.migration),
385389
fmt_pct(self.xnuma_migration),
386390
fmt_pct(self.xllc_migration),
387391
fmt_pct(self.xllc_migration_skip),
392+
fmt_pct(self.llc_sticky_skip),
388393
fmt_pct(self.affn_viol),
389394
width = header_width,
390395
)?;

0 commit comments

Comments (0)