
Commit 686fabc

scx_mitosis: major work stealing cleanup
1 parent 0dd6be6 commit 686fabc

File tree

5 files changed: +213 -94 lines changed

scheds/rust/scx_mitosis/src/bpf/intf.h

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ enum cell_stat_idx {
 };
 
 /* Function invocation counters */
-enum counter_idx {
+enum fn_counter_idx {
 	COUNTER_SELECT_CPU,
 	COUNTER_ENQUEUE,
 	COUNTER_DISPATCH,
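
For context on the renamed enum: function-invocation counters like these are usually backed by a per-CPU array indexed by the enum value and bumped at the top of each callback. A minimal sketch, assuming intf.h is included; the map name fn_counters and the COUNTER_NR bound are illustrative assumptions, not names from this commit.

/* Hypothetical per-CPU counter map keyed by enum fn_counter_idx. */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u64);
	__uint(max_entries, COUNTER_NR);	/* assumed terminator entry */
} fn_counters SEC(".maps");

static __always_inline void count_fn(enum fn_counter_idx idx)
{
	u32 key = idx;
	u64 *cnt = bpf_map_lookup_elem(&fn_counters, &key);

	if (cnt)
		(*cnt)++;	/* this CPU's slot, so no atomic needed */
}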

scheds/rust/scx_mitosis/src/bpf/l3_aware.bpf.h

Lines changed: 126 additions & 18 deletions
@@ -3,8 +3,8 @@
  * This software may be used and distributed according to the terms of the
  * GNU General Public License version 2.
  *
- * This header adds L3 cache awareness to scx_mitosis by defining BPF
- * maps for CPU-to-L3 domain mappings. It provides functions to
+ * This header assists adding L3 cache awareness to scx_mitosis by defining
+ * maps and fns for managing CPU-to-L3 domain mappings. It provides code to
  * recalculate per-L3 CPU counts within cells and implements weighted
  * random L3 selection for tasks. It also tracks work-stealing
  * statistics for cross-L3 task migrations.
@@ -14,10 +14,15 @@
 #include "mitosis.bpf.h"
 #include "intf.h"
 
-// It's also an option to just compute this from the cpu_to_l3 map.
-struct l3_cpu_mask {
-	unsigned long cpumask[CPUMASK_LONG_ENTRIES];
-};
+typedef u32 l3_id_t;
+#define L3_INVALID ((l3_id_t) ~0u)
+
+// Configure how aggressively we steal work.
+// When a task is detected as a steal candidate, skip it this many times.
+// On a web server workload, 100 reduced the steal count by ~90%.
+#ifdef MITOSIS_ENABLE_STEALING
+#define PREVENT_N_STEALS 10
+#endif
 
 /* Work stealing statistics map - accessible from both BPF and userspace */
 struct steal_stats_map {
@@ -38,27 +43,46 @@ struct cpu_to_l3_map {
 struct l3_to_cpus_map {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__type(key, u32);
-	__type(value, struct l3_cpu_mask);
+	__type(value, struct cpumask);
 	__uint(max_entries, MAX_L3S);
 };
 
-extern struct cpu_to_l3_map cpu_to_l3 SEC(".maps");
-extern struct l3_to_cpus_map l3_to_cpus SEC(".maps");
-extern struct steal_stats_map steal_stats SEC(".maps");
+extern struct cpu_to_l3_map cpu_to_l3;
+extern struct l3_to_cpus_map l3_to_cpus;
+extern struct steal_stats_map steal_stats;
+
+static inline const bool l3_is_valid(u32 l3_id) {
+	if (l3_id == L3_INVALID)
+		return false;
+
+	return (l3_id >= 0) && (l3_id < MAX_L3S);
+}
+
+static inline void init_task_l3(struct task_ctx *tctx) {
+	tctx->l3 = L3_INVALID;
+
+#if MITOSIS_ENABLE_STEALING
+	tctx->pending_l3 = L3_INVALID;
+	tctx->steal_count = 0;
+	tctx->last_stolen_at = 0;
+	tctx->steals_prevented = 0;
+#endif
+
+}
 
 static inline const struct cpumask *lookup_l3_cpumask(u32 l3)
 {
-	struct l3_cpu_mask *mask;
+	struct cpumask *mask;
 
 	if (!(mask = bpf_map_lookup_elem(&l3_to_cpus, &l3))) {
 		scx_bpf_error("no l3 cpumask, l3: %d, %p", l3, &l3_to_cpus);
 		return NULL;
 	}
 
-	return (const struct cpumask *)mask;
+	return mask;
 }
 
-/* Recompute cell->l3_cpu_cnt[] after cell cpumask changes (no persistent kptrs). */
+/* Recompute cell->l3_cpu_cnt[] after cell cpumask changes */
 static __always_inline void recalc_cell_l3_counts(u32 cell_idx)
 {
 	struct cell *cell = lookup_cell(cell_idx);
@@ -89,7 +113,6 @@ static __always_inline void recalc_cell_l3_counts(u32 cell_idx)
 			continue;
 		}
 
-		/* ok: dst is bpf_cpumask*, sources are (RCU cpumask*, plain cpumask*) */
 		bpf_cpumask_and(tmp, cell_mask, l3_mask);
 
 		u32 cnt = bpf_cpumask_weight((const struct cpumask *)tmp);
@@ -113,24 +136,24 @@ static __always_inline void recalc_cell_l3_counts(u32 cell_idx)
  * have higher probability of being selected.
  *
  * @cell_id: The cell ID to select an L3 from
- * @return: L3 ID on success, INVALID_L3_ID on error, or 0 as fallback
+ * @return: L3 ID on success, L3_INVALID on error
  */
 static inline s32 pick_l3_for_task(u32 cell_id)
 {
 	struct cell *cell;
 	u32 l3, target, cur = 0;
-	s32 ret = INVALID_L3_ID;
+	s32 ret = L3_INVALID;
 
 	/* Look up the cell structure */
 	if (!(cell = lookup_cell(cell_id)))
-		return INVALID_L3_ID;
+		return L3_INVALID;
 
 	/* Handle case where cell has no CPUs assigned yet */
 	if (!cell->cpu_cnt) {
 		scx_bpf_error(
 			"pick_l3_for_task: cell %d has no CPUs accounted yet",
 			cell_id);
-		return INVALID_L3_ID;
+		return L3_INVALID;
 	}
 
 	/* Generate random target value in range [0, cpu_cnt) */
@@ -148,3 +171,88 @@ static inline s32 pick_l3_for_task(u32 cell_id)
 	}
 	return ret;
 }
+
+#ifdef MITOSIS_ENABLE_STEALING
+
+static inline bool try_stealing_this_task(struct task_ctx *task_ctx,
+					  s32 local_l3, u64 candidate_dsq)
+{
+	// Attempt the steal; it can fail because it's a race.
+	if (!scx_bpf_dsq_move_to_local(candidate_dsq))
+		return false;
+
+	// We got the task!
+	task_ctx->steal_count++;
+	task_ctx->last_stolen_at = scx_bpf_now();
+	/* Retag to thief L3 (the one for this cpu) */
+	task_ctx->pending_l3 = local_l3;
+	task_ctx->steals_prevented = 0;
+
+	/* Increment steal counter in map */
+	u32 key = 0;
+	u64 *count = bpf_map_lookup_elem(&steal_stats, &key);
+	// NOTE: This could get expensive, but I'm not anticipating that many steals. Percpu if we care.
+	if (count)
+		__sync_fetch_and_add(count, 1);
+
+	return true;
+}
+
+/* Work stealing:
+ * Scan sibling (cell, L3) DSQs in the same cell and steal the first queued task if it can run on this cpu.
+ */
+static inline bool try_stealing_work(u32 cell, s32 local_l3)
+{
+	if (!l3_is_valid(local_l3))
+		scx_bpf_error("try_stealing_work: invalid local_l3");
+
+	struct cell *cell_ptr = lookup_cell(cell);
+	if (!cell_ptr)
+		scx_bpf_error("try_stealing_work: invalid cell");
+
+	// Loop over all L3s, looking for a queued task to steal
+	u32 candidate_l3;
+	bpf_for(candidate_l3, 0, nr_l3)
+	{
+		if (candidate_l3 == (u32)local_l3)
+			continue;
+
+		// Skip L3s that are not present in this cell
+		// Note: rechecking cell_ptr for the verifier
+		if (cell_ptr && cell_ptr->l3_cpu_cnt[candidate_l3] == 0)
+			continue;
+
+		u64 candidate_dsq = get_cell_l3_dsq_id(cell, candidate_l3);
+
+		struct task_struct *task = NULL;
+		struct task_ctx *task_ctx;
+		// I'm only using this for the verifier
+		bool found_task = false;
+
+		// Just a trick for peeking the head element
+		bpf_for_each(scx_dsq, task, candidate_dsq, 0)
+		{
+			task_ctx = lookup_task_ctx(task);
+			found_task = (task_ctx != NULL);
+			break;
+		}
+
+		// No task? Try the next L3.
+		if (!found_task)
+			continue;
+
+		// This knob throttles stealing.
+		// TODO: make runtime configurable
+		if (task_ctx->steals_prevented++ < PREVENT_N_STEALS) {
+			continue;
+		}
+
+		if (!try_stealing_this_task(task_ctx, local_l3, candidate_dsq))
+			continue;
+
+		// Success, we got a task (no guarantee it was the one we peeked though...)
+		return true;
+	}
+	return false;
+}
+#endif
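
For orientation, a sketch of how a CPU's dispatch path could fall back to this stealing helper once its own (cell, L3) DSQ is empty. This is not taken from the commit: the real call site lives elsewhere in the scheduler, and cell_of()/l3_of() are placeholder helpers assumed for illustration.

static __always_inline void dispatch_with_stealing(s32 cpu)
{
	u32 cell = cell_of(cpu);	/* assumed helper: cell owning this cpu */
	s32 local_l3 = l3_of(cpu);	/* assumed helper: L3 domain of this cpu */
	u64 local_dsq = get_cell_l3_dsq_id(cell, local_l3);

	/* Prefer work queued for this CPU's own cell+L3 domain */
	if (scx_bpf_dsq_move_to_local(local_dsq))
		return;

#ifdef MITOSIS_ENABLE_STEALING
	/* Nothing local: scan sibling L3 DSQs in the same cell */
	try_stealing_work(cell, local_l3);
#endif
}

Raising PREVENT_N_STEALS throttles stealing harder, trading migration overhead against how long tasks wait in a sibling DSQ before being pulled, per the comment at the top of the header.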
