Skip to content

Commit c36ba4c

Browse files
committed
[scx_lavd] modify lavd to use new peek operation
Summary: The per-CPU queues mode in LAVD checks multiple queues for potential steal targets. These checks can be made more efficient with the forthcoming O(1) lockless peek operation. This patch updates LAVD to use peek in three places:

- try_to_steal: picking the CPU with the lowest vtime
- on consume_task: checking the CPU DSQ
- on consume_task: checking the domain DSQ

Note that these usages are necessarily racy because no lock is held. That is, we could peek a low vtime, but the task may have moved or completed by the time we attempt to steal it.

Test Plan:
- ran schbench under a VM [1 cpdom, 50 CPUs]

Reviewers: daidavid
1 parent 059ce35 commit c36ba4c

File tree

3 files changed

+59
-17
lines changed

3 files changed

+59
-17
lines changed

scheds/include/scx/common.bpf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ u32 scx_bpf_reenqueue_local(void) __ksym;
7575
void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym;
7676
s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym;
7777
void scx_bpf_destroy_dsq(u64 dsq_id) __ksym;
78+
struct task_struct *scx_bpf_dsq_peek(u64 dsq_id) __ksym __weak;
7879
int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, u64 flags) __ksym __weak;
7980
struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) __ksym __weak;
8081
void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __ksym __weak;

scheds/include/scx/compat.bpf.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,13 @@ static inline bool __COMPAT_is_enq_cpu_selected(u64 enq_flags)
230230
scx_bpf_pick_any_cpu_node(cpus_allowed, node, flags) : \
231231
scx_bpf_pick_any_cpu(cpus_allowed, flags))
232232

233+
/*
 * Peek the task at the head of @dsq_id without consuming it; evaluates
 * to NULL when the DSQ is empty.
 *
 * Uses the scx_bpf_dsq_peek() kfunc when the running kernel provides it
 * (bpf_ksym_exists on the __weak ksym); otherwise falls back to taking
 * the first element of a DSQ iteration, which is O(1) per call but
 * requires the iterator setup/teardown.
 */
#define __COMPAT_scx_bpf_dsq_peek(dsq_id) \
	(bpf_ksym_exists(scx_bpf_dsq_peek) ? scx_bpf_dsq_peek(dsq_id) : ({ \
		struct task_struct *p = NULL; \
		bpf_for_each(scx_dsq, p, dsq_id, 0) { break; } \
		p; \
	}))
239+
233240
/*
234241
* Define sched_ext_ops. This may be expanded to define multiple variants for
235242
* backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().

scheds/rust/scx_lavd/src/bpf/balance.bpf.c

Lines changed: 51 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ int plan_x_cpdom_migration(void)
100100
return 0;
101101
}
102102
if ((stealee_threshold <= max_sc_load || overflow_running) &&
103-
(stealer_threshold < min_sc_load)) {
103+
(stealer_threshold < min_sc_load)) {
104104
/*
105105
* If there is a overloaded domain, always try to steal.
106106
* <~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~>
@@ -123,7 +123,7 @@ int plan_x_cpdom_migration(void)
123123
* Under-loaded active domains become a stealer.
124124
*/
125125
if (cpdomc->nr_active_cpus &&
126-
cpdomc->sc_load <= stealer_threshold) {
126+
cpdomc->sc_load <= stealer_threshold) {
127127
WRITE_ONCE(cpdomc->is_stealer, true);
128128
WRITE_ONCE(cpdomc->is_stealee, false);
129129
continue;
@@ -133,7 +133,7 @@ int plan_x_cpdom_migration(void)
133133
* Over-loaded or non-active domains become a stealee.
134134
*/
135135
if (!cpdomc->nr_active_cpus ||
136-
cpdomc->sc_load >= stealee_threshold) {
136+
cpdomc->sc_load >= stealee_threshold) {
137137
WRITE_ONCE(cpdomc->is_stealer, false);
138138
WRITE_ONCE(cpdomc->is_stealee, true);
139139
nr_stealee++;
@@ -173,6 +173,48 @@ static bool consume_dsq(struct cpdom_ctx *cpdomc, u64 dsq_id)
173173
return ret;
174174
}
175175

176+
/*
177+
* Attempts to peek the vtime of the task at the head of the DSQ, or returns U64_MAX if the DSQ is empty.
178+
*/
179+
static int peek_vtime(u64 dsq_id)
180+
{
181+
struct task_struct *task;
182+
task = __COMPAT_scx_bpf_dsq_peek(dsq_id);
183+
return task ? task->scx.dsq_vtime : U64_MAX;
184+
}
185+
186+
/*
 * Return the CPU in @cpdomc whose per-CPU DSQ head appears to have the
 * lowest vtime, or -ENOENT if no candidate was found.
 *
 * Racy by design: the peeks are lockless, so the observed head task may
 * have been consumed or migrated by the time the caller tries to steal
 * from the chosen CPU's DSQ.
 */
static int pick_cpu_with_lowest_vtime(struct cpdom_ctx *cpdomc)
{
	u64 lowest_vtime = U64_MAX;
	int pick_cpu = -ENOENT;
	int cpu, i, j;

	/* Per-CPU DSQs only exist in per-CPU DSQ mode. */
	if (!per_cpu_dsq)
		return -ENOENT;

	/* Walk the domain's CPU mask one 64-bit word at a time. */
	bpf_for(i, 0, LAVD_CPU_ID_MAX/64) {
		u64 cpumask = cpdomc->__cpumask[i];
		bpf_for(j, 0, 64) {
			if (cpumask & 0x1LLU << j) {
				u64 vtime;
				cpu = (i * 64) + j;
				if (cpu >= __nr_cpu_ids)
					break;
				/*
				 * An empty DSQ peeks as U64_MAX, so it can
				 * never beat a real head vtime.
				 */
				vtime = peek_vtime(cpu_to_dsq(cpu));
				if (vtime < lowest_vtime) {
					lowest_vtime = vtime;
					pick_cpu = cpu;
				}
			}
		}
	}

	return pick_cpu;
}
217+
176218
/*
177219
* For simplicity, try to just steal from the CPU with
178220
* the highest number of queued_tasks in this domain.
@@ -256,7 +298,7 @@ static bool try_to_steal_task(struct cpdom_ctx *cpdomc)
256298
if (!READ_ONCE(cpdomc_pick->is_stealee) || !cpdomc_pick->is_valid)
257299
continue;
258300

259-
pick_cpu = pick_most_loaded_cpu(cpdomc_pick);
301+
pick_cpu = pick_cpu_with_lowest_vtime(cpdomc_pick);
260302
if (pick_cpu >= 0)
261303
dsq_id = cpu_to_dsq(pick_cpu);
262304
else
@@ -333,7 +375,7 @@ static bool force_to_steal_task(struct cpdom_ctx *cpdomc)
333375
if (!cpdomc_pick->is_valid)
334376
continue;
335377

336-
pick_cpu = pick_most_loaded_cpu(cpdomc_pick);
378+
pick_cpu = pick_cpu_with_lowest_vtime(cpdomc_pick);
337379
if (pick_cpu >= 0)
338380
dsq_id = cpu_to_dsq(pick_cpu);
339381
else
@@ -350,7 +392,6 @@ static bool force_to_steal_task(struct cpdom_ctx *cpdomc)
350392
static bool consume_task(u64 cpu_dsq_id, u64 cpdom_dsq_id)
351393
{
352394
struct cpdom_ctx *cpdomc;
353-
struct task_struct *p;
354395
u64 vtime = U64_MAX, dsq_id = cpu_dsq_id;
355396

356397
cpdomc = MEMBER_VPTR(cpdom_ctxs, [dsq_to_cpdom(cpdom_dsq_id)]);
@@ -364,20 +405,13 @@ static bool consume_task(u64 cpu_dsq_id, u64 cpdom_dsq_id)
364405
* a task from any of stealee domains probabilistically.
365406
*/
366407
if (nr_cpdoms > 1 && READ_ONCE(cpdomc->is_stealer) &&
367-
try_to_steal_task(cpdomc))
408+
try_to_steal_task(cpdomc))
368409
goto x_domain_migration_out;
369410

370411
if (per_cpu_dsq) {
371-
bpf_for_each(scx_dsq, p, cpu_dsq_id, 0) {
372-
vtime = p->scx.dsq_vtime;
373-
break;
374-
}
375-
376-
bpf_for_each(scx_dsq, p, cpdom_dsq_id, 0) {
377-
if (p->scx.dsq_vtime < vtime)
378-
dsq_id = cpdom_dsq_id;
379-
break;
380-
}
412+
vtime = peek_vtime(cpu_dsq_id);
413+
if (peek_vtime(cpdom_dsq_id) < vtime)
414+
dsq_id = cpdom_dsq_id;
381415
} else {
382416
dsq_id = cpdom_dsq_id;
383417
}

0 commit comments

Comments
 (0)