diff --git a/scheds/include/scx/common.bpf.h b/scheds/include/scx/common.bpf.h
index 86abdb3c31..6d2f2e2a1a 100644
--- a/scheds/include/scx/common.bpf.h
+++ b/scheds/include/scx/common.bpf.h
@@ -103,6 +103,7 @@ s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
 bool scx_bpf_task_running(const struct task_struct *p) __ksym;
 s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
 struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
+struct task_struct *scx_bpf_task_acquire_remote_curr(s32 cpu) __ksym;
 struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
 u64 scx_bpf_now(void) __ksym __weak;
 void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __weak;
diff --git a/scheds/rust/scx_cosmos/src/bpf/main.bpf.c b/scheds/rust/scx_cosmos/src/bpf/main.bpf.c
index 9db24bb51e..438f42e876 100644
--- a/scheds/rust/scx_cosmos/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_cosmos/src/bpf/main.bpf.c
@@ -308,7 +308,14 @@ static inline const struct cpumask *get_idle_smtmask(s32 cpu)
  */
 static inline bool is_cpu_idle(s32 cpu)
 {
-	return scx_bpf_cpu_rq(cpu)->curr->flags & PF_IDLE;
+	struct task_struct *p = scx_bpf_task_acquire_remote_curr(cpu);
+	bool is_idle;
+
+	if (!p)
+		return false;
+	is_idle = p->flags & PF_IDLE;
+	bpf_task_release(p);
+	return is_idle;
 }
 
 /*
diff --git a/scheds/rust/scx_flash/src/bpf/main.bpf.c b/scheds/rust/scx_flash/src/bpf/main.bpf.c
index d145fd2848..b21cbb2655 100644
--- a/scheds/rust/scx_flash/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_flash/src/bpf/main.bpf.c
@@ -1985,15 +1985,13 @@ static int tickless_timerfn(void *map, int *key, struct bpf_timer *timer)
 	 */
 	bpf_rcu_read_lock();
 	bpf_for(cpu, 0, nr_cpu_ids) {
-		struct task_struct *p;
-		struct rq *rq = scx_bpf_cpu_rq(cpu);
 
-		if (!rq)
+		struct task_struct *p = scx_bpf_task_acquire_remote_curr(cpu);
+
+		if (!p)
 			continue;
 		/*
 		 * Ignore CPU if idle task is running.
 		 */
-		p = rq->curr;
 		if (p->flags & PF_IDLE)
 			continue;
 
@@ -2009,6 +2007,7 @@ static int tickless_timerfn(void *map, int *key, struct bpf_timer *timer)
 		 */
 		if (p->scx.slice == SCX_SLICE_INF)
 			p->scx.slice = slice_min;
+		bpf_task_release(p);
 	}
 	bpf_rcu_read_unlock();
 
diff --git a/scheds/rust/scx_lavd/src/bpf/preempt.bpf.c b/scheds/rust/scx_lavd/src/bpf/preempt.bpf.c
index 62b09f4b89..6434479fb1 100644
--- a/scheds/rust/scx_lavd/src/bpf/preempt.bpf.c
+++ b/scheds/rust/scx_lavd/src/bpf/preempt.bpf.c
@@ -189,11 +189,10 @@ static void ask_cpu_yield_after(struct cpu_ctx *victim_cpuc, u64 new_slice)
 	 * set the victim task's time slice to zero so the victim task yields
 	 * the CPU in the next scheduling point.
 	 */
-	struct rq *victim_rq;
 	struct task_struct *victim_p;
 
-	victim_rq = scx_bpf_cpu_rq(victim_cpuc->cpu_id);
-	if (victim_rq && (victim_p = victim_rq->curr)) {
+	victim_p = scx_bpf_task_acquire_remote_curr(victim_cpuc->cpu_id);
+	if (victim_p) {
 		/*
 		 * Finding a victim is racy, but we do not coordinate. Thus,
 		 * two different CPUs can choose the same victim CPU. We do not
@@ -213,8 +212,10 @@ static void ask_cpu_yield_after(struct cpu_ctx *victim_cpuc, u64 new_slice)
 		 * (SCX_SLICE_DFL, 20 msec).
 		 */
 		u64 old = victim_cpuc->est_stopping_clk;
-		if (!old)
+		if (!old) {
+			bpf_task_release(victim_p);
 			return;
+		}
 
 		/*
 		 * If the new slice is one, this is the last time to be kicked,
@@ -232,6 +233,7 @@ static void ask_cpu_yield_after(struct cpu_ctx *victim_cpuc, u64 new_slice)
 			if (victim_p->scx.slice > new_slice)
 				WRITE_ONCE(victim_p->scx.slice, new_slice);
 		}
+		bpf_task_release(victim_p);
 	}
 }
 
diff --git a/scheds/rust/scx_layered/src/bpf/main.bpf.c b/scheds/rust/scx_layered/src/bpf/main.bpf.c
index 571b73e95e..e6246eb7dc 100644
--- a/scheds/rust/scx_layered/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_layered/src/bpf/main.bpf.c
@@ -1249,7 +1249,6 @@ static bool try_preempt_cpu(s32 cand, struct task_struct *p, struct task_ctx *ta
 			    struct layer *layer, u64 flags)
 {
 	struct cpu_ctx *cpuc, *cand_cpuc, *sib_cpuc = NULL;
-	struct rq *rq;
 	struct task_struct *curr;
 	const struct cpumask *idle_cpumask;
 	bool cand_idle;
@@ -1276,19 +1275,20 @@
 	if (scx_bpf_dsq_nr_queued(SCX_DSQ_LOCAL_ON | cand))
 		return false;
 
-	rq = scx_bpf_cpu_rq(cand);
-	if (!rq)
+	curr = scx_bpf_task_acquire_remote_curr(cand);
+	if (!curr)
 		return false;
 
-	curr = rq->curr;
 	if (ext_sched_class_addr && idle_sched_class_addr &&
 	    ((u64)curr->sched_class != ext_sched_class_addr) &&
 	    ((u64)curr->sched_class != idle_sched_class_addr)) {
+		bpf_task_release(curr);
 		if (!(cpuc = lookup_cpu_ctx(-1)))
 			return false;
 		gstat_inc(GSTAT_SKIP_PREEMPT, cpuc);
 		return false;
 	}
+	bpf_task_release(curr);
 
 	/*
 	 * Don't preempt if protection against is in effect. However, open
diff --git a/scheds/rust/scx_tickless/src/bpf/main.bpf.c b/scheds/rust/scx_tickless/src/bpf/main.bpf.c
index 356d5244f0..f0c0fe36a2 100644
--- a/scheds/rust/scx_tickless/src/bpf/main.bpf.c
+++ b/scheds/rust/scx_tickless/src/bpf/main.bpf.c
@@ -414,12 +414,17 @@ static int sched_timerfn(void *map, int *key, struct bpf_timer *timer)
 	 */
 	bpf_for(cpu, 0, nr_cpu_ids) {
 		struct task_struct *p;
+		bool idle;
 
 		/*
 		 * Ignore CPU if idle task is running.
 		 */
-		p = scx_bpf_cpu_rq(cpu)->curr;
-		if (p->flags & PF_IDLE)
+		p = scx_bpf_task_acquire_remote_curr(cpu);
+		if (!p)
+			continue;
+		idle = p->flags & PF_IDLE;
+		bpf_task_release(p);
+		if (idle)
 			continue;
 
 		/*