 #include <scx/percpu.bpf.h>
 #include "intf.h"
 
+/*
+ * Maximum number of CPUs supported by the scheduler when flat or preferred
+ * idle CPU scan is enabled.
+ */
+#define MAX_CPUS 1024
+
 /*
  * Maximum rate of task wakeups/sec (tasks with a higher rate are capped to
  * this value).
@@ -75,6 +81,21 @@ const volatile bool local_pcpu = true;
  */
 volatile s64 cpufreq_perf_lvl;
 
+/*
+ * Enable preferred cores prioritization.
+ */
+const volatile bool preferred_idle_scan;
+
+/*
+ * CPUs sorted by their capacity in descending order.
+ */
+const volatile u64 preferred_cpus[MAX_CPUS];
+
+/*
+ * Cached CPU capacity values.
+ */
+const volatile u64 cpu_capacity[MAX_CPUS];
+
 /*
  * Scheduling statistics.
  */
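
The preferred_cpus[] ranking above is consumed purely by index order in the scan code added below, so whatever populates it (not shown in this diff) is expected to sort CPU ids by capacity, highest first. A minimal sketch of that step, with a hypothetical build_preferred_cpus() helper and caller-supplied capacities, assuming nothing about the real loader:

typedef unsigned long long u64;

/*
 * Hypothetical helper (not part of this change): rank CPU ids so that
 * higher-capacity CPUs come first, matching the documented layout of
 * preferred_cpus[].
 */
static void build_preferred_cpus(u64 *preferred, const u64 *capacity, int nr_cpus)
{
	int i, j;

	for (i = 0; i < nr_cpus; i++)
		preferred[i] = i;

	/* Insertion sort, descending by capacity (stable for equal capacities). */
	for (i = 1; i < nr_cpus; i++) {
		u64 cpu = preferred[i];

		for (j = i - 1; j >= 0 && capacity[preferred[j]] < capacity[cpu]; j--)
			preferred[j + 1] = preferred[j];
		preferred[j + 1] = cpu;
	}
}

The same capacity array would naturally also seed cpu_capacity[], which is what is_cpu_faster() compares below.
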
@@ -252,6 +273,33 @@ static inline const struct cpumask *get_idle_cpumask(s32 cpu)
 	return __COMPAT_scx_bpf_get_idle_cpumask_node(__COMPAT_scx_bpf_cpu_node(cpu));
 }
 
+/*
+ * Return the cpumask of fully idle SMT cores within the NUMA node that
+ * contains @cpu.
+ *
+ * If NUMA support is disabled, @cpu is ignored.
+ */
+static inline const struct cpumask *get_idle_smtmask(s32 cpu)
+{
+	if (!numa_enabled)
+		return scx_bpf_get_idle_smtmask();
+
+	return __COMPAT_scx_bpf_get_idle_smtmask_node(__COMPAT_scx_bpf_cpu_node(cpu));
+}
+
+/*
+ * Return true if @cpu is valid, otherwise trigger an error and return
+ * false.
+ */
+static inline bool is_cpu_valid(s32 cpu)
+{
+	if (cpu < 0 || cpu >= MAX_CPUS) {
+		scx_bpf_error("invalid CPU id: %d", cpu);
+		return false;
+	}
+	return true;
+}
+
 /*
  * Return true if @this_cpu and @that_cpu are in the same LLC, false
  * otherwise.
@@ -261,6 +309,9 @@ static inline bool cpus_share_cache(s32 this_cpu, s32 that_cpu)
 	if (this_cpu == that_cpu)
 		return true;
 
+	if (!is_cpu_valid(this_cpu) || !is_cpu_valid(that_cpu))
+		return false;
+
 	return cpu_llc_id(this_cpu) == cpu_llc_id(that_cpu);
 }
 
@@ -272,7 +323,10 @@ static inline bool is_cpu_faster(s32 this_cpu, s32 that_cpu)
 	if (this_cpu == that_cpu)
 		return false;
 
-	return cpu_priority(this_cpu) > cpu_priority(that_cpu);
+	if (!is_cpu_valid(this_cpu) || !is_cpu_valid(that_cpu))
+		return false;
+
+	return cpu_capacity[this_cpu] > cpu_capacity[that_cpu];
 }
 
 /*
@@ -329,6 +383,102 @@ static inline bool is_wakeup(u64 wake_flags)
 	return wake_flags & SCX_WAKE_TTWU;
 }
 
+/*
+ * Try to pick the best idle CPU based on the @preferred_cpus ranking.
+ * Return a CPU from a fully idle SMT core if @smt is set, or any idle
+ * CPU if @smt is NULL.
+ */
+static s32 pick_idle_cpu_pref_smt(struct task_struct *p, s32 prev_cpu, bool is_prev_allowed,
+				  const struct cpumask *primary, const struct cpumask *smt)
+{
+	u64 max_cpus = MIN(nr_cpu_ids, MAX_CPUS);
+	int i;
+
+	if (is_prev_allowed &&
+	    (!primary || bpf_cpumask_test_cpu(prev_cpu, primary)) &&
+	    (!smt || bpf_cpumask_test_cpu(prev_cpu, smt)) &&
+	    scx_bpf_test_and_clear_cpu_idle(prev_cpu))
+		return prev_cpu;
+
+	bpf_for(i, 0, max_cpus) {
+		s32 cpu = preferred_cpus[i];
+
+		if ((cpu == prev_cpu) || !bpf_cpumask_test_cpu(cpu, p->cpus_ptr))
+			continue;
+
+		if ((!primary || bpf_cpumask_test_cpu(cpu, primary)) &&
+		    (!smt || bpf_cpumask_test_cpu(cpu, smt)) &&
+		    scx_bpf_test_and_clear_cpu_idle(cpu))
+			return cpu;
+	}
+
+	return -EBUSY;
+}
+
+/*
+ * Return the optimal idle CPU for task @p or -EBUSY if no idle CPU is
+ * found.
+ */
+static s32 pick_idle_cpu_scan(struct task_struct *p, s32 prev_cpu)
+{
+	const struct cpumask *smt, *primary;
+	bool is_prev_allowed = bpf_cpumask_test_cpu(prev_cpu, p->cpus_ptr);
+	s32 cpu;
+
+	primary = !primary_all ? cast_mask(primary_cpumask) : NULL;
+	smt = smt_enabled ? get_idle_smtmask(prev_cpu) : NULL;
+
+	/*
+	 * If the task can't migrate, there's no point looking for other
+	 * CPUs.
+	 */
+	if (p->nr_cpus_allowed == 1 || is_migration_disabled(p)) {
+		if (scx_bpf_test_and_clear_cpu_idle(prev_cpu)) {
+			cpu = prev_cpu;
+			goto out;
+		}
+	}
+
+	if (!primary_all) {
+		if (smt_enabled) {
+			/*
+			 * Try to pick a full-idle core in the primary
+			 * domain.
+			 */
+			cpu = pick_idle_cpu_pref_smt(p, prev_cpu, is_prev_allowed, primary, smt);
+			if (cpu >= 0)
+				goto out;
+		}
+
+		/*
+		 * Try to pick any idle CPU in the primary domain.
+		 */
+		cpu = pick_idle_cpu_pref_smt(p, prev_cpu, is_prev_allowed, primary, NULL);
+		if (cpu >= 0)
+			goto out;
+	}
+
+	if (smt_enabled) {
+		/*
+		 * Try to pick any full-idle core in the system.
+		 */
+		cpu = pick_idle_cpu_pref_smt(p, prev_cpu, is_prev_allowed, NULL, smt);
+		if (cpu >= 0)
+			goto out;
+	}
+
+	/*
+	 * Try to pick any idle CPU in the system.
+	 */
+	cpu = pick_idle_cpu_pref_smt(p, prev_cpu, is_prev_allowed, NULL, NULL);
+
+out:
+	if (smt)
+		scx_bpf_put_cpumask(smt);
+
+	return cpu;
+}
+
 /*
  * Pick an optimal idle CPU for task @p (as close as possible to
  * @prev_cpu).
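
To make the fallback order of pick_idle_cpu_scan() easier to follow, here is a small self-contained C illustration, not taken from the commit: pick_ranked(), the bitmask domains, and the example values are all made up, but the four-level order matches the code above (full-idle core in the primary domain, any idle CPU in the primary domain, any full-idle core, then any idle CPU):

#include <stdio.h>

/*
 * Scan CPUs in ranking order, honoring task affinity and the optional
 * primary/SMT filter masks (0 means "no constraint").
 */
static int pick_ranked(const int *pref, int n, unsigned int allowed,
		       unsigned int idle, unsigned int primary, unsigned int smt)
{
	for (int i = 0; i < n; i++) {
		int cpu = pref[i];
		unsigned int bit = 1u << cpu;

		if (!(allowed & bit) || !(idle & bit))
			continue;
		if (primary && !(primary & bit))
			continue;
		if (smt && !(smt & bit))
			continue;
		return cpu;
	}
	return -1;
}

int main(void)
{
	const int pref[] = { 0, 1, 2, 3 };	/* higher-capacity CPUs first */
	unsigned int allowed = 0xf, idle = 0xc;	/* only CPUs 2 and 3 are idle */
	unsigned int primary = 0x3, smt = 0x8;	/* made-up domains */
	int cpu;

	/* Same fallback order as pick_idle_cpu_scan(). */
	cpu = pick_ranked(pref, 4, allowed, idle, primary, smt);
	if (cpu < 0)
		cpu = pick_ranked(pref, 4, allowed, idle, primary, 0);
	if (cpu < 0)
		cpu = pick_ranked(pref, 4, allowed, idle, 0, smt);
	if (cpu < 0)
		cpu = pick_ranked(pref, 4, allowed, idle, 0, 0);

	printf("picked CPU %d\n", cpu);		/* prints: picked CPU 3 */
	return 0;
}
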
@@ -341,6 +491,14 @@ static s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu, s32 this_cpu,
 	const struct cpumask *primary = cast_mask(primary_cpumask);
 	s32 cpu;
 
+	/*
+	 * Use lightweight idle CPU scanning when flat or preferred idle
+	 * scan is enabled, unless the system is busy, in which case the
+	 * cpumask-based scanning is more efficient.
+	 */
+	if (preferred_idle_scan)
+		return pick_idle_cpu_scan(p, prev_cpu);
+
 	/*
 	 * Clear the wake sync bit if synchronous wakeups are disabled.
 	 */
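
The final hunk only routes pick_idle_cpu() to the new scan. For orientation, a sched_ext select-CPU callback typically drives such a helper as in the generic sketch below; this is not this scheduler's actual callback, and since the full pick_idle_cpu() signature is truncated in the hunk header above, the 4-argument form, the callback name, and the direct dispatch via scx_bpf_dsq_insert()/SCX_DSQ_LOCAL are all illustrative assumptions:

s32 BPF_STRUCT_OPS(example_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
{
	s32 this_cpu = bpf_get_smp_processor_id();
	s32 cpu;

	/* Assumed signature: (task, previous CPU, current CPU, wake flags). */
	cpu = pick_idle_cpu(p, prev_cpu, this_cpu, wake_flags);
	if (cpu >= 0) {
		/* An idle CPU was claimed: queue the task on its local DSQ. */
		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
		return cpu;
	}

	/* No idle CPU found: keep the task on its previous CPU. */
	return prev_cpu;
}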