Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions scheds/rust/scx_layered/src/bpf/intf.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ enum layer_stat_id {
LSTAT_XNUMA_MIGRATION,
LSTAT_XLLC_MIGRATION,
LSTAT_XLLC_MIGRATION_SKIP,
LSTAT_LLC_STICKY_SKIP,
LSTAT_XLAYER_WAKE,
LSTAT_XLAYER_REWAKE,
LSTAT_LLC_DRAIN_TRY,
Expand Down Expand Up @@ -348,6 +349,7 @@ struct layer {
u64 disallow_open_after_ns;
u64 disallow_preempt_after_ns;
u64 xllc_mig_min_ns;
u32 llc_sticky_runs;

int kind;
bool preempt;
Expand Down
70 changes: 68 additions & 2 deletions scheds/rust/scx_layered/src/bpf/main.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,7 @@ struct task_ctx {
u64 runtime_avg;
u64 dsq_id;
u32 llc_id;
u32 llc_runs;

/* for llcc->queue_runtime */
u32 qrt_layer_id;
Expand Down Expand Up @@ -1281,6 +1282,14 @@ s32 pick_idle_cpu(struct task_struct *p, s32 prev_cpu,
if ((cpu = pick_idle_cpu_from(cpumask, prev_cpu, idle_smtmask, layer)) >= 0)
goto out_put;

/* Check if task is sticky to current LLC */
if (layer->llc_sticky_runs > 0 &&
taskc->llc_runs < layer->llc_sticky_runs) {
lstat_inc(LSTAT_LLC_STICKY_SKIP, layer, cpuc);
cpu = -1;
goto out_put;
}

if (!(prev_llcc = lookup_llc_ctx(prev_cpuc->llc_id)) ||
prev_llcc->queued_runtime[layer_id] < layer->xllc_mig_min_ns) {
lstat_inc(LSTAT_XLLC_MIGRATION_SKIP, layer, cpuc);
Expand Down Expand Up @@ -1365,6 +1374,7 @@ bool maybe_update_task_llc(struct task_struct *p, struct task_ctx *taskc, s32 ne
p->scx.dsq_vtime = new_llcc->vtime_now[layer_id] + vtime_delta;

taskc->llc_id = new_llc_id;
taskc->llc_runs = 0;
return true;
}

Expand Down Expand Up @@ -2174,6 +2184,7 @@ static __always_inline bool try_consume_layer(u32 layer_id, struct cpu_ctx *cpuc
u32 nid = llc_node_id(llcc->id);
bool xllc_mig_skipped = false;
bool skip_remote_node;
u64 dsq_id;
u32 u;

if (!(layer = lookup_layer(layer_id)))
Expand All @@ -2196,6 +2207,8 @@ static __always_inline bool try_consume_layer(u32 layer_id, struct cpu_ctx *cpuc
return false;
}

dsq_id = layer_dsq_id(layer_id, *llc_idp);

if (u > 0) {
struct llc_ctx *remote_llcc;

Expand All @@ -2211,9 +2224,42 @@ static __always_inline bool try_consume_layer(u32 layer_id, struct cpu_ctx *cpuc
xllc_mig_skipped = true;
continue;
}

/*
 * For remote LLC DSQs with LLC stickiness enabled, use a DSQ
 * iterator to check that each task is allowed to migrate
 * before moving it to the local DSQ.
 */
if (layer->llc_sticky_runs > 0 && bpf_ksym_exists(scx_bpf_dsq_move)) {
struct task_struct *p;
bool dispatched = false;

bpf_for_each(scx_dsq, p, dsq_id, 0) {
struct task_ctx *taskc;

if (!(taskc = lookup_task_ctx(p)))
continue;

if (taskc->llc_runs < layer->llc_sticky_runs) {
lstat_inc(LSTAT_LLC_STICKY_SKIP, layer, cpuc);
continue;
}

if (scx_bpf_dsq_move(BPF_FOR_EACH_ITER, p,
SCX_DSQ_LOCAL, 0)) {
dispatched = true;
break;
}
}

if (dispatched)
return true;

continue;
}
}

if (scx_bpf_dsq_move_to_local(layer_dsq_id(layer_id, *llc_idp)))
if (scx_bpf_dsq_move_to_local(dsq_id))
return true;
}

Expand Down Expand Up @@ -2546,7 +2592,22 @@ static __noinline bool match_one(struct layer *layer, struct layer_match *match,
bpf_rcu_read_unlock();
return result;
}
pid_t nspid = get_pid_nr_ns(p_pid, pid_ns);

/*
 * Open-code the get_pid_nr_ns() logic here so that the RCU read
 * lock does not cross a function boundary. Whether a call to the
 * helper works depends on the compiler inlining it, so we can't
 * just do:
 *	pid_t nspid = get_pid_nr_ns(p_pid, pid_ns);
 */
pid_t nspid = 0;
int level = BPF_CORE_READ(p_pid, level);
int ns_level = BPF_CORE_READ(pid_ns, level);
if (ns_level <= level) {
struct upid upid;
upid = BPF_CORE_READ(p_pid, numbers[ns_level]);
if (upid.ns == pid_ns)
nspid = upid.nr;
}

u64 nsid = BPF_CORE_READ(pid_ns, ns.inum);
bpf_rcu_read_unlock();
return (u32)nspid == match->pid && nsid == match->nsid;
Expand Down Expand Up @@ -3085,6 +3146,10 @@ void BPF_STRUCT_OPS(layered_running, struct task_struct *p)
if (time_before(llcc->vtime_now[layer_id], p->scx.dsq_vtime))
llcc->vtime_now[layer_id] = p->scx.dsq_vtime;

/* Increment LLC run counter if stickiness is enabled */
if (layer->llc_sticky_runs > 0)
taskc->llc_runs++;

cpuc->current_preempt = layer->preempt ||
(is_percpu_kthread(p) && is_percpu_kthread_preempting(p));
cpuc->used_at = now;
Expand Down Expand Up @@ -3439,6 +3504,7 @@ s32 BPF_STRUCT_OPS(layered_init_task, struct task_struct *p,
taskc->layer_id = MAX_LAYERS;
taskc->refresh_layer = true;
taskc->llc_id = MAX_LLCS;
taskc->llc_runs = 0;
taskc->qrt_layer_id = MAX_LLCS;
taskc->qrt_llc_id = MAX_LLCS;

Expand Down
2 changes: 2 additions & 0 deletions scheds/rust/scx_layered/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ pub struct LayerCommon {
pub disallow_preempt_after_us: Option<u64>,
#[serde(default)]
pub xllc_mig_min_us: f64,
#[serde(default)]
pub llc_sticky_runs: u32,
#[serde(default, skip_serializing)]
pub idle_smt: Option<bool>,
#[serde(default)]
Expand Down
6 changes: 6 additions & 0 deletions scheds/rust/scx_layered/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ lazy_static! {
disallow_open_after_us: None,
disallow_preempt_after_us: None,
xllc_mig_min_us: 1000.0,
llc_sticky_runs: 0,
growth_algo: LayerGrowthAlgo::Sticky,
idle_resume_us: None,
perf: 1024,
Expand Down Expand Up @@ -188,6 +189,7 @@ lazy_static! {
disallow_open_after_us: None,
disallow_preempt_after_us: None,
xllc_mig_min_us: 0.0,
llc_sticky_runs: 0,
growth_algo: LayerGrowthAlgo::Sticky,
perf: 1024,
idle_resume_us: None,
Expand Down Expand Up @@ -229,6 +231,7 @@ lazy_static! {
disallow_open_after_us: None,
disallow_preempt_after_us: None,
xllc_mig_min_us: 0.0,
llc_sticky_runs: 2,
growth_algo: LayerGrowthAlgo::Topo,
perf: 1024,
idle_resume_us: None,
Expand Down Expand Up @@ -268,6 +271,7 @@ lazy_static! {
disallow_open_after_us: None,
disallow_preempt_after_us: None,
xllc_mig_min_us: 100.0,
llc_sticky_runs: 0,
growth_algo: LayerGrowthAlgo::Linear,
perf: 1024,
idle_resume_us: None,
Expand Down Expand Up @@ -1908,6 +1912,7 @@ impl<'a> Scheduler<'a> {
disallow_open_after_us,
disallow_preempt_after_us,
xllc_mig_min_us,
llc_sticky_runs,
placement,
member_expire_ms,
..
Expand Down Expand Up @@ -1944,6 +1949,7 @@ impl<'a> Scheduler<'a> {
v => v * 1000,
};
layer.xllc_mig_min_ns = (xllc_mig_min_us * 1000.0) as u64;
layer.llc_sticky_runs = *llc_sticky_runs;
layer_weights.push(layer.weight.try_into().unwrap());
layer.perf = u32::try_from(*perf)?;
layer.node_mask = nodemask_from_nodes(nodes) as u64;
Expand Down
7 changes: 6 additions & 1 deletion scheds/rust/scx_layered/src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ const LSTAT_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_MIGRATION as usize;
const LSTAT_XNUMA_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_XNUMA_MIGRATION as usize;
const LSTAT_XLLC_MIGRATION: usize = bpf_intf::layer_stat_id_LSTAT_XLLC_MIGRATION as usize;
const LSTAT_XLLC_MIGRATION_SKIP: usize = bpf_intf::layer_stat_id_LSTAT_XLLC_MIGRATION_SKIP as usize;
const LSTAT_LLC_STICKY_SKIP: usize = bpf_intf::layer_stat_id_LSTAT_LLC_STICKY_SKIP as usize;
const LSTAT_XLAYER_WAKE: usize = bpf_intf::layer_stat_id_LSTAT_XLAYER_WAKE as usize;
const LSTAT_XLAYER_REWAKE: usize = bpf_intf::layer_stat_id_LSTAT_XLAYER_REWAKE as usize;
const LSTAT_LLC_DRAIN_TRY: usize = bpf_intf::layer_stat_id_LSTAT_LLC_DRAIN_TRY as usize;
Expand Down Expand Up @@ -184,6 +185,8 @@ pub struct LayerStats {
pub xllc_migration: f64,
#[stat(desc = "% migration skipped across LLCs due to xllc_mig_min_us")]
pub xllc_migration_skip: f64,
#[stat(desc = "% migration skipped across LLCs due to llc_sticky_runs")]
pub llc_sticky_skip: f64,
#[stat(desc = "% wakers across layers")]
pub xlayer_wake: f64,
#[stat(desc = "% rewakers across layers where waker has waken the task previously")]
Expand Down Expand Up @@ -306,6 +309,7 @@ impl LayerStats {
xlayer_rewake: lstat_pct(LSTAT_XLAYER_REWAKE),
xllc_migration: lstat_pct(LSTAT_XLLC_MIGRATION),
xllc_migration_skip: lstat_pct(LSTAT_XLLC_MIGRATION_SKIP),
llc_sticky_skip: lstat_pct(LSTAT_LLC_STICKY_SKIP),
llc_drain_try: lstat_pct(LSTAT_LLC_DRAIN_TRY),
llc_drain: lstat_pct(LSTAT_LLC_DRAIN),
skip_remote_node: lstat_pct(LSTAT_SKIP_REMOTE_NODE),
Expand Down Expand Up @@ -378,13 +382,14 @@ impl LayerStats {

writeln!(
w,
" {:<width$} open_idle={} mig={} xnuma_mig={} xllc_mig/skip={}/{} affn_viol={}",
" {:<width$} open_idle={} mig={} xnuma_mig={} xllc_mig/skip/sticky_skip={}/{}/{} affn_viol={}",
"",
fmt_pct(self.open_idle),
fmt_pct(self.migration),
fmt_pct(self.xnuma_migration),
fmt_pct(self.xllc_migration),
fmt_pct(self.xllc_migration_skip),
fmt_pct(self.llc_sticky_skip),
fmt_pct(self.affn_viol),
width = header_width,
)?;
Expand Down
Loading