Skip to content

Commit d629486

Browse files
committed
scx_rusty: set up AMD IBS performance counter
1 parent 2478aa2 commit d629486

File tree

3 files changed

+128
-23
lines changed

3 files changed

+128
-23
lines changed

scheds/rust/scx_rusty/src/bpf/main.bpf.c

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ struct {
198198
__uint(map_flags, 0);
199199
} node_data SEC(".maps");
200200

201+
201202
struct lock_wrapper {
202203
struct bpf_spin_lock lock;
203204
};
@@ -572,16 +573,45 @@ static void refresh_tune_params(void)
572573
}
573574
}
574575

576+
void *bpf_cast_to_kern_ctx(void *) __ksym;
577+
575578
/*
576579
* Performance counter callback.
577580
*/
578-
579581
SEC("perf_event")
580-
int drain_counters(void *ctx)
582+
int read_sample(struct bpf_perf_event_data_kern __kptr *arg)
581583
{
582-
scx_bpf_error("HIT");
583-
bpf_printk("Callback detected");
584-
return 1;
584+
struct bpf_perf_event_data_kern *ctx, a;
585+
struct perf_sample_data data;
586+
union perf_mem_data_src data_src;
587+
int ret;
588+
589+
ctx = bpf_cast_to_kern_ctx(arg);
590+
591+
if ((ret = bpf_probe_read_kernel(&a, sizeof(a), ctx))) {
592+
scx_bpf_error("[0] %s: bpf_probe_read_kernel failed", __func__);
593+
return -EACCES;
594+
}
595+
596+
if ((ret = bpf_probe_read_kernel(&data, sizeof(data), a.data))) {
597+
scx_bpf_error("%s: bpf_probe_read_kernel failed", __func__);
598+
return -EACCES;
599+
}
600+
601+
data_src = ctx->data->data_src;
602+
if (!ctx->data->sample_flags || data_src.mem_op == 1)
603+
return 0;
604+
605+
bpf_printk("%s\t(0x%lx,0x%lx,0x%lx) [%llx, %llx]",
606+
data_src.mem_op == 2 ? "LOAD" : (data_src.mem_op == 4 ? "STORE" : "UNKNOWN") ,
607+
data_src.mem_lvl_num,
608+
data_src.mem_snoop,
609+
data_src.mem_remote,
610+
ctx->data->phys_addr,
611+
ctx->data->addr
612+
);
613+
614+
return 0;
585615
}
586616

587617
static u64 min(u64 a, u64 b)

scheds/rust/scx_rusty/src/main.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,7 @@ struct Scheduler<'a> {
351351

352352
tuner: Tuner,
353353
stats_server: StatsServer<StatsCtx, (StatsCtx, ClusterStats)>,
354+
_pefds: Vec<(i32, libbpf_rs::Link)>,
354355
}
355356

356357
impl<'a> Scheduler<'a> {
@@ -448,10 +449,13 @@ impl<'a> Scheduler<'a> {
448449
// Attach.
449450
let mut skel = scx_ops_load!(skel, rusty, uei)?;
450451

451-
let (pefd, _link) = init_perf_counters(&mut skel, &0)?;
452-
println!("Got perf file descriptor {}", pefd);
452+
let mut pefds: Vec<(i32, libbpf_rs::Link)> = vec![];
453+
for i in 0..32 {
454+
pefds.push(init_perf_counters(&mut skel, &i)?);
455+
}
453456

454457
let struct_ops = Some(scx_ops_attach!(skel, rusty)?);
458+
455459
let stats_server = StatsServer::new(stats::server_data()).launch()?;
456460

457461
for (id, dom) in domains.doms().iter() {
@@ -489,6 +493,7 @@ impl<'a> Scheduler<'a> {
489493
opts.slice_us_overutil * 1000,
490494
)?,
491495
stats_server,
496+
_pefds: pefds,
492497
})
493498
}
494499

scheds/rust/scx_rusty/src/perf.rs

Lines changed: 86 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,83 @@ use std::mem;
1010

1111
// Expanded from systing's code: www.github.com/josefbacik/systing
1212

13-
const PERF_TYPE_HARDWARE: u32 = 0x0;
14-
const PERF_TYPE_SOFTWARE: u32 = 0x1;
15-
const PERF_TYPE_RAW: u32 = 0x3;
16-
const PERF_TYPE_IBS: u32 = 0xb;
17-
18-
const PERF_COUNT_HW_CPU_CYCLES: u64 = 0;
19-
const PERF_COUNT_HW_CACHE_REFERENCES: u64 = 2;
20-
const PERF_COUNT_HW_CACHE_MISSES: u64 = 3;
21-
const PERF_COUNT_HW_STALLED_CYCLES_FRONTEND: u64 = 7;
22-
const PERF_COUNT_HW_STALLED_CYCLES_BACKEND: u64 = 8;
23-
24-
const PERF_COUNT_SW_CPU_CLOCK: u64 = 0;
13+
const _PERF_TYPE_HARDWARE: u32 = 0x0;
14+
const _PERF_TYPE_SOFTWARE: u32 = 0x1;
15+
const _PERF_TYPE_RAW: u32 = 0x3;
16+
const _PERF_TYPE_AMD_IBS: u32 = 0xb;
17+
18+
const _PERF_COUNT_HW_CPU_CYCLES: u64 = 0;
19+
const _PERF_COUNT_HW_CACHE_REFERENCES: u64 = 2;
20+
const _PERF_COUNT_HW_CACHE_MISSES: u64 = 3;
21+
const _PERF_COUNT_HW_STALLED_CYCLES_FRONTEND: u64 = 7;
22+
const _PERF_COUNT_HW_STALLED_CYCLES_BACKEND: u64 = 8;
23+
24+
const _PERF_COUNT_SW_CPU_CLOCK: u64 = 0;
25+
26+
// WARNING: These are not guaranteed to be correct because the layout of the bitfield
27+
// in the perf_event_attr C struct that contains them is not guaranteed by the C standard.
28+
const _PERF_SAMPLE_FLAG_DISABLED: u64 = 1 << 0;
29+
const _PERF_SAMPLE_FLAG_INHERIT: u64 = 1 << 1;
30+
const _PERF_SAMPLE_FLAG_PINNED: u64 = 1 << 2;
31+
const _PERF_SAMPLE_FLAG_EXCLUSIVE: u64 = 1 << 3;
32+
const _PERF_SAMPLE_FLAG_EXCLUDE_USER: u64 = 1 << 4;
33+
const _PERF_SAMPLE_FLAG_EXCLUDE_KERNEL: u64 = 1 << 5;
34+
const _PERF_SAMPLE_FLAG_EXCLUDE_HV: u64 = 1 << 6;
35+
const _PERF_SAMPLE_FLAG_EXCLUDE_IDLE: u64 = 1 << 7;
36+
const _PERF_SAMPLE_FLAG_MMAP: u64 = 1 << 8;
37+
const _PERF_SAMPLE_FLAG_COMM: u64 = 1 << 9;
38+
const _PERF_SAMPLE_FLAG_FREQ: u64 = 1 << 10;
39+
const _PERF_SAMPLE_FLAG_INHERIT_STAT: u64 = 1 << 11;
40+
const _PERF_SAMPLE_FLAG_ENABLE_ON_EXEC: u64 = 1 << 12;
41+
const _PERF_SAMPLE_FLAG_TASK: u64 = 1 << 13;
42+
const _PERF_SAMPLE_FLAG_WATERMARK: u64 = 1 << 14;
43+
const _PERF_SAMPLE_FLAG_PRECISE_IP: u64 = 1 << 15;
44+
const _PERF_SAMPLE_FLAG_MMAP_DATA: u64 = 1 << 17;
45+
const _PERF_SAMPLE_FLAG_ID_ALL: u64 = 1 << 18;
46+
const _PERF_SAMPLE_FLAG_EXCLUDE_HOST: u64 = 1 << 19;
47+
const _PERF_SAMPLE_FLAG_EXCLUDE_GUEST: u64 = 1 << 20;
48+
const _PERF_SAMPLE_FLAG_EXCLUDE_CALLCHAIN_KERNEL: u64 = 1 << 21;
49+
const _PERF_SAMPLE_FLAG_EXCLUDE_CALLCHAIN_USER: u64 = 1 << 22;
50+
const _PERF_SAMPLE_FLAG_MMAP2: u64 = 1 << 23;
51+
const _PERF_SAMPLE_FLAG_COMM_EXEC: u64 = 1 << 24;
52+
const _PERF_SAMPLE_FLAG_USE_CLOCKID: u64 = 1 << 25;
53+
const _PERF_SAMPLE_FLAG_WRITE_BACKWARD: u64 = 1 << 26;
54+
const _PERF_SAMPLE_FLAG_NAMESPACES: u64 = 1 << 27;
55+
const _PERF_SAMPLE_FLAG_KSYMBOL: u64 = 1 << 28;
56+
const _PERF_SAMPLE_FLAG_BPF_SYMBOL: u64 = 1 << 29;
57+
const _PERF_SAMPLE_FLAG_AUX_OUTPUT: u64 = 1 << 30;
58+
const _PERF_SAMPLE_FLAG_CGROUP: u64 = 1 << 31;
59+
const _PERF_SAMPLE_FLAG_TEXT_POKE: u64 = 1 << 32;
60+
const _PERF_SAMPLE_FLAG_BUILD_ID: u64 = 1 << 33;
61+
const _PERF_SAMPLE_FLAG_INHERIT_THREAD: u64 = 1 << 34;
62+
const _PERF_SAMPLE_FLAG_REMOVE_ON_EXEC: u64 = 1 << 35;
63+
const _PERF_SAMPLE_FLAG_SIGTRAP: u64 = 1 << 36;
64+
65+
const _PERF_SAMPLE_IP: u64 = 1 << 0;
66+
const _PERF_SAMPLE_TID: u64 = 1 << 1;
67+
const _PERF_SAMPLE_TIME: u64 = 1 << 2;
68+
const _PERF_SAMPLE_ADDR: u64 = 1 << 3;
69+
const _PERF_SAMPLE_READ: u64 = 1 << 4;
70+
const _PERF_SAMPLE_CALLCHAIN: u64 = 1 << 5;
71+
const _PERF_SAMPLE_ID: u64 = 1 << 6;
72+
const _PERF_SAMPLE_CPU: u64 = 1 << 7;
73+
const _PERF_SAMPLE_PERIOD: u64 = 1 << 8;
74+
const _PERF_SAMPLE_STREAM_ID: u64 = 1 << 9;
75+
const _PERF_SAMPLE_RAW: u64 = 1 << 10;
76+
const _PERF_SAMPLE_BRANCH_STACK: u64 = 1 << 11;
77+
const _PERF_SAMPLE_REGS_USER: u64 = 1 << 12;
78+
const _PERF_SAMPLE_STACK_USER: u64 = 1 << 13;
79+
const _PERF_SAMPLE_WEIGHT: u64 = 1 << 14;
80+
const _PERF_SAMPLE_DATA_SRC: u64 = 1 << 15;
81+
const _PERF_SAMPLE_IDENTIFIER: u64 = 1 << 16;
82+
const _PERF_SAMPLE_TRANSACTION: u64 = 1 << 17;
83+
const _PERF_SAMPLE_REGS_INTR: u64 = 1 << 18;
84+
const _PERF_SAMPLE_PHYS_ADDR: u64 = 1 << 19;
85+
const _PERF_SAMPLE_PHYS_AUX: u64 = 1 << 20;
86+
const _PERF_SAMPLE_PHYS_CGROUP: u64 = 1 << 21;
87+
const _PERF_SAMPLE_DATA_PAGE_SIZE: u64 = 1 << 22;
88+
const _PERF_SAMPLE_CODE_PAGE_SIZE: u64 = 1 << 23;
89+
const _PERF_SAMPLE_WEIGHT_STRUCT: u64 = 1 << 24;
2590

2691
#[repr(C)]
2792
union sample_un {
@@ -141,19 +206,24 @@ pub fn init_perf_counters(skel: &mut BpfSkel, cpu: &i32) -> Result<(i32, libbpf_
141206
)
142207
};
143208

144-
attr._type = PERF_TYPE_HARDWARE;
209+
/*
210+
* XXX Discover the counter instead of hardcoding it.
211+
* Afterwards we can use any counter we care for.
212+
*/
213+
attr._type = _PERF_TYPE_AMD_IBS;
145214
attr.size = mem::size_of::<perf_event_attr>() as u32;
146-
attr.config = PERF_COUNT_HW_CPU_CYCLES;
215+
attr.config = 0;
216+
attr.sample_type = _PERF_SAMPLE_CPU | _PERF_SAMPLE_IP | _PERF_SAMPLE_TID | _PERF_SAMPLE_DATA_SRC | _PERF_SAMPLE_PHYS_ADDR | _PERF_SAMPLE_ADDR;
147217
attr.sample.sample_period = 1000;
148-
attr.flags = 0;
218+
attr.flags = 3 * _PERF_SAMPLE_FLAG_PRECISE_IP;
149219

150220
let pefd = perf_event_open(attr.as_ref(), -1, *cpu, -1, 0) as i32;
151221
if pefd == -1 {
152222
let os_error = io::Error::last_os_error();
153223
return Err(libbpf_rs::Error::from(os_error));
154224
}
155225

156-
let link = skel.progs.drain_counters.attach_perf_event_with_opts(pefd);
226+
let link = skel.progs.read_sample.attach_perf_event_with_opts(pefd);
157227

158228
Ok((pefd, link?))
159229
}

0 commit comments

Comments
 (0)