
Cherry pick 6.11.4 verifier fix #89

Merged · 9 commits · Nov 12, 2024
2 changes: 1 addition & 1 deletion driver/bpf/probe.c

@@ -254,7 +254,7 @@ BPF_PROBE("sched/", sched_switch, sched_switch_args) {

evt_type = PPME_SCHEDSWITCH_6_E;

- call_filler(ctx, ctx, evt_type, 0, -1);
+ call_filler(ctx, ctx, evt_type, UF_ALWAYS_DROP, -1);
return 0;
}
2 changes: 1 addition & 1 deletion driver/main.c

@@ -2393,7 +2393,7 @@ TRACEPOINT_PROBE(sched_switch_probe,
 * handler calling printk() and potentially deadlocking the system.
 */
record_event_all_consumers(PPME_SCHEDSWITCH_6_E,
- UF_USED | UF_ATOMIC,
+ UF_ALWAYS_DROP | UF_ATOMIC,
&event_data,
KMOD_PROG_SCHED_SWITCH);
}
5 changes: 0 additions & 5 deletions driver/modern_bpf/helpers/base/maps_getters.h

@@ -85,11 +85,6 @@ static __always_inline uint8_t maps__64bit_sampling_syscall_table(uint32_t sysca
return g_64bit_sampling_syscall_table[syscall_id & (SYSCALL_TABLE_SIZE - 1)];
}

- static __always_inline uint8_t maps__64bit_sampling_tracepoint_table(uint32_t event_id) {
- return g_64bit_sampling_tracepoint_table[event_id < PPM_EVENT_MAX ? event_id
-                                                                   : PPM_EVENT_MAX - 1];
- }

/*=============================== SAMPLING TABLES ===========================*/

/*=============================== SYSCALL-64 INTERESTING TABLE ===========================*/
23 changes: 4 additions & 19 deletions driver/modern_bpf/helpers/interfaces/attached_programs.h

@@ -10,17 +10,11 @@

#include <helpers/base/maps_getters.h>

- /* This enum is used to tell if we are considering a syscall or a tracepoint */
- enum intrumentation_type {
- MODERN_BPF_SYSCALL = 0,
- MODERN_BPF_TRACEPOINT = 1,
- };

/* The sampling logic is used by all BPF programs attached to the kernel.
 * We treat the syscalls tracepoints in a dedicated way because they could generate
 * more than one event (1 for each syscall) for this reason we need a dedicated table.
 */
- static __always_inline bool sampling_logic(void* ctx, uint32_t id, enum intrumentation_type type) {
+ static __always_inline bool sampling_logic(void* ctx, uint32_t id) {
/* If dropping mode is not enabled we don't perform any sampling
 * false: means don't drop the syscall
 * true: means drop the syscall

@@ -29,16 +23,7 @@ static __always_inline bool sampling_logic(void* ctx, uint32_t id, enum intrumen
return false;
}

- uint8_t sampling_flag = 0;
-
- /* If we have a syscall we use the sampling_syscall_table otherwise
-  * with tracepoints we use the sampling_tracepoint_table.
-  */
- if(type == MODERN_BPF_SYSCALL) {
- sampling_flag = maps__64bit_sampling_syscall_table(id);
- } else {
- sampling_flag = maps__64bit_sampling_tracepoint_table(id);
- }
+ uint8_t sampling_flag = maps__64bit_sampling_syscall_table(id);

if(sampling_flag == UF_NEVER_DROP) {
return false;

@@ -59,15 +44,15 @@ static __always_inline bool sampling_logic(void* ctx, uint32_t id, enum intrumen
 * an iteration we will synchronize again the next time the logic is enabled.
 */
maps__set_is_dropping(true);
- bpf_tail_call(ctx, &extra_event_prog_tail_table, T1_DROP_E);
+ bpf_tail_call(ctx, &extra_syscall_calls, T1_DROP_E);
bpf_printk("unable to tail call into 'drop_e' prog");
}
return true;
}

if(maps__get_is_dropping()) {
maps__set_is_dropping(false);
- bpf_tail_call(ctx, &extra_event_prog_tail_table, T1_DROP_X);
+ bpf_tail_call(ctx, &extra_syscall_calls, T1_DROP_X);
bpf_printk("unable to tail call into 'drop_x' prog");
}
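For reference, sampling_logic() now reduces to a single flag lookup plus a ratio check. The decision tree can be sketched as the following self-contained C fragment; the flag values and the one-second sampling window are illustrative stand-ins, not the driver's actual definitions:

#include <stdbool.h>
#include <stdint.h>

#define UF_NONE 0 /* illustrative values, not the driver's real flag bits */
#define UF_NEVER_DROP 1
#define UF_ALWAYS_DROP 2
#define SECOND_TO_NS 1000000000ULL

/* true means "drop this event", false means "keep it" */
static bool should_drop(uint8_t sampling_flag, uint64_t now_ns, uint32_t sampling_ratio) {
	if(sampling_flag == UF_NEVER_DROP)
		return false;
	if(sampling_flag == UF_ALWAYS_DROP)
		return true;
	/* UF_NONE: keep only the first 1/sampling_ratio slice of each second */
	return (now_ns % SECOND_TO_NS) >= (SECOND_TO_NS / sampling_ratio);
}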
48 changes: 8 additions & 40 deletions driver/modern_bpf/helpers/store/auxmap_store_params.h

@@ -117,10 +117,15 @@ static __always_inline void auxmap__finalize_event_header(struct auxiliary_map *
 * of events sent to userspace, otherwise we increment the dropped events.
 *
 * @param auxmap pointer to the auxmap in which we have already written the entire event.
- * @param rb pointer to the ringbuffer where the event should be published to.
 */
- static __always_inline void auxmap__submit_event_base(struct auxiliary_map *auxmap,
-                                                       struct ringbuf_map *rb) {
+ static __always_inline void auxmap__submit_event(struct auxiliary_map *auxmap) {
+ struct ringbuf_map *rb = maps__get_ringbuf_map();
+ if(!rb) {
+ // this should never happen because we check it in sys_enter/sys_exit
+ bpf_printk("FAILURE: unable to obtain the ring buffer");
+ return;
+ }

struct counter_map *counter = maps__get_counter_map();
if(!counter) {
return;

@@ -147,43 +152,6 @@
return;
}

- /**
-  * @brief Try to copy the entire event from the auxiliary map to bpf ringbuf.
-  * If the event is correctly copied in the ringbuf we increment the number
-  * of events sent to userspace, otherwise we increment the dropped events.
-  *
-  * @param auxmap pointer to the auxmap in which we have already written the entire event.
-  * @returns 0 if we got the ringbuffer correctly.
-  */
- static __always_inline int auxmap__try_submit_event(struct auxiliary_map *auxmap) {
- struct ringbuf_map *rb = maps__get_ringbuf_map();
- if(!rb) {
- return 1;
- }
-
- auxmap__submit_event_base(auxmap, rb);
- return 0;
- }
-
- /**
-  * @brief Copy the entire event from the auxiliary map to bpf ringbuf.
-  * If the event is correctly copied in the ringbuf we increment the number
-  * of events sent to userspace, otherwise we increment the dropped events.
-  *
-  * @param auxmap pointer to the auxmap in which we have already written the entire event.
-  * @param ctx BPF prog context
-  */
- static __always_inline void auxmap__submit_event(struct auxiliary_map *auxmap, void *ctx) {
- struct ringbuf_map *rb = maps__get_ringbuf_map();
- if(!rb) {
- bpf_tail_call(ctx, &extra_event_prog_tail_table, T1_HOTPLUG_E);
- bpf_printk("failed to tail call into the 'hotplug' prog");
- return;
- }
-
- auxmap__submit_event_base(auxmap, rb);
- }

/////////////////////////////////
// STORE EVENT PARAMS INTO THE AUXILIARY MAP
////////////////////////////////
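The consolidated auxmap__submit_event() now owns the ring-buffer lookup and the success/drop accounting in one place. Its core shape is roughly the following sketch, built on the stock bpf_ringbuf_output() helper; the counter struct and its field names are assumptions for illustration, not the driver's actual types:

#include "vmlinux.h" /* assumed BTF type header */
#include <bpf/bpf_helpers.h>

struct event_counters { /* hypothetical stand-in for the driver's counter map value */
	__u64 n_evts;
	__u64 n_drops_buffer;
};

/* Copy `size` bytes of a staged event into the ring buffer `rb`,
 * updating the per-CPU statistics either way. */
static __always_inline void submit_staged_event(void *rb, struct event_counters *c,
						void *data, __u64 size) {
	c->n_evts++;
	if(bpf_ringbuf_output(rb, data, size, 0) != 0)
		c->n_drops_buffer++; /* ring buffer full: record the drop */
}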
6 changes: 2 additions & 4 deletions driver/modern_bpf/helpers/store/ringbuf_store_params.h

@@ -90,18 +90,16 @@ struct ringbuf_struct {
 * to know the event dimension at compile time.
 *
 * @param ringbuf pointer to the `ringbuf_struct`
- * @param ctx BPF prog context
 * @param event_size exact size of the fixed-size event
 * @return `1` in case of success, `0` in case of failure.
 */
static __always_inline uint32_t ringbuf__reserve_space(struct ringbuf_struct *ringbuf,
-                                                       void *ctx,
                                                       uint32_t event_size,
                                                       uint16_t event_type) {
struct ringbuf_map *rb = maps__get_ringbuf_map();
if(!rb) {
- bpf_tail_call(ctx, &extra_event_prog_tail_table, T1_HOTPLUG_E);
- bpf_printk("failed to tail call into the 'hotplug' prog");
+ // this should never happen because we check it in sys_enter/sys_exit
+ bpf_printk("FAILURE: unable to obtain the ring buffer");
return 0;
}
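For context, ringbuf__reserve_space() wraps the standard BPF reserve/write/submit pattern for fixed-size events. A minimal standalone sketch of that pattern, assuming a ring-buffer map named events:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 256 * 1024);
} events SEC(".maps");

SEC("tp_btf/sys_exit")
int demo_fixed_size_event(void *ctx) {
	/* reserve a fixed-size slot; NULL means the buffer is full */
	__u64 *slot = bpf_ringbuf_reserve(&events, sizeof(*slot), 0);
	if(!slot)
		return 0;
	*slot = bpf_ktime_get_ns(); /* write the payload in place */
	bpf_ringbuf_submit(slot, 0); /* make it visible to userspace */
	return 0;
}

char LICENSE[] SEC("license") = "GPL";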
13 changes: 2 additions & 11 deletions driver/modern_bpf/maps/maps.h

@@ -65,15 +65,6 @@ __weak bool g_64bit_interesting_syscalls_table[SYSCALL_TABLE_SIZE];
 */
__weak uint8_t g_64bit_sampling_syscall_table[SYSCALL_TABLE_SIZE];

- /**
-  * @brief Given the tracepoint enum returns:
-  * - `UF_NEVER_DROP` if the syscall must not be dropped in the sampling logic.
-  * - `UF_ALWAYS_DROP` if the syscall must always be dropped in the sampling logic.
-  * - `UF_NONE` if we drop the syscall depends on the sampling ratio.
-  */
- /// TOOD: we need to change the dimension! we need to create a dedicated enum for tracepoints!
- __weak uint8_t g_64bit_sampling_tracepoint_table[PPM_EVENT_MAX];

/**
 * @brief Given the syscall id on 32-bit x86 arch returns
 * its x64 value. Used to support ia32 syscall emulation.

@@ -131,15 +122,15 @@
 * programs directly attached in the kernel (like page_faults,
 * context_switch, ...) and by syscall_events (like
 * ppme_syscall_execveat_x, ...).
- * Given a predefined tail-code (`extra_event_prog_code`), it calls
+ * Given a predefined tail-code (`extra_syscall_codes`), it calls
 * the right bpf program.
 */
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
__uint(max_entries, TAIL_EXTRA_EVENT_PROG_MAX);
__type(key, uint32_t);
__type(value, uint32_t);
- } extra_event_prog_tail_table __weak SEC(".maps");
+ } extra_syscall_calls __weak SEC(".maps");

/*=============================== BPF_MAP_TYPE_PROG_ARRAY ===============================*/
@@ -49,7 +49,7 @@ int BPF_PROG(sys_enter, struct pt_regs *regs, long syscall_id) {
return 0;
}

- if(sampling_logic(ctx, syscall_id, MODERN_BPF_SYSCALL)) {
+ if(sampling_logic(ctx, syscall_id)) {
return 0;
}
@@ -63,14 +63,24 @@ int BPF_PROG(sys_exit, struct pt_regs *regs, long ret) {
return 0;
}

- if(sampling_logic(ctx, syscall_id, MODERN_BPF_SYSCALL)) {
+ if(sampling_logic(ctx, syscall_id)) {
return 0;
}

if(maps__get_drop_failed() && ret < 0) {
return 0;
}

+ // If we cannot find a ring buffer for this CPU we probably have an hotplug event. It's ok to
+ // check only in the exit path since we will always have at least one exit syscall enabled. If
+ // we change our architecture we may need to update this logic.
+ struct ringbuf_map *rb = maps__get_ringbuf_map();
+ if(!rb) {
+ bpf_tail_call(ctx, &extra_syscall_calls, T1_HOTPLUG_E);
+ bpf_printk("failed to tail call into the 'hotplug' prog");
+ return 0;
+ }

bpf_tail_call(ctx, &syscall_exit_tail_table, syscall_id);

return 0;
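The hotplug fallback above leans on bpf_tail_call()'s fall-through semantics: when the target slot is populated the call never returns, and when it is empty (or the call fails) execution simply continues on the next instruction. A minimal sketch of that pattern with hypothetical names:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u32);
} demo_tail_table SEC(".maps");

SEC("tp_btf/sys_exit")
int demo_dispatcher(void *ctx) {
	bpf_tail_call(ctx, &demo_tail_table, 0);
	/* reached only if slot 0 is empty or the tail call failed */
	bpf_printk("tail call into slot 0 failed");
	return 0;
}

char LICENSE[] SEC("license") = "GPL";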
@@ -16,12 +16,13 @@
#ifdef CAPTURE_PAGE_FAULTS
SEC("tp_btf/page_fault_kernel")
int BPF_PROG(pf_kernel, unsigned long address, struct pt_regs *regs, unsigned long error_code) {
- if(sampling_logic(ctx, PPME_PAGE_FAULT_E, MODERN_BPF_TRACEPOINT)) {
+ // In case of dropping mode we don't want this kind of events.
+ if(maps__get_dropping_mode()) {
return 0;
}

struct ringbuf_struct ringbuf;
- if(!ringbuf__reserve_space(&ringbuf, ctx, PAGE_FAULT_SIZE, PPME_PAGE_FAULT_E)) {
+ if(!ringbuf__reserve_space(&ringbuf, PAGE_FAULT_SIZE, PPME_PAGE_FAULT_E)) {
return 0;
}
@@ -16,12 +16,13 @@
#ifdef CAPTURE_PAGE_FAULTS
SEC("tp_btf/page_fault_user")
int BPF_PROG(pf_user, unsigned long address, struct pt_regs *regs, unsigned long error_code) {
- if(sampling_logic(ctx, PPME_PAGE_FAULT_E, MODERN_BPF_TRACEPOINT)) {
+ // In case of dropping mode we don't want this kind of events.
+ if(maps__get_dropping_mode()) {
return 0;
}

struct ringbuf_struct ringbuf;
- if(!ringbuf__reserve_space(&ringbuf, ctx, PAGE_FAULT_SIZE, PPME_PAGE_FAULT_E)) {
+ if(!ringbuf__reserve_space(&ringbuf, PAGE_FAULT_SIZE, PPME_PAGE_FAULT_E)) {
return 0;
}
@@ -13,6 +13,37 @@
 * struct linux_binprm *bprm)
 */
#ifdef CAPTURE_SCHED_PROC_EXEC

+ enum extra_sched_proc_exec_codes {
+ T1_SCHED_PROC_EXEC,
+ T2_SCHED_PROC_EXEC,
+ // add more codes here.
+ T_SCHED_PROC_EXEC_MAX,
+ };
+
+ /*
+  * FORWARD DECLARATIONS:
+  * See the `BPF_PROG` macro in libbpf `libbpf/src/bpf_tracing.h`
+  * #define BPF_PROG(name, args...) \
+  * name(unsigned long long *ctx); \
+  */
+ int t1_sched_p_exec(unsigned long long *ctx);
+ int t2_sched_p_exec(unsigned long long *ctx);
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, T_SCHED_PROC_EXEC_MAX);
+ __uint(key_size, sizeof(__u32));
+ __array(values, int(void *));
+ } extra_sched_proc_exec_calls SEC(".maps") = {
+ .values =
+ {
+ [T1_SCHED_PROC_EXEC] = (void *)&t1_sched_p_exec,
+ [T2_SCHED_PROC_EXEC] = (void *)&t2_sched_p_exec,
+ // add more tail calls here.
+ },
+ };
+
+ /* chose a short name for bpftool debugging*/
SEC("tp_btf/sched_process_exec")
int BPF_PROG(sched_p_exec, struct task_struct *p, pid_t old_pid, struct linux_binprm *bprm) {

@@ -114,7 +145,7 @@ int BPF_PROG(sched_p_exec, struct task_struct *p, pid_t old_pid, struct linux_bi

/*=============================== COLLECT PARAMETERS ===========================*/

- bpf_tail_call(ctx, &extra_event_prog_tail_table, T1_SCHED_PROC_EXEC);
+ bpf_tail_call(ctx, &extra_sched_proc_exec_calls, T1_SCHED_PROC_EXEC);
return 0;
}

@@ -234,11 +265,11 @@ int BPF_PROG(t1_sched_p_exec, struct task_struct *p, pid_t old_pid, struct linux

/*=============================== COLLECT PARAMETERS ===========================*/

- bpf_tail_call(ctx, &extra_event_prog_tail_table, T2_SCHED_PROC_EXEC);
+ bpf_tail_call(ctx, &extra_sched_proc_exec_calls, T2_SCHED_PROC_EXEC);
return 0;
}

- SEC("tp_btf/sys_exit")
+ SEC("tp_btf/sched_process_exec")
int BPF_PROG(t2_sched_p_exec, struct pt_regs *regs, long ret) {
struct auxiliary_map *auxmap = auxmap__get();
if(!auxmap) {

@@ -261,7 +292,7 @@ int BPF_PROG(t2_sched_p_exec, struct pt_regs *regs, long ret) {

auxmap__finalize_event_header(auxmap);

- auxmap__submit_event(auxmap, ctx);
+ auxmap__submit_event(auxmap);
return 0;
}
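A note on the __array(values, ...) initializer introduced above: because the tail-call slots are wired statically, libbpf populates the PROG_ARRAY at load time and userspace never needs to call bpf_map_update_elem() for them. A hedged loader-side sketch, assuming a bpftool-generated skeleton named probe.skel.h:

#include <stdio.h>
#include <unistd.h>
#include "probe.skel.h" /* hypothetical generated skeleton */

int main(void) {
	struct probe *skel = probe__open_and_load();
	if(!skel) {
		fprintf(stderr, "failed to open/load BPF skeleton\n");
		return 1;
	}
	/* extra_sched_proc_exec_calls is already populated at this point */
	if(probe__attach(skel)) {
		fprintf(stderr, "failed to attach BPF programs\n");
		probe__destroy(skel);
		return 1;
	}
	printf("probe running, Ctrl-C to exit\n");
	pause();
	probe__destroy(skel);
	return 0;
}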
@@ -201,7 +201,7 @@ int BPF_PROG(sched_proc_exit, struct task_struct *task) {

auxmap__finalize_event_header(auxmap);

- auxmap__submit_event(auxmap, ctx);
+ auxmap__submit_event(auxmap);

return 0;
}