
new(bpf-driver): Introduce bpf_probe_read_user and bpf_probe_read_kernel variants #809

Merged: 27 commits, Feb 6, 2023

Changes from all commits
6f15920
new(bpf): introduce bpf_probe_read_{kernel,user}* variants
hbrueckner Sep 8, 2022
94d0521
update(bpf): correct bpf probe reads for kernel/user memory access
hbrueckner Sep 8, 2022
96479fc
new(bpf): provide _READP(P) variants for kernel/user memory reads
hbrueckner Sep 8, 2022
e2ac1c3
update(bpf): correct places where _READ(P) reads from user memory
hbrueckner Sep 8, 2022
92fc0a2
new(bpf): enable helpers to read from kernel and user space memory
hbrueckner Sep 8, 2022
82d6db5
update(bpf): call helpers with user/kernel memory arguments
hbrueckner Sep 8, 2022
2ae0e39
fix(bpf): Fix eBPF compilation on kernels <= 5.5
Molter73 Sep 12, 2022
6ceb32a
update(bpf): add feature gate for `bpf_probe_read_{kernel|user}*()` v…
hbrueckner Sep 12, 2022
4369ec0
update(bpf): clean-up curly braces
hbrueckner Sep 12, 2022
514e634
update(bpf): Extract helper for read values into the ring
hbrueckner Sep 12, 2022
a3bfd5b
update(bpf): add RHEL version check for USE_BPF_PROBE_KERNEL_USER_VAR…
hbrueckner Sep 12, 2022
278fa98
fix(bpf): Fix 'USE_BPF_PROBE_KERNEL_USER_VARIANTS' feature gate
Molter73 Sep 19, 2022
064b63a
fix(bpf): Fix verifier errors
Molter73 Sep 19, 2022
8a7bea3
update(bpf): remove duplicate _READ in bpf_addr_to_kernel
hbrueckner Sep 13, 2022
aa731a8
update(bpf): correct user/kernel bpf read for unix socket path
hbrueckner Sep 26, 2022
5406c3b
update(bpf): update fsconfig bpf probe reads for user/kernel
hbrueckner Oct 11, 2022
5281cb9
update(bpf): read upper_layer from kernel space
hbrueckner Dec 21, 2022
70e2076
update(bpf): read environ data in mm_struct from user space
hbrueckner Jan 16, 2023
1ac9d6e
update(bpf): read ipv4/6 addr/port from kernel sk structure
hbrueckner Jan 31, 2023
d14c702
update(bpf): read socket path from kernel unix_sock
hbrueckner Jan 31, 2023
d909034
update(bpf): provide socktuple / queuelen for successful accept_x
hbrueckner Feb 1, 2023
e72d25a
fix(bpf): correct fsconfigX_failure when pushing empty PT_CHARBUF param
hbrueckner Feb 1, 2023
93592b1
update(bpf): correct flags param index for clone syscall on s390x
hbrueckner Feb 2, 2023
843b2e2
update(bpf): drop other child events when sched_proc_fork is used
hbrueckner Feb 3, 2023
28650fd
update(kmod): provide socktuple / queuelen for successful accept_x
hbrueckner Feb 1, 2023
1c75bb2
chore(bpf): remove `memset` in `_READ()` macros
hbrueckner Feb 6, 2023
9b25675
update(bpf): initialize variable to make clang-15 happy
hbrueckner Feb 6, 2023
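
For context, the change replaces the address-space agnostic bpf_probe_read()/bpf_probe_read_str() calls with the kernel/user specific variants that upstream Linux added in v5.5. A minimal illustration, not taken from this diff (read_both, uptr and kptr are hypothetical names):

static __always_inline void read_both(const void *uptr, const void *kptr)
{
	char buf[64];

	/* legacy helper: does not distinguish address spaces */
	bpf_probe_read(buf, sizeof(buf), uptr);

	/* explicit variants used by this PR; on kernels without them,
	 * bpf_helpers.h below maps these names back to bpf_probe_read()
	 */
	bpf_probe_read_user(buf, sizeof(buf), uptr);   /* user memory   */
	bpf_probe_read_kernel(buf, sizeof(buf), kptr); /* kernel memory */
}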
21 changes: 21 additions & 0 deletions driver/bpf/bpf_helpers.h
@@ -71,6 +71,27 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
(void *)BPF_FUNC_xdp_adjust_head;
static int (*bpf_probe_read_str)(void *dst, u64 size, const void *unsafe_ptr) =
(void *)BPF_FUNC_probe_read_str;

#if defined(USE_BPF_PROBE_KERNEL_USER_VARIANTS)
static int (*bpf_probe_read_user)(void *dst, u32 size, const void *unsafe_ptr) =
(void *)BPF_FUNC_probe_read_user;
static int (*bpf_probe_read_kernel)(void *dst, u32 size, const void *unsafe_ptr) =
(void *)BPF_FUNC_probe_read_kernel;
static int (*bpf_probe_read_user_str)(void *dst, u32 size, const void *unsafe_ptr) =
(void *)BPF_FUNC_probe_read_user_str;
static int (*bpf_probe_read_kernel_str)(void *dst, u32 size, const void *unsafe_ptr) =
(void *)BPF_FUNC_probe_read_kernel_str;
#else
static int (*bpf_probe_read_user)(void *dst, u32 size, const void *unsafe_ptr) =
(void *)BPF_FUNC_probe_read;
static int (*bpf_probe_read_kernel)(void *dst, u32 size, const void *unsafe_ptr) =
(void *)BPF_FUNC_probe_read;
static int (*bpf_probe_read_user_str)(void *dst, u32 size, const void *unsafe_ptr) =
(void *)BPF_FUNC_probe_read_str;
static int (*bpf_probe_read_kernel_str)(void *dst, u32 size, const void *unsafe_ptr) =
(void *)BPF_FUNC_probe_read_str;
#endif

static u64 (*bpf_get_current_task)(void) =
(void *)BPF_FUNC_get_current_task;
static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
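The declarations above fall back to BPF_FUNC_probe_read(_str) when USE_BPF_PROBE_KERNEL_USER_VARIANTS is not defined, so the same call sites compile on kernels that lack the split helpers. A rough sketch of how such a gate could be keyed on the kernel version (hypothetical; the gate actually added by commits 6ceb32a and a3bfd5b also handles RHEL backports):

#include <linux/version.h>

/* bpf_probe_read_{kernel,user}[_str] exist upstream since Linux 5.5 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
#define USE_BPF_PROBE_KERNEL_USER_VARIANTS
#endif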
186 changes: 131 additions & 55 deletions driver/bpf/filler_helpers.h
@@ -25,6 +25,15 @@ or GPL2.txt for full copies of the license.
#define MAX_PATH_COMPONENTS 16
#define MAX_PATH_LENGTH 4096

/* This enum is used to tell our helpers if they have to
* read from kernel or user memory.
*/
enum read_memory
{
USER = 0,
KERNEL = 1,
};

static __always_inline bool in_port_range(uint16_t port, uint16_t min, uint16_t max)
{
return port >= min && port <= max;
@@ -96,7 +105,7 @@ static __always_inline char *bpf_get_path(struct filler_data *data, int fd)
for(i = 1; i < MAX_PATH_COMPONENTS && i <= nreads && res >= 0; i++)
{
path_level = (nreads-i) & (MAX_PATH_COMPONENTS-1);
res = bpf_probe_read_str(&filepath[curoff_bounded], MAX_PATH_LENGTH,
res = bpf_probe_read_kernel_str(&filepath[curoff_bounded], MAX_PATH_LENGTH,
(const void*)pointers_buf[path_level]);
curoff_bounded = (curoff_bounded+res-1) & SCRATCH_SIZE_HALF;
if(i>1 && i<nreads && res>0)
@@ -255,9 +264,9 @@ static __always_inline bool bpf_getsockname(struct socket *sock,

#ifdef BPF_FORBIDS_ZERO_ACCESS
if (len > 0)
bpf_probe_read(sunaddr, ((len - 1) & 0xff) + 1, addr->name);
bpf_probe_read_kernel(sunaddr, ((len - 1) & 0xff) + 1, addr->name);
#else
bpf_probe_read(sunaddr, len, addr->name);
bpf_probe_read_kernel(sunaddr, len, addr->name);
#endif
}

@@ -273,16 +282,17 @@ static __always_inline bool bpf_getsockname(struct socket *sock,
static __always_inline int bpf_addr_to_kernel(void *uaddr, int ulen,
struct sockaddr *kaddr)
{
int len = _READ(ulen);
int len = ulen & 0xfff; /* required by BPF verifier */

if (len < 0 || len > sizeof(struct sockaddr_storage))
return -EINVAL;
if (len == 0)
return 0;

#ifdef BPF_FORBIDS_ZERO_ACCESS
if (bpf_probe_read(kaddr, ((len - 1) & 0xff) + 1, uaddr))
if (bpf_probe_read_user(kaddr, ((len - 1) & 0xff) + 1, uaddr))
#else
if (bpf_probe_read(kaddr, len & 0xff, uaddr))
if (bpf_probe_read_user(kaddr, len & 0xff, uaddr))
#endif
return -EFAULT;

@@ -363,7 +373,7 @@ static __always_inline u32 bpf_compute_snaplen(struct filler_data *data,
int addrlen;

val = bpf_syscall_get_argument(data, 1);
if (bpf_probe_read(&mh, sizeof(mh), (void *)val)) {
if (bpf_probe_read_user(&mh, sizeof(mh), (void *)val)) {
usrsockaddr = NULL;
addrlen = 0;
} else {
@@ -464,9 +474,9 @@ static __always_inline u32 bpf_compute_snaplen(struct filler_data *data,
}

static __always_inline int unix_socket_path(char *dest, const char *user_ptr, size_t size) {
int res = bpf_probe_read_str(dest,
size,
user_ptr);
int res = bpf_probe_read_kernel_str(dest,
size,
user_ptr);
/*
* Extract from: https://man7.org/linux/man-pages/man7/unix.7.html
* an abstract socket address is distinguished (from a
@@ -477,9 +487,9 @@ static __always_inline int unix_socket_path(char *dest, const char *user_ptr, si
*/
if (res == 1) {
dest[0] = '@';
res = bpf_probe_read_str(dest + 1,
size - 1, // account for '@'
user_ptr + 1);
res = bpf_probe_read_kernel_str(dest + 1,
size - 1, // account for '@'
user_ptr + 1);
res++; // account for '@'
}
return res;
@@ -649,8 +659,8 @@ static __always_inline long bpf_fd_to_socktuple(struct filler_data *data,
* From kernel 3.13 we can take both ipv4 and ipv6 info from here
* https://elixir.bootlin.com/linux/v3.13/source/include/net/sock.h#L164
*/
bpf_probe_read(&sip, sizeof(sip), &sk->__sk_common.skc_daddr);
bpf_probe_read(&sport, sizeof(sport), &sk->__sk_common.skc_dport);
bpf_probe_read_kernel(&sip, sizeof(sip), &sk->__sk_common.skc_daddr);
bpf_probe_read_kernel(&sport, sizeof(sport), &sk->__sk_common.skc_dport);
sport = ntohs(sport);
dip = ((struct sockaddr_in *)sock_address)->sin_addr.s_addr;
dport = ntohs(((struct sockaddr_in *)sock_address)->sin_port);
@@ -706,9 +716,9 @@ static __always_inline long bpf_fd_to_socktuple(struct filler_data *data,
struct sockaddr_in6 *usrsockaddr_in6 = (struct sockaddr_in6 *)usrsockaddr;

if (is_inbound) {
bpf_probe_read(&in6, sizeof(in6), &sk->__sk_common.skc_v6_daddr);
bpf_probe_read_kernel(&in6, sizeof(in6), &sk->__sk_common.skc_v6_daddr);
sip6 = in6.in6_u.u6_addr8;
bpf_probe_read(&sport, sizeof(sport), &sk->__sk_common.skc_dport);
bpf_probe_read_kernel(&sport, sizeof(sport), &sk->__sk_common.skc_dport);
sport = ntohs(sport);
dip6 = ((struct sockaddr_in6 *)sock_address)->sin6_addr.s6_addr;
dport = ntohs(((struct sockaddr_in6 *)sock_address)->sin6_port);
@@ -798,12 +808,43 @@ static __always_inline long bpf_fd_to_socktuple(struct filler_data *data,
return size;
}

static __always_inline int __bpf_read_val_into(struct filler_data *data,
unsigned long curoff_bounded,
unsigned long val,
volatile u16 read_size,
enum read_memory mem)
{
int rc;
int read_size_bound;

#ifdef BPF_FORBIDS_ZERO_ACCESS
if (read_size == 0)
return -1;

read_size_bound = ((read_size - 1) & SCRATCH_SIZE_HALF) + 1;
#else
read_size_bound = read_size & SCRATCH_SIZE_HALF;
#endif

if (mem == KERNEL)
rc = bpf_probe_read_kernel(&data->buf[curoff_bounded],
read_size_bound,
(void *)val);
else
rc = bpf_probe_read_user(&data->buf[curoff_bounded],
read_size_bound,
(void *)val);

return rc;
}

static __always_inline int __bpf_val_to_ring(struct filler_data *data,
unsigned long val,
unsigned long val_len,
enum ppm_param_type type,
u8 dyn_idx,
bool enforce_snaplen)
bool enforce_snaplen,
enum read_memory mem)
{
unsigned int len_dyn = 0;
unsigned int len = 0;
@@ -832,13 +873,18 @@ static __always_inline int __bpf_val_to_ring(struct filler_data *data,
case PT_CHARBUF:
case PT_FSPATH:
case PT_FSRELPATH: {
if (!data->curarg_already_on_frame)
if (!data->curarg_already_on_frame)
{
int res;
/* Return `res<0` only in case of error. */
res = bpf_probe_read_str(&data->buf[curoff_bounded],
PPM_MAX_ARG_SIZE,
(const void *)val);
int res = -1;

if (val)
/* Return `res<0` only in case of error. */
res = (mem == KERNEL) ? bpf_probe_read_kernel_str(&data->buf[curoff_bounded],
PPM_MAX_ARG_SIZE,
(const void *)val)
: bpf_probe_read_user_str(&data->buf[curoff_bounded],
PPM_MAX_ARG_SIZE,
(const void *)val);
if(res >= 0)
{
len = res;
@@ -877,24 +923,14 @@ static __always_inline int __bpf_val_to_ring(struct filler_data *data,
* we send an empty param `len=0`.
*/
volatile u16 read_size = dpi_lookahead_size;
int rc = 0;

#ifdef BPF_FORBIDS_ZERO_ACCESS
if(!read_size || bpf_probe_read(&data->buf[curoff_bounded],
((read_size - 1) & SCRATCH_SIZE_HALF) + 1,
(void *)val))
rc = __bpf_read_val_into(data, curoff_bounded, val, read_size, mem);
if (rc)
{
len=0;
break;
}
#else
if(bpf_probe_read(&data->buf[curoff_bounded],
read_size & SCRATCH_SIZE_HALF,
(void *)val))
{
len=0;
break;
}
#endif /* BPF_FORBIDS_ZERO_ACCESS */
}

/* If `curarg` was already on frame, we are interested only in this computation,
@@ -913,31 +949,20 @@ static __always_inline int __bpf_val_to_ring(struct filler_data *data,
if(!data->curarg_already_on_frame)
{
volatile u16 read_size = len;
int rc = 0;

curoff_bounded = data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF;
if (data->state->tail_ctx.curoff > SCRATCH_SIZE_HALF)
{
return PPM_FAILURE_FRAME_SCRATCH_MAP_FULL;
}

#ifdef BPF_FORBIDS_ZERO_ACCESS

if (!read_size || bpf_probe_read(&data->buf[curoff_bounded],
((read_size - 1) & SCRATCH_SIZE_HALF) + 1,
(void *)val))
rc = __bpf_read_val_into(data, curoff_bounded, val, read_size, mem);
if (rc)
{
len=0;
break;
}
#else
if (bpf_probe_read(&data->buf[curoff_bounded],
read_size & SCRATCH_SIZE_HALF,
(void *)val))
{
len=0;
break;
}
#endif /* BPF_FORBIDS_ZERO_ACCESS */
}
}
else
@@ -1055,6 +1080,47 @@ static __always_inline int bpf_push_empty_param(struct filler_data *data)
return PPM_SUCCESS;
}

static __always_inline enum read_memory param_type_to_mem(enum ppm_param_type type)
{
/* __bpf_val_to_ring() uses bpf_probe_read_* functions for particular types
* only. Instead of changing all places, let's keep it simple and try to
* spot the correct address space by type.
*/

switch (type)
{
case PT_CHARBUF:
case PT_FSPATH:
case PT_FSRELPATH:
case PT_BYTEBUF:
/* Those types typically read memory from user space pointers.
* If not, explicit use the respective helper with the _mem()
* suffix to specify the memory to read from.
*
* See also the usage below in the helpers.
*/
return USER;
default:
return KERNEL;
}
}

static __always_inline int bpf_val_to_ring_mem(struct filler_data *data,
unsigned long val,
enum read_memory mem)
{
const struct ppm_param_info *param_info;

if (data->state->tail_ctx.curarg >= PPM_MAX_EVENT_PARAMS) {
bpf_printk("invalid curarg: %d\n", data->state->tail_ctx.curarg);
return PPM_FAILURE_BUG;
}

param_info = &data->evt->params[data->state->tail_ctx.curarg & (PPM_MAX_EVENT_PARAMS - 1)];

return __bpf_val_to_ring(data, val, 0, param_info->type, -1, false, mem);
}

static __always_inline int bpf_val_to_ring(struct filler_data *data,
unsigned long val)
{
@@ -1067,7 +1133,8 @@ static __always_inline int bpf_val_to_ring(struct filler_data *data,

param_info = &data->evt->params[data->state->tail_ctx.curarg & (PPM_MAX_EVENT_PARAMS - 1)];

return __bpf_val_to_ring(data, val, 0, param_info->type, -1, false);
return __bpf_val_to_ring(data, val, 0, param_info->type, -1, false,
param_type_to_mem(param_info->type));
Comment on lines +1136 to +1137

gnosek (Contributor):
We pass param_info->type to __bpf_val_to_ring, so we should be able to call param_type_to_mem in there and keep the signature unchanged, right?

hbrueckner (Contributor, Author), Dec 21, 2022:

I had a version with an unchanged signature. The problem is that param_type_to_mem decides that PT_CHARBUF and PT_BYTEBUF are copied from user-space memory, which unfortunately does not hold in all cases.

Here are some places where char/byte buffers are copied from kernel space (either from kernel structures or already copied into the scratch area):

git grep -n __bpf_val_to_ring driver/bpf/ |grep  'BUF' |grep 'KERNEL'
driver/bpf/fillers.h:709:               return __bpf_val_to_ring(data, 0, size, PT_BYTEBUF, -1, true, KERNEL);
driver/bpf/fillers.h:2378:              res = __bpf_val_to_ring(data, 0, exe_len, PT_CHARBUF, -1, false, KERNEL);
driver/bpf/fillers.h:2386:              res = __bpf_val_to_ring(data, 0, args_len - exe_len, PT_BYTEBUF, -1, false, KERNEL);
driver/bpf/fillers.h:2528:      res = __bpf_val_to_ring(data, (unsigned long)data->tmp_scratch, cgroups_len, PT_BYTEBUF, -1, false, KERNEL);
driver/bpf/fillers.h:2735:              res = __bpf_val_to_ring(data, 0, env_len, PT_BYTEBUF, -1, false, KERNEL);
driver/bpf/fillers.h:6208:              res = __bpf_val_to_ring(data, 0, exe_len, PT_CHARBUF, -1, false, KERNEL);
driver/bpf/fillers.h:6216:              res = __bpf_val_to_ring(data, 0, args_len - exe_len, PT_BYTEBUF, -1, false, KERNEL);
driver/bpf/fillers.h:6357:      res = __bpf_val_to_ring(data, (unsigned long)data->tmp_scratch, cgroups_len, PT_BYTEBUF, -1, false, KERNEL);
driver/bpf/fillers.h:6410:      res = __bpf_val_to_ring(data, 0, env_len, PT_BYTEBUF, -1, false, KERNEL);
driver/bpf/fillers.h:6617:              res = __bpf_val_to_ring(data, 0, exe_len, PT_CHARBUF, -1, false, KERNEL);
driver/bpf/fillers.h:6625:              res = __bpf_val_to_ring(data, 0, args_len - exe_len, PT_BYTEBUF, -1, false, KERNEL);
driver/bpf/fillers.h:6767:      res = __bpf_val_to_ring(data, (unsigned long)data->tmp_scratch, cgroups_len, PT_BYTEBUF, -1, false, KERNEL);

gnosek (Contributor):

Hmm, so how about we

#define __bpf_val_to_ring(..., param_type, ...) __bpf_val_to_ring_with_explicit_mem(..., param_type, ..., param_type_to_mem(param_type))

(or do the equivalent static inline) and use __bpf_val_to_ring_with_explicit_mem in the few spots where the default guess is wrong?

hbrueckner (Contributor, Author):

Hi @gnosek,
let me think a bit about having an explicit mem version for __bpf_val_to_ring.

# git grep -n __bpf_val_to_ring driver/bpf/  |grep 'KERNEL' |wc -l
17
# git grep -n __bpf_val_to_ring driver/bpf/  |grep 'USER' |wc -l
7

This would at least reduce those 7 occurrences where USER has to be specified directly.
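
A rough sketch of the wrapper idea discussed here (hypothetical, not what the PR merged; the PR instead threads the mem argument through __bpf_val_to_ring and adds bpf_val_to_ring_mem / bpf_val_to_ring_type_mem below):

/* keep today's implementation under an explicit-mem name */
static __always_inline int __bpf_val_to_ring_explicit_mem(struct filler_data *data,
							  unsigned long val,
							  unsigned long val_len,
							  enum ppm_param_type type,
							  u8 dyn_idx,
							  bool enforce_snaplen,
							  enum read_memory mem);
/* ...would carry the body of today's __bpf_val_to_ring... */

/* default entry point: derive the address space from the parameter type */
static __always_inline int __bpf_val_to_ring(struct filler_data *data,
					     unsigned long val,
					     unsigned long val_len,
					     enum ppm_param_type type,
					     u8 dyn_idx,
					     bool enforce_snaplen)
{
	return __bpf_val_to_ring_explicit_mem(data, val, val_len, type, dyn_idx,
					      enforce_snaplen, param_type_to_mem(type));
}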

}

static __always_inline int bpf_val_to_ring_len(struct filler_data *data,
@@ -1083,22 +1150,31 @@ static __always_inline int bpf_val_to_ring_len(struct filler_data *data,

param_info = &data->evt->params[data->state->tail_ctx.curarg & (PPM_MAX_EVENT_PARAMS - 1)];

return __bpf_val_to_ring(data, val, val_len, param_info->type, -1, false);
return __bpf_val_to_ring(data, val, val_len, param_info->type, -1, false,
param_type_to_mem(param_info->type));
}

static __always_inline int bpf_val_to_ring_dyn(struct filler_data *data,
unsigned long val,
enum ppm_param_type type,
u8 dyn_idx)
{
return __bpf_val_to_ring(data, val, 0, type, dyn_idx, false);
return __bpf_val_to_ring(data, val, 0, type, dyn_idx, false, param_type_to_mem(type));
}

static __always_inline int bpf_val_to_ring_type_mem(struct filler_data *data,
unsigned long val,
enum ppm_param_type type,
enum read_memory mem)
{
return __bpf_val_to_ring(data, val, 0, type, -1, false, mem);
}

static __always_inline int bpf_val_to_ring_type(struct filler_data *data,
unsigned long val,
enum ppm_param_type type)
{
return __bpf_val_to_ring(data, val, 0, type, -1, false);
return __bpf_val_to_ring(data, val, 0, type, -1, false, param_type_to_mem(type));
}

static __always_inline bool bpf_in_ia32_syscall()
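
To illustrate how fillers are expected to choose between the default and explicit-mem entry points, here is a hypothetical fragment (example_filler is not part of this diff; bpf_syscall_get_argument, data->tmp_scratch, PT_CHARBUF and KERNEL are the names used throughout this PR):

static __always_inline int example_filler(struct filler_data *data)
{
	unsigned long val;
	int res;

	/* a path argument passed in from user space: the default guess
	 * (param_type_to_mem -> USER) is correct
	 */
	val = bpf_syscall_get_argument(data, 0);
	res = bpf_val_to_ring(data, val);
	if (res != PPM_SUCCESS)
		return res;

	/* a string already assembled in kernel memory (the scratch area):
	 * override the guess explicitly
	 */
	return bpf_val_to_ring_type_mem(data, (unsigned long)data->tmp_scratch,
					PT_CHARBUF, KERNEL);
}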