diff --git a/driver/bpf/bpf_helpers.h b/driver/bpf/bpf_helpers.h index abfa41bf65..073f1575de 100644 --- a/driver/bpf/bpf_helpers.h +++ b/driver/bpf/bpf_helpers.h @@ -71,6 +71,27 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = (void *)BPF_FUNC_xdp_adjust_head; static int (*bpf_probe_read_str)(void *dst, u64 size, const void *unsafe_ptr) = (void *)BPF_FUNC_probe_read_str; + +#if defined(USE_BPF_PROBE_KERNEL_USER_VARIANTS) +static int (*bpf_probe_read_user)(void *dst, u32 size, const void *unsafe_ptr) = + (void *)BPF_FUNC_probe_read_user; +static int (*bpf_probe_read_kernel)(void *dst, u32 size, const void *unsafe_ptr) = + (void *)BPF_FUNC_probe_read_kernel; +static int (*bpf_probe_read_user_str)(void *dst, u32 size, const void *unsafe_ptr) = + (void *)BPF_FUNC_probe_read_user_str; +static int (*bpf_probe_read_kernel_str)(void *dst, u32 size, const void *unsafe_ptr) = + (void *)BPF_FUNC_probe_read_kernel_str; +#else +static int (*bpf_probe_read_user)(void *dst, u32 size, const void *unsafe_ptr) = + (void *)BPF_FUNC_probe_read; +static int (*bpf_probe_read_kernel)(void *dst, u32 size, const void *unsafe_ptr) = + (void *)BPF_FUNC_probe_read; +static int (*bpf_probe_read_user_str)(void *dst, u32 size, const void *unsafe_ptr) = + (void *)BPF_FUNC_probe_read_str; +static int (*bpf_probe_read_kernel_str)(void *dst, u32 size, const void *unsafe_ptr) = + (void *)BPF_FUNC_probe_read_str; +#endif + static u64 (*bpf_get_current_task)(void) = (void *)BPF_FUNC_get_current_task; static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = diff --git a/driver/bpf/filler_helpers.h b/driver/bpf/filler_helpers.h index 56aa4f5c64..c65f4e699b 100644 --- a/driver/bpf/filler_helpers.h +++ b/driver/bpf/filler_helpers.h @@ -25,6 +25,15 @@ or GPL2.txt for full copies of the license. #define MAX_PATH_COMPONENTS 16 #define MAX_PATH_LENGTH 4096 +/* This enum is used to tell our helpers if they have to + * read from kernel or user memory. 
+ */ +enum read_memory +{ + USER = 0, + KERNEL = 1, +}; + static __always_inline bool in_port_range(uint16_t port, uint16_t min, uint16_t max) { return port >= min && port <= max; @@ -96,7 +105,7 @@ static __always_inline char *bpf_get_path(struct filler_data *data, int fd) for(i = 1; i < MAX_PATH_COMPONENTS && i <= nreads && res >= 0; i++) { path_level = (nreads-i) & (MAX_PATH_COMPONENTS-1); - res = bpf_probe_read_str(&filepath[curoff_bounded], MAX_PATH_LENGTH, + res = bpf_probe_read_kernel_str(&filepath[curoff_bounded], MAX_PATH_LENGTH, (const void*)pointers_buf[path_level]); curoff_bounded = (curoff_bounded+res-1) & SCRATCH_SIZE_HALF; if(i>1 && i0) @@ -255,9 +264,9 @@ static __always_inline bool bpf_getsockname(struct socket *sock, #ifdef BPF_FORBIDS_ZERO_ACCESS if (len > 0) - bpf_probe_read(sunaddr, ((len - 1) & 0xff) + 1, addr->name); + bpf_probe_read_kernel(sunaddr, ((len - 1) & 0xff) + 1, addr->name); #else - bpf_probe_read(sunaddr, len, addr->name); + bpf_probe_read_kernel(sunaddr, len, addr->name); #endif } @@ -273,16 +282,17 @@ static __always_inline bool bpf_getsockname(struct socket *sock, static __always_inline int bpf_addr_to_kernel(void *uaddr, int ulen, struct sockaddr *kaddr) { - int len = _READ(ulen); + int len = ulen & 0xfff; /* required by BPF verifier */ + if (len < 0 || len > sizeof(struct sockaddr_storage)) return -EINVAL; if (len == 0) return 0; #ifdef BPF_FORBIDS_ZERO_ACCESS - if (bpf_probe_read(kaddr, ((len - 1) & 0xff) + 1, uaddr)) + if (bpf_probe_read_user(kaddr, ((len - 1) & 0xff) + 1, uaddr)) #else - if (bpf_probe_read(kaddr, len & 0xff, uaddr)) + if (bpf_probe_read_user(kaddr, len & 0xff, uaddr)) #endif return -EFAULT; @@ -363,7 +373,7 @@ static __always_inline u32 bpf_compute_snaplen(struct filler_data *data, int addrlen; val = bpf_syscall_get_argument(data, 1); - if (bpf_probe_read(&mh, sizeof(mh), (void *)val)) { + if (bpf_probe_read_user(&mh, sizeof(mh), (void *)val)) { usrsockaddr = NULL; addrlen = 0; } else { @@ -464,9 +474,9 @@ 
static __always_inline u32 bpf_compute_snaplen(struct filler_data *data, } static __always_inline int unix_socket_path(char *dest, const char *user_ptr, size_t size) { - int res = bpf_probe_read_str(dest, - size, - user_ptr); + int res = bpf_probe_read_kernel_str(dest, + size, + user_ptr); /* * Extract from: https://man7.org/linux/man-pages/man7/unix.7.html * an abstract socket address is distinguished (from a @@ -477,9 +487,9 @@ static __always_inline int unix_socket_path(char *dest, const char *user_ptr, si */ if (res == 1) { dest[0] = '@'; - res = bpf_probe_read_str(dest + 1, - size - 1, // account for '@' - user_ptr + 1); + res = bpf_probe_read_kernel_str(dest + 1, + size - 1, // account for '@' + user_ptr + 1); res++; // account for '@' } return res; @@ -649,8 +659,8 @@ static __always_inline long bpf_fd_to_socktuple(struct filler_data *data, * From kernel 3.13 we can take both ipv4 and ipv6 info from here * https://elixir.bootlin.com/linux/v3.13/source/include/net/sock.h#L164 */ - bpf_probe_read(&sip, sizeof(sip), &sk->__sk_common.skc_daddr); - bpf_probe_read(&sport, sizeof(sport), &sk->__sk_common.skc_dport); + bpf_probe_read_kernel(&sip, sizeof(sip), &sk->__sk_common.skc_daddr); + bpf_probe_read_kernel(&sport, sizeof(sport), &sk->__sk_common.skc_dport); sport = ntohs(sport); dip = ((struct sockaddr_in *)sock_address)->sin_addr.s_addr; dport = ntohs(((struct sockaddr_in *)sock_address)->sin_port); @@ -706,9 +716,9 @@ static __always_inline long bpf_fd_to_socktuple(struct filler_data *data, struct sockaddr_in6 *usrsockaddr_in6 = (struct sockaddr_in6 *)usrsockaddr; if (is_inbound) { - bpf_probe_read(&in6, sizeof(in6), &sk->__sk_common.skc_v6_daddr); + bpf_probe_read_kernel(&in6, sizeof(in6), &sk->__sk_common.skc_v6_daddr); sip6 = in6.in6_u.u6_addr8; - bpf_probe_read(&sport, sizeof(sport), &sk->__sk_common.skc_dport); + bpf_probe_read_kernel(&sport, sizeof(sport), &sk->__sk_common.skc_dport); sport = ntohs(sport); dip6 = ((struct sockaddr_in6 
*)sock_address)->sin6_addr.s6_addr; dport = ntohs(((struct sockaddr_in6 *)sock_address)->sin6_port); @@ -798,12 +808,43 @@ static __always_inline long bpf_fd_to_socktuple(struct filler_data *data, return size; } +static __always_inline int __bpf_read_val_into(struct filler_data *data, + unsigned long curoff_bounded, + unsigned long val, + volatile u16 read_size, + enum read_memory mem) +{ + int rc; + int read_size_bound; + +#ifdef BPF_FORBIDS_ZERO_ACCESS + if (read_size == 0) + return -1; + + read_size_bound = ((read_size - 1) & SCRATCH_SIZE_HALF) + 1; +#else + read_size_bound = read_size & SCRATCH_SIZE_HALF; +#endif + + if (mem == KERNEL) + rc = bpf_probe_read_kernel(&data->buf[curoff_bounded], + read_size_bound, + (void *)val); + else + rc = bpf_probe_read_user(&data->buf[curoff_bounded], + read_size_bound, + (void *)val); + + return rc; +} + static __always_inline int __bpf_val_to_ring(struct filler_data *data, unsigned long val, unsigned long val_len, enum ppm_param_type type, u8 dyn_idx, - bool enforce_snaplen) + bool enforce_snaplen, + enum read_memory mem) { unsigned int len_dyn = 0; unsigned int len = 0; @@ -832,13 +873,18 @@ static __always_inline int __bpf_val_to_ring(struct filler_data *data, case PT_CHARBUF: case PT_FSPATH: case PT_FSRELPATH: { - if (!data->curarg_already_on_frame) + if (!data->curarg_already_on_frame) { - int res; - /* Return `res<0` only in case of error. */ - res = bpf_probe_read_str(&data->buf[curoff_bounded], - PPM_MAX_ARG_SIZE, - (const void *)val); + int res = -1; + + if (val) + /* Return `res<0` only in case of error. */ + res = (mem == KERNEL) ? bpf_probe_read_kernel_str(&data->buf[curoff_bounded], + PPM_MAX_ARG_SIZE, + (const void *)val) + : bpf_probe_read_user_str(&data->buf[curoff_bounded], + PPM_MAX_ARG_SIZE, + (const void *)val); if(res >= 0) { len = res; @@ -877,24 +923,14 @@ static __always_inline int __bpf_val_to_ring(struct filler_data *data, * we send an empty param `len=0`. 
*/ volatile u16 read_size = dpi_lookahead_size; + int rc = 0; -#ifdef BPF_FORBIDS_ZERO_ACCESS - if(!read_size || bpf_probe_read(&data->buf[curoff_bounded], - ((read_size - 1) & SCRATCH_SIZE_HALF) + 1, - (void *)val)) + rc = __bpf_read_val_into(data, curoff_bounded, val, read_size, mem); + if (rc) { len=0; break; } -#else - if(bpf_probe_read(&data->buf[curoff_bounded], - read_size & SCRATCH_SIZE_HALF, - (void *)val)) - { - len=0; - break; - } -#endif /* BPF_FORBIDS_ZERO_ACCESS */ } /* If `curarg` was already on frame, we are interested only in this computation, @@ -913,6 +949,7 @@ static __always_inline int __bpf_val_to_ring(struct filler_data *data, if(!data->curarg_already_on_frame) { volatile u16 read_size = len; + int rc = 0; curoff_bounded = data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF; if (data->state->tail_ctx.curoff > SCRATCH_SIZE_HALF) @@ -920,24 +957,12 @@ static __always_inline int __bpf_val_to_ring(struct filler_data *data, return PPM_FAILURE_FRAME_SCRATCH_MAP_FULL; } -#ifdef BPF_FORBIDS_ZERO_ACCESS - - if (!read_size || bpf_probe_read(&data->buf[curoff_bounded], - ((read_size - 1) & SCRATCH_SIZE_HALF) + 1, - (void *)val)) + rc = __bpf_read_val_into(data, curoff_bounded, val, read_size, mem); + if (rc) { len=0; break; } -#else - if (bpf_probe_read(&data->buf[curoff_bounded], - read_size & SCRATCH_SIZE_HALF, - (void *)val)) - { - len=0; - break; - } -#endif /* BPF_FORBIDS_ZERO_ACCESS */ } } else @@ -1055,6 +1080,47 @@ static __always_inline int bpf_push_empty_param(struct filler_data *data) return PPM_SUCCESS; } +static __always_inline enum read_memory param_type_to_mem(enum ppm_param_type type) +{ + /* __bpf_val_to_ring() uses bpf_probe_read_* functions for particular types + * only. Instead of changing all places, let's keep it simple and try to + * spot the correct address space by type. 
+ */ + + switch (type) + { + case PT_CHARBUF: + case PT_FSPATH: + case PT_FSRELPATH: + case PT_BYTEBUF: + /* Those types typically read memory from user space pointers. + * If not, explicit use the respective helper with the _mem() + * suffix to specify the memory to read from. + * + * See also the usage below in the helpers. + */ + return USER; + default: + return KERNEL; + } +} + +static __always_inline int bpf_val_to_ring_mem(struct filler_data *data, + unsigned long val, + enum read_memory mem) +{ + const struct ppm_param_info *param_info; + + if (data->state->tail_ctx.curarg >= PPM_MAX_EVENT_PARAMS) { + bpf_printk("invalid curarg: %d\n", data->state->tail_ctx.curarg); + return PPM_FAILURE_BUG; + } + + param_info = &data->evt->params[data->state->tail_ctx.curarg & (PPM_MAX_EVENT_PARAMS - 1)]; + + return __bpf_val_to_ring(data, val, 0, param_info->type, -1, false, mem); +} + static __always_inline int bpf_val_to_ring(struct filler_data *data, unsigned long val) { @@ -1067,7 +1133,8 @@ static __always_inline int bpf_val_to_ring(struct filler_data *data, param_info = &data->evt->params[data->state->tail_ctx.curarg & (PPM_MAX_EVENT_PARAMS - 1)]; - return __bpf_val_to_ring(data, val, 0, param_info->type, -1, false); + return __bpf_val_to_ring(data, val, 0, param_info->type, -1, false, + param_type_to_mem(param_info->type)); } static __always_inline int bpf_val_to_ring_len(struct filler_data *data, @@ -1083,7 +1150,8 @@ static __always_inline int bpf_val_to_ring_len(struct filler_data *data, param_info = &data->evt->params[data->state->tail_ctx.curarg & (PPM_MAX_EVENT_PARAMS - 1)]; - return __bpf_val_to_ring(data, val, val_len, param_info->type, -1, false); + return __bpf_val_to_ring(data, val, val_len, param_info->type, -1, false, + param_type_to_mem(param_info->type)); } static __always_inline int bpf_val_to_ring_dyn(struct filler_data *data, @@ -1091,14 +1159,22 @@ static __always_inline int bpf_val_to_ring_dyn(struct filler_data *data, enum ppm_param_type type, 
u8 dyn_idx) { - return __bpf_val_to_ring(data, val, 0, type, dyn_idx, false); + return __bpf_val_to_ring(data, val, 0, type, dyn_idx, false, param_type_to_mem(type)); +} + +static __always_inline int bpf_val_to_ring_type_mem(struct filler_data *data, + unsigned long val, + enum ppm_param_type type, + enum read_memory mem) +{ + return __bpf_val_to_ring(data, val, 0, type, -1, false, mem); } static __always_inline int bpf_val_to_ring_type(struct filler_data *data, unsigned long val, enum ppm_param_type type) { - return __bpf_val_to_ring(data, val, 0, type, -1, false); + return __bpf_val_to_ring(data, val, 0, type, -1, false, param_type_to_mem(type)); } static __always_inline bool bpf_in_ia32_syscall() diff --git a/driver/bpf/fillers.h b/driver/bpf/fillers.h index f208870b23..bb517c804c 100644 --- a/driver/bpf/fillers.h +++ b/driver/bpf/fillers.h @@ -12,6 +12,7 @@ or GPL2.txt for full copies of the license. #include "../systype_compat.h" #include "../ppm_flag_helpers.h" #include "../ppm_version.h" +#include "bpf_helpers.h" #include #include @@ -453,7 +454,7 @@ FILLER(sys_read_x, true) * data */ data->fd = bpf_syscall_get_argument(data, 0); - res = __bpf_val_to_ring(data, val, bufsize, PT_BYTEBUF, -1, true); + res = __bpf_val_to_ring(data, val, bufsize, PT_BYTEBUF, -1, true, USER); return res; } @@ -481,7 +482,7 @@ FILLER(sys_write_x, true) val = bpf_syscall_get_argument(data, 1); bufsize = bpf_syscall_get_argument(data, 2); - res = __bpf_val_to_ring(data, val, bufsize, PT_BYTEBUF, -1, true); + res = __bpf_val_to_ring(data, val, bufsize, PT_BYTEBUF, -1, true, USER); return res; } @@ -511,11 +512,11 @@ static __always_inline int bpf_poll_parse_fds(struct filler_data *data, val = bpf_syscall_get_argument(data, 0); #ifdef BPF_FORBIDS_ZERO_ACCESS if (read_size) - if (bpf_probe_read(fds, - ((read_size - 1) & SCRATCH_SIZE_MAX) + 1, - (void *)val)) + if (bpf_probe_read_user(fds, + ((read_size - 1) & SCRATCH_SIZE_MAX) + 1, + (void *)val)) #else - if (bpf_probe_read(fds, 
read_size & SCRATCH_SIZE_MAX, (void *)val)) + if (bpf_probe_read_user(fds, read_size & SCRATCH_SIZE_MAX, (void *)val)) #endif return PPM_FAILURE_INVALID_USER_MEMORY; @@ -558,7 +559,7 @@ static __always_inline int bpf_poll_parse_fds(struct filler_data *data, *((u16 *)&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF]) = fds_count; data->curarg_already_on_frame = true; - return __bpf_val_to_ring(data, 0, off - data->state->tail_ctx.curoff, PT_FDLIST, -1, false); + return __bpf_val_to_ring(data, 0, off - data->state->tail_ctx.curoff, PT_FDLIST, -1, false, KERNEL); } FILLER(sys_poll_e, true) @@ -627,13 +628,13 @@ static __always_inline int bpf_parse_readv_writev_bufs(struct filler_data *data, #ifdef BPF_FORBIDS_ZERO_ACCESS if (copylen) - if (bpf_probe_read((void *)iov, - ((copylen - 1) & SCRATCH_SIZE_MAX) + 1, - (void *)iovsrc)) + if (bpf_probe_read_user((void *)iov, + ((copylen - 1) & SCRATCH_SIZE_MAX) + 1, + (void *)iovsrc)) #else - if (bpf_probe_read((void *)iov, - copylen & SCRATCH_SIZE_MAX, - (void *)iovsrc)) + if (bpf_probe_read_user((void *)iov, + copylen & SCRATCH_SIZE_MAX, + (void *)iovsrc)) #endif return PPM_FAILURE_INVALID_USER_MEMORY; @@ -686,13 +687,13 @@ static __always_inline int bpf_parse_readv_writev_bufs(struct filler_data *data, #ifdef BPF_FORBIDS_ZERO_ACCESS if (to_read) - if (bpf_probe_read(&data->buf[off_bounded], - ((to_read - 1) & SCRATCH_SIZE_HALF) + 1, - iov[j].iov_base)) + if (bpf_probe_read_user(&data->buf[off_bounded], + ((to_read - 1) & SCRATCH_SIZE_HALF) + 1, + iov[j].iov_base)) #else - if (bpf_probe_read(&data->buf[off_bounded], - to_read & SCRATCH_SIZE_HALF, - iov[j].iov_base)) + if (bpf_probe_read_user(&data->buf[off_bounded], + to_read & SCRATCH_SIZE_HALF, + iov[j].iov_base)) #endif return PPM_FAILURE_INVALID_USER_MEMORY; @@ -705,7 +706,7 @@ static __always_inline int bpf_parse_readv_writev_bufs(struct filler_data *data, data->fd = bpf_syscall_get_argument(data, 0); data->curarg_already_on_frame = true; - return 
__bpf_val_to_ring(data, 0, size, PT_BYTEBUF, -1, true); + return __bpf_val_to_ring(data, 0, size, PT_BYTEBUF, -1, true, KERNEL); } return res; @@ -798,7 +799,7 @@ static __always_inline int timespec_parse(struct filler_data *data, u64 longtime; struct timespec ts; - if (bpf_probe_read(&ts, sizeof(ts), (void *)val)) + if (bpf_probe_read_user(&ts, sizeof(ts), (void *)val)) return PPM_FAILURE_INVALID_USER_MEMORY; longtime = ((u64)ts.tv_sec) * 1000000000 + ts.tv_nsec; @@ -852,7 +853,7 @@ static __always_inline unsigned long bpf_get_mm_counter(struct mm_struct *mm, { long val; - bpf_probe_read(&val, sizeof(val), &mm->rss_stat.count[member]); + bpf_probe_read_kernel(&val, sizeof(val), &mm->rss_stat.count[member]); if (val < 0) val = 0; @@ -883,7 +884,7 @@ FILLER(sys_brk_munmap_mmap_x, true) task = (struct task_struct *)bpf_get_current_task(); mm = NULL; - bpf_probe_read(&mm, sizeof(mm), &task->mm); + bpf_probe_read_kernel(&mm, sizeof(mm), &task->mm); retval = bpf_syscall_get_retval(data->ctx); res = bpf_val_to_ring_type(data, retval, PT_UINT64); @@ -1083,7 +1084,7 @@ FILLER(sys_getrlimit_setrlrimit_x, true) struct rlimit rl; val = bpf_syscall_get_argument(data, 1); - if (bpf_probe_read(&rl, sizeof(rl), (void *)val)) + if (bpf_probe_read_user(&rl, sizeof(rl), (void *)val)) return PPM_FAILURE_INVALID_USER_MEMORY; cur = rl.rlim_cur; @@ -1229,7 +1230,7 @@ FILLER(sys_socketpair_x, true) if (retval >= 0) { val = bpf_syscall_get_argument(data, 3); - if (bpf_probe_read(fds, 2 * sizeof(int), (void *)val)) + if (bpf_probe_read_user(fds, 2 * sizeof(int), (void *)val)) return PPM_FAILURE_INVALID_USER_MEMORY; struct socket *sock = bpf_sockfd_lookup(data, fds[0]); @@ -1269,7 +1270,7 @@ static int __always_inline parse_sockopt(struct filler_data *data, int level, in */ if(level != SOL_SOCKET) { - return __bpf_val_to_ring(data, (unsigned long)optval, optlen, PT_BYTEBUF, PPM_SOCKOPT_IDX_UNKNOWN, false); + return __bpf_val_to_ring(data, (unsigned long)optval, optlen, PT_BYTEBUF, 
PPM_SOCKOPT_IDX_UNKNOWN, false, USER); } switch (optname) { @@ -1278,7 +1279,7 @@ static int __always_inline parse_sockopt(struct filler_data *data, int level, in /* If there is an error while reading `bpf_probe_read` performs * a `memset` so no need to check return value. */ - bpf_probe_read(&val32, sizeof(val32), optval); + bpf_probe_read_user(&val32, sizeof(val32), optval); return bpf_val_to_ring_dyn(data, (s64)-val32, PT_ERRNO, PPM_SOCKOPT_IDX_ERRNO); #endif @@ -1300,12 +1301,12 @@ static int __always_inline parse_sockopt(struct filler_data *data, int level, in #if (defined(SO_SNDTIMEO_NEW) && !defined(SO_SNDTIMEO)) || (defined(SO_SNDTIMEO_NEW) && (SO_SNDTIMEO_NEW != SO_SNDTIMEO)) case SO_SNDTIMEO_NEW: #endif - bpf_probe_read(&tv, sizeof(tv), optval); + bpf_probe_read_user(&tv, sizeof(tv), optval); return bpf_val_to_ring_dyn(data, tv.tv_sec * SECOND_IN_NS + tv.tv_usec * USECOND_IN_NS, PT_RELTIME, PPM_SOCKOPT_IDX_TIMEVAL); #ifdef SO_COOKIE case SO_COOKIE: - bpf_probe_read(&val64, sizeof(val64), optval); + bpf_probe_read_user(&val64, sizeof(val64), optval); return bpf_val_to_ring_dyn(data, val64, PT_UINT64, PPM_SOCKOPT_IDX_UINT64); #endif @@ -1435,11 +1436,11 @@ static int __always_inline parse_sockopt(struct filler_data *data, int level, in #ifdef SO_INCOMING_CPU case SO_INCOMING_CPU: #endif - bpf_probe_read(&val32, sizeof(val32), optval); + bpf_probe_read_user(&val32, sizeof(val32), optval); return bpf_val_to_ring_dyn(data, val32, PT_UINT32, PPM_SOCKOPT_IDX_UINT32); default: - return __bpf_val_to_ring(data, (unsigned long)optval, optlen, PT_BYTEBUF, PPM_SOCKOPT_IDX_UNKNOWN, false); + return __bpf_val_to_ring(data, (unsigned long)optval, optlen, PT_BYTEBUF, PPM_SOCKOPT_IDX_UNKNOWN, false, USER); } } @@ -1506,7 +1507,7 @@ FILLER(sys_getsockopt_x, true) int optlen = 0; unsigned long optlen_pointer = bpf_syscall_get_argument(data, 4); /* if the read fails it internally calls memeset(0) so we are ok */ - bpf_probe_read(&optlen, sizeof(optlen), 
(void*)optlen_pointer); + bpf_probe_read_user(&optlen, sizeof(optlen), (void*)optlen_pointer); res = parse_sockopt(data, level, optname, (void*)optval, optlen); CHECK_RES(res); @@ -1642,7 +1643,7 @@ FILLER(sys_send_x, true) } data->fd = bpf_syscall_get_argument(data, 0); - res = __bpf_val_to_ring(data, val, bufsize, PT_BYTEBUF, -1, true); + res = __bpf_val_to_ring(data, val, bufsize, PT_BYTEBUF, -1, true, USER); return res; } @@ -1656,7 +1657,7 @@ FILLER(sys_execve_e, true) * filename */ val = bpf_syscall_get_argument(data, 0); - res = bpf_val_to_ring(data, val); + res = bpf_val_to_ring_mem(data, val, USER); return res; } @@ -1687,7 +1688,7 @@ FILLER(sys_execveat_e, true) */ val = bpf_syscall_get_argument(data, 1); - res = bpf_val_to_ring(data, val); + res = bpf_val_to_ring_mem(data, val, USER); if (res != PPM_SUCCESS) { return res; @@ -1858,9 +1859,9 @@ static __always_inline int __bpf_append_cgroup(struct css_set *cgroups, return PPM_FAILURE_FRAME_SCRATCH_MAP_FULL; } - int res = bpf_probe_read_str(&buf[off_bounded], - SCRATCH_SIZE_HALF, - subsys_name); + int res = bpf_probe_read_kernel_str(&buf[off_bounded], + SCRATCH_SIZE_HALF, + subsys_name); if (res == -EFAULT) return PPM_FAILURE_INVALID_USER_MEMORY; @@ -1907,9 +1908,9 @@ static __always_inline int __bpf_append_cgroup(struct css_set *cgroups, return PPM_FAILURE_FRAME_SCRATCH_MAP_FULL; } - res = bpf_probe_read_str(&buf[off_bounded], - SCRATCH_SIZE_HALF, - cgroup_path[k]); + res = bpf_probe_read_kernel_str(&buf[off_bounded], + SCRATCH_SIZE_HALF, + cgroup_path[k]); if (res > 1) { off += res - 1; @@ -1991,7 +1992,7 @@ static __always_inline int bpf_accumulate_argv_or_env(struct filler_data *data, #pragma unroll for (j = 0; j < FAILED_ARGS_ENV_ITEMS_MAX; ++j) { - arg = _READ(argv[j]); + arg = _READ_USER(argv[j]); if (!arg) break; @@ -2000,7 +2001,7 @@ static __always_inline int bpf_accumulate_argv_or_env(struct filler_data *data, return PPM_FAILURE_FRAME_SCRATCH_MAP_FULL; } - len = bpf_probe_read_str(&data->buf[off 
& SCRATCH_SIZE_HALF], SCRATCH_SIZE_HALF, arg); + len = bpf_probe_read_user_str(&data->buf[off & SCRATCH_SIZE_HALF], SCRATCH_SIZE_HALF, arg); if (len == -EFAULT) return PPM_FAILURE_INVALID_USER_MEMORY; @@ -2242,7 +2243,7 @@ static __always_inline bool get_exe_upper_layer(struct inode *inode) // Pointer arithmetics due to unexported ovl_inode struct // warning: this works if and only if the dentry pointer is placed right after the inode struct - bpf_probe_read(&upper_dentry, sizeof(upper_dentry), vfs_inode + sizeof(struct inode)); + bpf_probe_read_kernel(&upper_dentry, sizeof(upper_dentry), vfs_inode + sizeof(struct inode)); if(upper_dentry) { @@ -2306,11 +2307,11 @@ FILLER(proc_startupdate, true) args_len = ARGS_ENV_SIZE_MAX; #ifdef BPF_FORBIDS_ZERO_ACCESS - if (bpf_probe_read(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], + if (bpf_probe_read_user(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], ((args_len - 1) & SCRATCH_SIZE_HALF) + 1, (void *)arg_start)) #else - if (bpf_probe_read(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], + if (bpf_probe_read_user(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], args_len & SCRATCH_SIZE_HALF, (void *)arg_start)) #endif @@ -2349,9 +2350,9 @@ FILLER(proc_startupdate, true) if (args_len) { int exe_len; - exe_len = bpf_probe_read_str(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], - SCRATCH_SIZE_HALF, - &data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF]); + exe_len = bpf_probe_read_kernel_str(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], + SCRATCH_SIZE_HALF, + &data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF]); if (exe_len == -EFAULT) return PPM_FAILURE_INVALID_USER_MEMORY; @@ -2360,7 +2361,7 @@ FILLER(proc_startupdate, true) * exe */ data->curarg_already_on_frame = true; - res = __bpf_val_to_ring(data, 0, exe_len, PT_CHARBUF, -1, false); + res = __bpf_val_to_ring(data, 0, exe_len, PT_CHARBUF, -1, false, KERNEL); if (res != 
PPM_SUCCESS) return res; @@ -2368,7 +2369,7 @@ FILLER(proc_startupdate, true) * Args */ data->curarg_already_on_frame = true; - res = __bpf_val_to_ring(data, 0, args_len - exe_len, PT_BYTEBUF, -1, false); + res = __bpf_val_to_ring(data, 0, args_len - exe_len, PT_BYTEBUF, -1, false, KERNEL); if (res != PPM_SUCCESS) return res; } else { @@ -2486,7 +2487,7 @@ FILLER(proc_startupdate, true) /* * comm */ - res = bpf_val_to_ring_type(data, (unsigned long)task->comm, PT_CHARBUF); + res = bpf_val_to_ring_type_mem(data, (unsigned long)task->comm, PT_CHARBUF, KERNEL); if (res != PPM_SUCCESS) return res; @@ -2510,7 +2511,7 @@ FILLER(proc_startupdate_2, true) if (res != PPM_SUCCESS) return res; - res = __bpf_val_to_ring(data, (unsigned long)data->tmp_scratch, cgroups_len, PT_BYTEBUF, -1, false); + res = __bpf_val_to_ring(data, (unsigned long)data->tmp_scratch, cgroups_len, PT_BYTEBUF, -1, false, KERNEL); if (res != PPM_SUCCESS) return res; @@ -2560,13 +2561,17 @@ FILLER(proc_startupdate_3, true) switch (data->state->tail_ctx.evt_type) { case PPME_SYSCALL_CLONE_20_X: +#ifdef CONFIG_S390 + flags = bpf_syscall_get_argument(data, 1); +#else flags = bpf_syscall_get_argument(data, 0); +#endif break; case PPME_SYSCALL_CLONE3_X: #ifdef __NR_clone3 flags = bpf_syscall_get_argument(data, 0); - if (bpf_probe_read(&cl_args, sizeof(struct clone_args), (void *)flags)) + if (bpf_probe_read_user(&cl_args, sizeof(struct clone_args), (void *)flags)) { return PPM_FAILURE_INVALID_USER_MEMORY; } @@ -2678,13 +2683,13 @@ FILLER(proc_startupdate_3, true) env_len = ARGS_ENV_SIZE_MAX; #ifdef BPF_FORBIDS_ZERO_ACCESS - if (bpf_probe_read(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], - ((env_len - 1) & SCRATCH_SIZE_HALF) + 1, - (void *)env_start)) + if (bpf_probe_read_user(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], + ((env_len - 1) & SCRATCH_SIZE_HALF) + 1, + (void *)env_start)) #else - if (bpf_probe_read(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], - 
env_len & SCRATCH_SIZE_HALF, - (void *)env_start)) + if (bpf_probe_read_user(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], + env_len & SCRATCH_SIZE_HALF, + (void *)env_start)) #endif env_len = 0; else @@ -2717,7 +2722,7 @@ FILLER(proc_startupdate_3, true) } data->curarg_already_on_frame = true; - res = __bpf_val_to_ring(data, 0, env_len, PT_BYTEBUF, -1, false); + res = __bpf_val_to_ring(data, 0, env_len, PT_BYTEBUF, -1, false, KERNEL); if (res != PPM_SUCCESS) return res; @@ -2865,25 +2870,33 @@ FILLER(sys_accept_x, true) int res = bpf_val_to_ring_type(data, (s64)fd, PT_FD); CHECK_RES(res); - /* Parameter 2: tuple (type: PT_SOCKTUPLE) */ - long size = bpf_fd_to_socktuple(data, fd, NULL, 0, false, true, data->tmp_scratch); - data->curarg_already_on_frame = true; - res = __bpf_val_to_ring(data, 0, size, PT_SOCKTUPLE, -1, false); - CHECK_RES(res); - u32 queuelen = 0; u32 queuemax = 0; u8 queuepct = 0; - /* Get the listening socket (first syscall parameter) */ - s32 listening_fd = (s32)bpf_syscall_get_argument(data, 0); - struct socket * sock = bpf_sockfd_lookup(data, listening_fd); - struct sock *sk = _READ(sock->sk); - queuelen = _READ(sk->sk_ack_backlog); - queuemax = _READ(sk->sk_max_ack_backlog); - if(queuelen && queuemax) + if (fd >= 0) { - queuepct = (u8)((u64)queuelen * 100 / queuemax); + /* Parameter 2: tuple (type: PT_SOCKTUPLE) */ + long size = bpf_fd_to_socktuple(data, fd, NULL, 0, false, true, data->tmp_scratch); + data->curarg_already_on_frame = true; + res = __bpf_val_to_ring(data, 0, size, PT_SOCKTUPLE, -1, false, KERNEL); + CHECK_RES(res); + + /* Get the listening socket (first syscall parameter) */ + s32 listening_fd = (s32)bpf_syscall_get_argument(data, 0); + struct socket * sock = bpf_sockfd_lookup(data, listening_fd); + struct sock *sk = _READ(sock->sk); + queuelen = _READ(sk->sk_ack_backlog); + queuemax = _READ(sk->sk_max_ack_backlog); + if(queuelen && queuemax) + { + queuepct = (u8)((u64)queuelen * 100 / queuemax); + } + } + else + 
{ + res = bpf_push_empty_param(data); + CHECK_RES(res); } /* Parameter 3: queuepct (type: PT_UINT8) */ @@ -3149,7 +3162,7 @@ FILLER(sys_openat2_e, true) * how: we get the data structure, and put its fields in the buffer one by one */ val = bpf_syscall_get_argument(data, 2); - if (bpf_probe_read(&how, sizeof(struct open_how), (void *)val)) { + if (bpf_probe_read_user(&how, sizeof(struct open_how), (void *)val)) { return PPM_FAILURE_INVALID_USER_MEMORY; } flags = open_flags_to_scap(how.flags); @@ -3227,7 +3240,7 @@ FILLER(sys_openat2_x, true) * how: we get the data structure, and put its fields in the buffer one by one */ val = bpf_syscall_get_argument(data, 2); - if (bpf_probe_read(&how, sizeof(struct open_how), (void *)val)) { + if (bpf_probe_read_user(&how, sizeof(struct open_how), (void *)val)) { return PPM_FAILURE_INVALID_USER_MEMORY; } flags = open_flags_to_scap(how.flags); @@ -3290,7 +3303,7 @@ FILLER(sys_open_by_handle_at_x, true) { filepath = bpf_get_path(data, retval); } - return bpf_val_to_ring(data,(unsigned long)filepath); + return bpf_val_to_ring_mem(data,(unsigned long)filepath, KERNEL); } FILLER(sys_io_uring_setup_x, true) @@ -3312,7 +3325,7 @@ FILLER(sys_io_uring_setup_x, true) /* if the call fails we don't care since `bpf_probe_read` under the hood memsets * the destination memory to `0` */ - bpf_probe_read(¶ms, sizeof(struct io_uring_params), (void *)params_pointer); + bpf_probe_read_user(¶ms, sizeof(struct io_uring_params), (void *)params_pointer); sq_entries = params.sq_entries; cq_entries = params.cq_entries; @@ -3565,7 +3578,7 @@ FILLER(sys_fsconfig_x, true) /* Parameter 4: key (type: PT_CHARBUF) */ unsigned long key_pointer = bpf_syscall_get_argument(data, 2); - res = bpf_val_to_ring(data, key_pointer); + res = bpf_val_to_ring_mem(data, key_pointer, USER); CHECK_RES(res); int aux = bpf_syscall_get_argument(data, 4); @@ -3593,11 +3606,11 @@ FILLER(sys_fsconfig_x, true) /* Since `value` is NULL we send two empty params. 
*/ /* Parameter 5: value_bytebuf (type: PT_BYTEBUF) */ - res = bpf_val_to_ring(data, 0); + res = bpf_val_to_ring_mem(data, 0, KERNEL); CHECK_RES(res); /* Parameter 6: value_charbuf (type: PT_CHARBUF) */ - res = bpf_val_to_ring(data, 0); + res = bpf_val_to_ring_mem(data, 0, KERNEL); CHECK_RES(res); break; @@ -3609,11 +3622,11 @@ FILLER(sys_fsconfig_x, true) */ /* Parameter 5: value_bytebuf (type: PT_BYTEBUF) */ - res = bpf_val_to_ring(data, 0); + res = bpf_val_to_ring_mem(data, 0, KERNEL); CHECK_RES(res); /* Parameter 6: value_charbuf (type: PT_CHARBUF) */ - res = bpf_val_to_ring(data, value_pointer); + res = bpf_val_to_ring_mem(data, value_pointer, USER); CHECK_RES(res); break; @@ -3623,22 +3636,22 @@ FILLER(sys_fsconfig_x, true) */ /* Parameter 5: value_bytebuf (type: PT_BYTEBUF) */ - res = __bpf_val_to_ring(data, value_pointer, aux, PT_BYTEBUF, -1, true); + res = __bpf_val_to_ring(data, value_pointer, aux, PT_BYTEBUF, -1, true, USER); CHECK_RES(res); /* Parameter 6: value_charbuf (type: PT_CHARBUF) */ - res = bpf_val_to_ring(data, 0); + res = bpf_val_to_ring_mem(data, 0, KERNEL); CHECK_RES(res); break; default: /* Parameter 5: value_bytebuf (type: PT_BYTEBUF) */ - res = bpf_val_to_ring(data, 0); + res = bpf_val_to_ring_mem(data, 0, KERNEL); CHECK_RES(res); /* Parameter 6: value_charbuf (type: PT_CHARBUF) */ - res = bpf_val_to_ring(data, 0); + res = bpf_val_to_ring_mem(data, 0, KERNEL); CHECK_RES(res); break; } @@ -3803,7 +3816,7 @@ FILLER(sys_prlimit_x, true) */ if (retval >= 0) { val = bpf_syscall_get_argument(data, 2); - if (bpf_probe_read(&rl, sizeof(rl), (void *)val)) { + if (bpf_probe_read_user(&rl, sizeof(rl), (void *)val)) { newcur = -1; newmax = -1; } else { @@ -3816,7 +3829,7 @@ FILLER(sys_prlimit_x, true) } val = bpf_syscall_get_argument(data, 3); - if (bpf_probe_read(&rl, sizeof(rl), (void *)val)) { + if (bpf_probe_read_user(&rl, sizeof(rl), (void *)val)) { oldcur = -1; oldmax = -1; } else { @@ -4019,7 +4032,7 @@ static __always_inline int 
f_sys_recv_x_common(struct filler_data *data, long re } data->fd = bpf_syscall_get_argument(data, 0); - res = __bpf_val_to_ring(data, val, bufsize, PT_BYTEBUF, -1, true); + res = __bpf_val_to_ring(data, val, bufsize, PT_BYTEBUF, -1, true, USER); return res; } @@ -4078,7 +4091,7 @@ FILLER(sys_recvfrom_x, true) val = bpf_syscall_get_argument(data, 5); if (usrsockaddr && val != 0) { - if (bpf_probe_read(&addrlen, sizeof(addrlen), + if (bpf_probe_read_user(&addrlen, sizeof(addrlen), (void *)val)) return PPM_FAILURE_INVALID_USER_MEMORY; @@ -4108,7 +4121,7 @@ FILLER(sys_recvfrom_x, true) * Copy the endpoint info into the ring */ data->curarg_already_on_frame = true; - res = __bpf_val_to_ring(data, 0, size, PT_SOCKTUPLE, -1, false); + res = __bpf_val_to_ring(data, 0, size, PT_SOCKTUPLE, -1, false, KERNEL); return res; } @@ -4165,7 +4178,7 @@ FILLER(sys_recvmsg_x, true) * Retrieve the message header */ val = bpf_syscall_get_argument(data, 1); - if (bpf_probe_read(&mh, sizeof(mh), (void *)val)) + if (bpf_probe_read_user(&mh, sizeof(mh), (void *)val)) return PPM_FAILURE_INVALID_USER_MEMORY; /* @@ -4204,7 +4217,7 @@ FILLER(sys_recvmsg_x_2, true) * Retrieve the message header */ val = bpf_syscall_get_argument(data, 1); - if (bpf_probe_read(&mh, sizeof(mh), (void *)val)) + if (bpf_probe_read_user(&mh, sizeof(mh), (void *)val)) return PPM_FAILURE_INVALID_USER_MEMORY; /* @@ -4239,7 +4252,7 @@ FILLER(sys_recvmsg_x_2, true) } data->curarg_already_on_frame = true; - res = __bpf_val_to_ring(data, 0, size, PT_SOCKTUPLE, -1, false); + res = __bpf_val_to_ring(data, 0, size, PT_SOCKTUPLE, -1, false, KERNEL); return res; } @@ -4269,7 +4282,7 @@ FILLER(sys_sendmsg_e, true) * Retrieve the message header */ val = bpf_syscall_get_argument(data, 1); - if (bpf_probe_read(&mh, sizeof(mh), (void *)val)) + if (bpf_probe_read_user(&mh, sizeof(mh), (void *)val)) return PPM_FAILURE_INVALID_USER_MEMORY; /* @@ -4312,7 +4325,7 @@ FILLER(sys_sendmsg_e, true) } data->curarg_already_on_frame = true; - res 
= __bpf_val_to_ring(data, 0, size, PT_SOCKTUPLE, -1, false); + res = __bpf_val_to_ring(data, 0, size, PT_SOCKTUPLE, -1, false, KERNEL); return res; } @@ -4342,7 +4355,7 @@ FILLER(sys_sendmsg_x, true) * data */ val = bpf_syscall_get_argument(data, 1); - if (bpf_probe_read(&mh, sizeof(mh), (void *)val)) + if (bpf_probe_read_user(&mh, sizeof(mh), (void *)val)) return PPM_FAILURE_INVALID_USER_MEMORY; iov = (const struct iovec *)mh.msg_iov; @@ -4364,7 +4377,7 @@ FILLER(sys_creat_e, true) * name */ val = bpf_syscall_get_argument(data, 0); - res = bpf_val_to_ring(data, val); + res = bpf_val_to_ring_mem(data, val, USER); if (res != PPM_SUCCESS) return res; @@ -4398,7 +4411,7 @@ FILLER(sys_creat_x, true) * name */ val = bpf_syscall_get_argument(data, 0); - res = bpf_val_to_ring(data, val); + res = bpf_val_to_ring_mem(data, val, USER); if (res != PPM_SUCCESS) return res; @@ -4438,7 +4451,7 @@ FILLER(sys_pipe_x, true) s32 pipefd[2] = {-1, -1}; /* This is a pointer to the vector with the 2 file descriptors. 
*/ unsigned long fd_vector_pointer = bpf_syscall_get_argument(data, 0); - if(bpf_probe_read(pipefd, sizeof(pipefd), (void *)fd_vector_pointer)) + if(bpf_probe_read_user(pipefd, sizeof(pipefd), (void *)fd_vector_pointer)) { pipefd[0] = -1; pipefd[1] = -1; @@ -4604,7 +4617,7 @@ FILLER(sys_ppoll_e, true) */ val = bpf_syscall_get_argument(data, 3); if (val != (unsigned long)NULL) - if (bpf_probe_read(&val, sizeof(val), (void *)val)) + if (bpf_probe_read_user(&val, sizeof(val), (void *)val)) return PPM_FAILURE_INVALID_USER_MEMORY; res = bpf_val_to_ring_type(data, val, PT_SIGSET); @@ -4650,7 +4663,7 @@ FILLER(sys_semop_x, true) struct sembuf sops = {0, 0, 0}; if (nsops--) - if (bpf_probe_read(&sops, sizeof(sops), + if (bpf_probe_read_user(&sops, sizeof(sops), (void *)&ptr[j])) return PPM_FAILURE_INVALID_USER_MEMORY; @@ -4782,7 +4795,7 @@ FILLER(sys_renameat_x, true) * oldpath */ val = bpf_syscall_get_argument(data, 1); - res = bpf_val_to_ring(data, val); + res = bpf_val_to_ring_mem(data, val, USER); if (res != PPM_SUCCESS) return res; @@ -4802,7 +4815,7 @@ FILLER(sys_renameat_x, true) * newpath */ val = bpf_syscall_get_argument(data, 3); - res = bpf_val_to_ring(data, val); + res = bpf_val_to_ring_mem(data, val, USER); return res; } @@ -4834,7 +4847,7 @@ FILLER(sys_renameat2_x, true) * oldpath */ val = bpf_syscall_get_argument(data, 1); - res = bpf_val_to_ring(data, val); + res = bpf_val_to_ring_mem(data, val, USER); if (res != PPM_SUCCESS) return res; @@ -4854,7 +4867,7 @@ FILLER(sys_renameat2_x, true) * newpath */ val = bpf_syscall_get_argument(data, 3); - res = bpf_val_to_ring(data, val); + res = bpf_val_to_ring_mem(data, val, USER); /* * flags @@ -4880,7 +4893,7 @@ FILLER(sys_symlinkat_x, true) * oldpath */ val = bpf_syscall_get_argument(data, 0); - res = bpf_val_to_ring_type(data, val, PT_CHARBUF); + res = bpf_val_to_ring_type_mem(data, val, PT_CHARBUF, USER); if (res != PPM_SUCCESS) return res; @@ -4900,7 +4913,7 @@ FILLER(sys_symlinkat_x, true) * newpath */ val = 
bpf_syscall_get_argument(data, 2); - res = bpf_val_to_ring_type(data, val, PT_CHARBUF); + res = bpf_val_to_ring_type_mem(data, val, PT_CHARBUF, USER); return res; } @@ -5230,7 +5243,7 @@ FILLER(sys_quotactl_x, true) * Add special */ val = bpf_syscall_get_argument(data, 1); - res = bpf_val_to_ring_type(data, val, PT_CHARBUF); + res = bpf_val_to_ring_type_mem(data, val, PT_CHARBUF, USER); if (res != PPM_SUCCESS) return res; @@ -5243,7 +5256,7 @@ FILLER(sys_quotactl_x, true) * get quotafilepath only for QUOTAON */ if (cmd == PPM_Q_QUOTAON) { - res = bpf_val_to_ring_type(data, val, PT_CHARBUF); + res = bpf_val_to_ring_type_mem(data, val, PT_CHARBUF, USER); if (res != PPM_SUCCESS) return res; } else { @@ -5256,7 +5269,7 @@ FILLER(sys_quotactl_x, true) * dqblk fields if present */ if (cmd == PPM_Q_GETQUOTA || cmd == PPM_Q_SETQUOTA) { - if (bpf_probe_read(&dqblk, sizeof(dqblk), + if (bpf_probe_read_user(&dqblk, sizeof(dqblk), (void *)val)) return PPM_FAILURE_INVALID_USER_MEMORY; } @@ -5328,7 +5341,7 @@ FILLER(sys_quotactl_x, true) * dqinfo fields if present */ if (cmd == PPM_Q_GETINFO || cmd == PPM_Q_SETINFO) { - if (bpf_probe_read(&dqinfo, sizeof(dqinfo), + if (bpf_probe_read_user(&dqinfo, sizeof(dqinfo), (void *)val)) return PPM_FAILURE_INVALID_USER_MEMORY; } @@ -5367,7 +5380,7 @@ FILLER(sys_quotactl_x, true) if (cmd == PPM_Q_GETFMT) { u32 tmp; - if (bpf_probe_read(&tmp, sizeof(tmp), (void *)val)) + if (bpf_probe_read_user(&tmp, sizeof(tmp), (void *)val)) return PPM_FAILURE_INVALID_USER_MEMORY; quota_fmt_out = quotactl_fmt_to_scap(tmp); } @@ -5492,7 +5505,7 @@ static __always_inline int bpf_parse_ptrace_data(struct filler_data *data, u16 r case PPM_PTRACE_PEEKUSR: idx = PPM_PTRACE_IDX_UINT64; type = PT_UINT64; - if (bpf_probe_read(&dst, sizeof(long), (void *)val)) + if (bpf_probe_read_user(&dst, sizeof(long), (void *)val)) return PPM_FAILURE_INVALID_USER_MEMORY; break; @@ -6053,13 +6066,13 @@ FILLER(sched_prog_exec, false) /* `bpf_probe_read()` returns 0 in case of 
success. */ #ifdef BPF_FORBIDS_ZERO_ACCESS - int correctly_read = bpf_probe_read(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], - ((args_len - 1) & SCRATCH_SIZE_HALF) + 1, - (void *)arg_start); + int correctly_read = bpf_probe_read_user(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], + ((args_len - 1) & SCRATCH_SIZE_HALF) + 1, + (void *)arg_start); #else - int correctly_read = bpf_probe_read(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], - args_len & SCRATCH_SIZE_HALF, - (void *)arg_start); + int correctly_read = bpf_probe_read_user(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], + args_len & SCRATCH_SIZE_HALF, + (void *)arg_start); #endif /* BPF_FORBIDS_ZERO_ACCESS */ /* If there was something to read and we read it correctly, update all @@ -6070,9 +6083,9 @@ FILLER(sched_prog_exec, false) data->buf[(data->state->tail_ctx.curoff + args_len - 1) & SCRATCH_SIZE_MAX] = 0; /* We need the len of the second param `exe`. */ - int exe_len = bpf_probe_read_str(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], - SCRATCH_SIZE_HALF, - &data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF]); + int exe_len = bpf_probe_read_kernel_str(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], + SCRATCH_SIZE_HALF, + &data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF]); if(exe_len == -EFAULT) { @@ -6081,7 +6094,7 @@ FILLER(sched_prog_exec, false) /* Parameter 2: exe (type: PT_CHARBUF) */ data->curarg_already_on_frame = true; - res = __bpf_val_to_ring(data, 0, exe_len, PT_CHARBUF, -1, false); + res = __bpf_val_to_ring(data, 0, exe_len, PT_CHARBUF, -1, false, KERNEL); if(res != PPM_SUCCESS) { return res; @@ -6089,7 +6102,7 @@ FILLER(sched_prog_exec, false) /* Parameter 3: args (type: PT_CHARBUFARRAY) */ data->curarg_already_on_frame = true; - res = __bpf_val_to_ring(data, 0, args_len - exe_len, PT_BYTEBUF, -1, false); + res = __bpf_val_to_ring(data, 0, args_len - exe_len, PT_BYTEBUF, -1, false, 
KERNEL); if(res != PPM_SUCCESS) { return res; @@ -6206,7 +6219,7 @@ FILLER(sched_prog_exec, false) } /* Parameter 14: comm (type: PT_CHARBUF) */ - res = bpf_val_to_ring_type(data, (unsigned long)task->comm, PT_CHARBUF); + res = bpf_val_to_ring_type_mem(data, (unsigned long)task->comm, PT_CHARBUF, KERNEL); if(res != PPM_SUCCESS) { return res; @@ -6230,7 +6243,7 @@ FILLER(sched_prog_exec_2, false) } /* Parameter 15: cgroups (type: PT_CHARBUFARRAY) */ - res = __bpf_val_to_ring(data, (unsigned long)data->tmp_scratch, cgroups_len, PT_BYTEBUF, -1, false); + res = __bpf_val_to_ring(data, (unsigned long)data->tmp_scratch, cgroups_len, PT_BYTEBUF, -1, false, KERNEL); if(res != PPM_SUCCESS) { return res; @@ -6264,11 +6277,11 @@ FILLER(sched_prog_exec_3, false) } #ifdef BPF_FORBIDS_ZERO_ACCESS - if(bpf_probe_read(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], + if(bpf_probe_read_user(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], ((env_len - 1) & SCRATCH_SIZE_HALF) + 1, (void *)env_start)) #else - if(bpf_probe_read(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], + if(bpf_probe_read_user(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], env_len & SCRATCH_SIZE_HALF, (void *)env_start)) #endif /* BPF_FORBIDS_ZERO_ACCESS */ @@ -6283,7 +6296,7 @@ FILLER(sched_prog_exec_3, false) /* Parameter 16: env (type: PT_CHARBUFARRAY) */ data->curarg_already_on_frame = true; - res = __bpf_val_to_ring(data, 0, env_len, PT_BYTEBUF, -1, false); + res = __bpf_val_to_ring(data, 0, env_len, PT_BYTEBUF, -1, false, KERNEL); if(res != PPM_SUCCESS) { return res; @@ -6467,9 +6480,9 @@ FILLER(sched_prog_fork, false) } /* `bpf_probe_read()` returns 0 in case of success. 
*/ - int correctly_read = bpf_probe_read(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], - args_len & SCRATCH_SIZE_HALF, - (void *)arg_start); + int correctly_read = bpf_probe_read_user(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], + args_len & SCRATCH_SIZE_HALF, + (void *)arg_start); /* If there was something to read and we read it correctly, update all * the offsets, otherwise push empty params to userspace. @@ -6479,9 +6492,9 @@ FILLER(sched_prog_fork, false) data->buf[(data->state->tail_ctx.curoff + args_len - 1) & SCRATCH_SIZE_MAX] = 0; /* We need the len of the second param `exe`. */ - int exe_len = bpf_probe_read_str(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], - SCRATCH_SIZE_HALF, - &data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF]); + int exe_len = bpf_probe_read_kernel_str(&data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF], + SCRATCH_SIZE_HALF, + &data->buf[data->state->tail_ctx.curoff & SCRATCH_SIZE_HALF]); if(exe_len == -EFAULT) { @@ -6490,7 +6503,7 @@ FILLER(sched_prog_fork, false) /* Parameter 2: exe (type: PT_CHARBUF) */ data->curarg_already_on_frame = true; - res = __bpf_val_to_ring(data, 0, exe_len, PT_CHARBUF, -1, false); + res = __bpf_val_to_ring(data, 0, exe_len, PT_CHARBUF, -1, false, KERNEL); if(res != PPM_SUCCESS) { return res; @@ -6498,7 +6511,7 @@ FILLER(sched_prog_fork, false) /* Parameter 3: args (type: PT_CHARBUFARRAY) */ data->curarg_already_on_frame = true; - res = __bpf_val_to_ring(data, 0, args_len - exe_len, PT_BYTEBUF, -1, false); + res = __bpf_val_to_ring(data, 0, args_len - exe_len, PT_BYTEBUF, -1, false, KERNEL); if(res != PPM_SUCCESS) { return res; @@ -6615,7 +6628,7 @@ FILLER(sched_prog_fork, false) } /* Parameter 14: comm (type: PT_CHARBUF) */ - res = bpf_val_to_ring_type(data, (unsigned long)child->comm, PT_CHARBUF); + res = bpf_val_to_ring_type_mem(data, (unsigned long)child->comm, PT_CHARBUF, KERNEL); if(res != PPM_SUCCESS) { return res; @@ -6640,7 +6653,7 
@@ FILLER(sched_prog_fork_2, false) } /* Parameter 15: cgroups (type: PT_CHARBUFARRAY) */ - res = __bpf_val_to_ring(data, (unsigned long)data->tmp_scratch, cgroups_len, PT_BYTEBUF, -1, false); + res = __bpf_val_to_ring(data, (unsigned long)data->tmp_scratch, cgroups_len, PT_BYTEBUF, -1, false, KERNEL); if(res != PPM_SUCCESS) { return res; diff --git a/driver/bpf/plumbing_helpers.h b/driver/bpf/plumbing_helpers.h index 51e3b4ab19..82a36945ac 100644 --- a/driver/bpf/plumbing_helpers.h +++ b/driver/bpf/plumbing_helpers.h @@ -16,10 +16,14 @@ or GPL2.txt for full copies of the license. #include "types.h" #include "builtins.h" -#define _READ(P) ({ typeof(P) _val; \ - memset(&_val, 0, sizeof(_val)); \ - bpf_probe_read(&_val, sizeof(_val), &P); \ - _val; \ +#define _READ(P) ({ typeof(P) _val; \ + bpf_probe_read_kernel(&_val, sizeof(_val), &P); \ + _val; \ + }) +#define _READ_KERNEL(P) _READ(P) +#define _READ_USER(P) ({ typeof(P) _val; \ + bpf_probe_read_user(&_val, sizeof(_val), &P); \ + _val; \ }) #ifdef BPF_DEBUG @@ -417,6 +421,34 @@ static __always_inline int bpf_test_bit(int nr, unsigned long *addr) return 1UL & (_READ(addr[BIT_WORD(nr)]) >> (nr & (BITS_PER_LONG - 1))); } +#ifdef CAPTURE_SCHED_PROC_FORK +static __always_inline bool drop_syscall_child_events(void *ctx, enum ppm_event_type evt_type) +{ + switch (evt_type) { + case PPME_SYSCALL_CLONE_20_X: + case PPME_SYSCALL_FORK_20_X: + case PPME_SYSCALL_VFORK_20_X: + case PPME_SYSCALL_CLONE3_X: { + /* On s390x, clone and fork child events will be generated but + * due to page faults, no args/envp information will be collected. + * Also no child events appear for clone3 syscall. + * + * Because child events are covered by CAPTURE_SCHED_PROC_FORK, + * let's proactively ignore them. 
+ */ + long ret = bpf_syscall_get_retval(ctx); + + if (!ret) + return true; + break; + } + default: + break; + } + return false; +} +#endif + static __always_inline bool drop_event(void *ctx, struct scap_bpf_per_cpu_state *state, enum ppm_event_type evt_type, @@ -558,6 +590,11 @@ static __always_inline void call_filler(void *ctx, ts = settings->boot_time + bpf_ktime_get_boot_ns(); reset_tail_ctx(state, evt_type, ts); +#ifdef CAPTURE_SCHED_PROC_FORK + if (drop_syscall_child_events(stack_ctx, evt_type)) + goto cleanup; +#endif + /* drop_event can change state->tail_ctx.evt_type */ if (drop_event(stack_ctx, state, evt_type, settings, drop_flags)) goto cleanup; diff --git a/driver/feature_gates.h b/driver/feature_gates.h index db494285d3..a6bce83e4b 100644 --- a/driver/feature_gates.h +++ b/driver/feature_gates.h @@ -27,7 +27,7 @@ or GPL2.txt for full copies of the license. #ifdef __KERNEL__ /* Kernel module - BPF probe */ -#include +#include "ppm_version.h" /////////////////////////////// // CAPTURE_SCHED_PROC_FORK @@ -132,6 +132,15 @@ or GPL2.txt for full copies of the license. 
#define CAPTURE_PAGE_FAULTS #endif +/////////////////////////////// +// USE_BPF_PROBE_KERNEL_USER_VARIANTS +/////////////////////////////// + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5,5,0)) || \ + ((PPM_RHEL_RELEASE_CODE > 0) && (PPM_RHEL_RELEASE_CODE >= PPM_RHEL_RELEASE_VERSION(8, 5))) + #define USE_BPF_PROBE_KERNEL_USER_VARIANTS +#endif + #elif defined(__USE_VMLINUX__) /* modern BPF probe */ /////////////////////////////// diff --git a/driver/ppm_fillers.c b/driver/ppm_fillers.c index 0b775e7976..c9732fb8d9 100644 --- a/driver/ppm_fillers.c +++ b/driver/ppm_fillers.c @@ -2135,61 +2135,71 @@ int f_sys_accept_x(struct event_filler_arguments *args) if (unlikely(res != PPM_SUCCESS)) return res; - /* - * Convert the fd into socket endpoint information - */ - size = fd_to_socktuple(fd, - NULL, - 0, - false, - true, - targetbuf, - STR_STORAGE_SIZE); - - /* - * Copy the endpoint info into the ring - */ - res = val_to_ring(args, - (uint64_t)targetbuf, - size, - false, - 0); - if (unlikely(res != PPM_SUCCESS)) - return res; - - /* - * queuepct - */ - if (!args->is_socketcall) - syscall_get_arguments_deprecated(current, args->regs, 0, 1, &srvskfd); + if (fd >= 0) + { + /* + * Convert the fd into socket endpoint information + */ + size = fd_to_socktuple(fd, + NULL, + 0, + false, + true, + targetbuf, + STR_STORAGE_SIZE); + /* + * queuepct + */ + if (!args->is_socketcall) + syscall_get_arguments_deprecated(current, args->regs, 0, 1, &srvskfd); #ifndef UDIG - else - srvskfd = args->socketcall_args[0]; + else + srvskfd = args->socketcall_args[0]; #endif #ifndef UDIG - sock = sockfd_lookup(srvskfd, &err); + sock = sockfd_lookup(srvskfd, &err); - if (sock && sock->sk) { - ack_backlog = sock->sk->sk_ack_backlog; - max_ack_backlog = sock->sk->sk_max_ack_backlog; - } + if (sock && sock->sk) { + ack_backlog = sock->sk->sk_ack_backlog; + max_ack_backlog = sock->sk->sk_max_ack_backlog; + } - if (sock) - sockfd_put(sock); + if (sock) + sockfd_put(sock); - if (max_ack_backlog) - 
queuepct = (unsigned long)ack_backlog * 100 / max_ack_backlog; + if (max_ack_backlog) + queuepct = (unsigned long)ack_backlog * 100 / max_ack_backlog; #endif /* UDIG */ + /* Parameter 2: tuple (type: PT_SOCKTUPLE) */ + res = val_to_ring(args, + (uint64_t)targetbuf, + size, + false, + 0); + if (unlikely(res != PPM_SUCCESS)) + return res; + } + else + { + /* Parameter 2: tuple (type: PT_SOCKTUPLE) */ + res = push_empty_param(args); + if (unlikely(res != PPM_SUCCESS)) + return res; + } + + /* Parameter 3: queuepct (type: PT_UINT8) */ res = val_to_ring(args, queuepct, 0, false, 0); if (res != PPM_SUCCESS) return res; + /* Parameter 4: queuelen (type: PT_UINT32) */ res = val_to_ring(args, ack_backlog, 0, false, 0); if (res != PPM_SUCCESS) return res; + /* Parameter 5: queuemax (type: PT_UINT32) */ res = val_to_ring(args, max_ack_backlog, 0, false, 0); if (res != PPM_SUCCESS) return res;