diff --git a/doc/userguide/capture-hardware/ebpf-xdp.rst b/doc/userguide/capture-hardware/ebpf-xdp.rst index 116038716eff..b68b8e6c32be 100644 --- a/doc/userguide/capture-hardware/ebpf-xdp.rst +++ b/doc/userguide/capture-hardware/ebpf-xdp.rst @@ -302,6 +302,56 @@ be blind on packets on port 443 with the correct pattern. If you are not using VLAN tracking (``vlan.use-for-tracking`` set to false in suricata.yaml) then you also have to set the VLAN_TRACKING define to 0 in ``xdp_filter.c``. +Setup XDP Syncookie +------------------- + +XDP Syncookie allows Suricata in AF_PACKET IDS mode to protect the host from SYN flood attacks +via the iptables SYNPROXY module with XDP acceleration. + +More info about XDP Syncookie: + +- `ACCELERATING SYNPROXY WITH XDP `__ + +Linux kernel 6.2 or newer is required to use this feature. + +Copy the resulting XDP Syncookie program as needed:: + + cp ebpf/xdp_synproxy_kern.bpf /usr/libexec/suricata/ebpf/ + +Set up the af-packet section/interface in ``suricata.yaml``. + +:: + + - interface: eth3 + threads: auto + cluster-id: 97 + cluster-type: cluster_flow + defrag: yes + # Xdp mode, "soft" for skb based version, "driver" for network card based + # and "hw" for card supporting eBPF. 
+ xdp-mode: driver + xdp-syncookie-file: /usr/libexec/suricata/ebpf/xdp_synproxy_kern.bpf + use-mmap: yes + ring-size: 200000 + + +Example setup on host:: + + sysctl -w net.ipv4.tcp_syncookies=2 + sysctl -w net.ipv4.tcp_timestamps=1 + sysctl -w net.netfilter.nf_conntrack_tcp_loose=0 + iptables -t raw -I PREROUTING -i eth3 -p tcp -m tcp --syn --dport 80 -j CT --notrack + iptables -A INPUT -i eth3 -p tcp -m tcp --dport 80 -m state --state INVALID,UNTRACKED -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460 + iptables -A INPUT -i eth3 -m state --state INVALID -j DROP + +Find the program id with bpftool:: + + bpftool prog show name syncookie_xdp + +Use the xdp_synproxy program to add ports to XDP Syncookie protection:: + + xdp_synproxy --prog <prog_id> --ports 80 + Intel NIC setup ~~~~~~~~~~~~~~~ diff --git a/ebpf/include/vmlinux/vmlinux_common.h b/ebpf/include/vmlinux/vmlinux_common.h index 4c8ea59892be..5c58f48ea391 100644 --- a/ebpf/include/vmlinux/vmlinux_common.h +++ b/ebpf/include/vmlinux/vmlinux_common.h @@ -2,8 +2,8 @@ #define __VMLINUX_COMMON_H__ enum { - false = 0, - true = 1, + false = 0, + true = 1, }; typedef _Bool bool; diff --git a/ebpf/include/vmlinux/vmlinux_net.h b/ebpf/include/vmlinux/vmlinux_net.h index 2db199bbb496..10df30cfadda 100644 --- a/ebpf/include/vmlinux/vmlinux_net.h +++ b/ebpf/include/vmlinux/vmlinux_net.h @@ -4,13 +4,13 @@ typedef __u32 __wsum; struct nf_conn { - unsigned long status; + unsigned long status; }; enum ip_conntrack_status { - /* Connection is confirmed: originating packet has left box */ - IPS_CONFIRMED_BIT = 3, - IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT), + /* Connection is confirmed: originating packet has left box */ + IPS_CONFIRMED_BIT = 3, + IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT), }; #endif /* __VMLINUX_NET_H__ */ diff --git a/ebpf/include/vmlinux_local.h b/ebpf/include/vmlinux_local.h index 42c9a434dfc4..1d9815a4c91d 100644 --- a/ebpf/include/vmlinux_local.h +++ b/ebpf/include/vmlinux_local.h @@ -13,7 +13,7 @@ #include /* Needed 
for __uNN in vmlinux/vmlinux_types.h */ #ifndef BPF_NO_PRESERVE_ACCESS_INDEX -#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record) +#pragma clang attribute push(__attribute__((preserve_access_index)), apply_to = record) #endif #include "vmlinux/vmlinux_types.h" diff --git a/ebpf/xdp_synproxy.c b/ebpf/xdp_synproxy.c index 4020776078c0..2b9cf16b389b 100644 --- a/ebpf/xdp_synproxy.c +++ b/ebpf/xdp_synproxy.c @@ -20,268 +20,264 @@ static unsigned int ifindex; static noreturn void usage(const char *progname) { - fprintf(stderr, "Usage: %s [--iface |--prog ] [--mss4 --mss6 --wscale --ttl ] [--ports ,,...]\n", - progname); - exit(1); + fprintf(stderr, + "Usage: %s [--iface |--prog ] [--mss4 --mss6 " + "--wscale --ttl ] [--ports ,,...]\n", + progname); + exit(1); } static unsigned long parse_arg_ul(const char *progname, const char *arg, unsigned long limit) { - unsigned long res; - char *endptr; + unsigned long res; + char *endptr; - errno = 0; - res = strtoul(arg, &endptr, 10); - if (errno != 0 || *endptr != '\0' || arg[0] == '\0' || res > limit) - usage(progname); + errno = 0; + res = strtoul(arg, &endptr, 10); + if (errno != 0 || *endptr != '\0' || arg[0] == '\0' || res > limit) + usage(progname); - return res; + return res; } static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 *prog_id, - __u64 *tcpipopts, char **ports) + __u64 *tcpipopts, char **ports) { - static struct option long_options[] = { - { "help", no_argument, NULL, 'h' }, - { "iface", required_argument, NULL, 'i' }, - { "prog", required_argument, NULL, 'x' }, - { "mss4", required_argument, NULL, 4 }, - { "mss6", required_argument, NULL, 6 }, - { "wscale", required_argument, NULL, 'w' }, - { "ttl", required_argument, NULL, 't' }, - { "ports", required_argument, NULL, 'p' }, - { NULL, 0, NULL, 0 }, - }; - unsigned long mss4, wscale, ttl; - unsigned long long mss6; - unsigned int tcpipopts_mask = 0; - - if (argc < 2) - usage(argv[0]); - - *ifindex 
= 0; - *prog_id = 0; - *tcpipopts = 0; - *ports = NULL; - - while (true) { - int opt; - - opt = getopt_long(argc, argv, "", long_options, NULL); - if (opt == -1) - break; - - switch (opt) { - case 'h': - usage(argv[0]); - break; - case 'i': - *ifindex = if_nametoindex(optarg); - if (*ifindex == 0) - usage(argv[0]); - break; - case 'x': - *prog_id = parse_arg_ul(argv[0], optarg, UINT32_MAX); - if (*prog_id == 0) - usage(argv[0]); - break; - case 4: - mss4 = parse_arg_ul(argv[0], optarg, UINT16_MAX); - tcpipopts_mask |= 1 << 0; - break; - case 6: - mss6 = parse_arg_ul(argv[0], optarg, UINT16_MAX); - tcpipopts_mask |= 1 << 1; - break; - case 'w': - wscale = parse_arg_ul(argv[0], optarg, 14); - tcpipopts_mask |= 1 << 2; - break; - case 't': - ttl = parse_arg_ul(argv[0], optarg, UINT8_MAX); - tcpipopts_mask |= 1 << 3; - break; - case 'p': - *ports = optarg; - break; - default: - usage(argv[0]); - } - } - if (optind < argc) - usage(argv[0]); - - if (tcpipopts_mask == 0xf) { - if (mss4 == 0 || mss6 == 0 || wscale == 0 || ttl == 0) - usage(argv[0]); - *tcpipopts = (mss6 << 32) | (ttl << 24) | (wscale << 16) | mss4; - } else if (tcpipopts_mask != 0) { - usage(argv[0]); - } - - if (*ifindex != 0 && *prog_id != 0) - usage(argv[0]); - if (*ifindex == 0 && *prog_id == 0) - usage(argv[0]); + static struct option long_options[] = { + { "help", no_argument, NULL, 'h' }, + { "iface", required_argument, NULL, 'i' }, + { "prog", required_argument, NULL, 'x' }, + { "mss4", required_argument, NULL, 4 }, + { "mss6", required_argument, NULL, 6 }, + { "wscale", required_argument, NULL, 'w' }, + { "ttl", required_argument, NULL, 't' }, + { "ports", required_argument, NULL, 'p' }, + { NULL, 0, NULL, 0 }, + }; + unsigned long mss4, wscale, ttl; + unsigned long long mss6; + unsigned int tcpipopts_mask = 0; + + if (argc < 2) + usage(argv[0]); + + *ifindex = 0; + *prog_id = 0; + *tcpipopts = 0; + *ports = NULL; + + while (true) { + int opt; + + opt = getopt_long(argc, argv, "", long_options, 
NULL); + if (opt == -1) + break; + + switch (opt) { + case 'h': + usage(argv[0]); + break; + case 'i': + *ifindex = if_nametoindex(optarg); + if (*ifindex == 0) + usage(argv[0]); + break; + case 'x': + *prog_id = parse_arg_ul(argv[0], optarg, UINT32_MAX); + if (*prog_id == 0) + usage(argv[0]); + break; + case 4: + mss4 = parse_arg_ul(argv[0], optarg, UINT16_MAX); + tcpipopts_mask |= 1 << 0; + break; + case 6: + mss6 = parse_arg_ul(argv[0], optarg, UINT16_MAX); + tcpipopts_mask |= 1 << 1; + break; + case 'w': + wscale = parse_arg_ul(argv[0], optarg, 14); + tcpipopts_mask |= 1 << 2; + break; + case 't': + ttl = parse_arg_ul(argv[0], optarg, UINT8_MAX); + tcpipopts_mask |= 1 << 3; + break; + case 'p': + *ports = optarg; + break; + default: + usage(argv[0]); + } + } + if (optind < argc) + usage(argv[0]); + + if (tcpipopts_mask == 0xf) { + if (mss4 == 0 || mss6 == 0 || wscale == 0 || ttl == 0) + usage(argv[0]); + *tcpipopts = (mss6 << 32) | (ttl << 24) | (wscale << 16) | mss4; + } else if (tcpipopts_mask != 0) { + usage(argv[0]); + } + + if (*ifindex != 0 && *prog_id != 0) + usage(argv[0]); + if (*ifindex == 0 && *prog_id == 0) + usage(argv[0]); } static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports_map_fd) { - struct bpf_prog_info prog_info; - __u32 map_ids[8]; - __u32 info_len; - int prog_fd; - int err; - int i; - - *values_map_fd = -1; - *ports_map_fd = -1; - - prog_fd = bpf_prog_get_fd_by_id(prog_id); - if (prog_fd < 0) { - fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd)); - return prog_fd; - } - - prog_info = (struct bpf_prog_info) { - .nr_map_ids = 8, - .map_ids = (__u64)(unsigned long)map_ids, - }; - info_len = sizeof(prog_info); - - err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); - if (err != 0) { - fprintf(stderr, "Error: bpf_prog_get_info_by_fd: %s\n", - strerror(-err)); - goto out; - } - - if (prog_info.nr_map_ids < 2) { - fprintf(stderr, "Error: Found %u BPF maps, expected at least 2\n", - 
prog_info.nr_map_ids); - err = -ENOENT; - goto out; - } - - for (i = 0; i < prog_info.nr_map_ids; i++) { - struct bpf_map_info map_info = {}; - int map_fd; - - err = bpf_map_get_fd_by_id(map_ids[i]); - if (err < 0) { - fprintf(stderr, "Error: bpf_map_get_fd_by_id: %s\n", strerror(-err)); - goto err_close_map_fds; - } - map_fd = err; - - info_len = sizeof(map_info); - err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len); - if (err != 0) { - fprintf(stderr, "Error: bpf_map_get_info_by_fd: %s\n", - strerror(-err)); - close(map_fd); - goto err_close_map_fds; - } - if (strcmp(map_info.name, "values") == 0) { - *values_map_fd = map_fd; - continue; - } - if (strcmp(map_info.name, "allowed_ports") == 0) { - *ports_map_fd = map_fd; - continue; - } - close(map_fd); - } - - if (*values_map_fd != -1 && *ports_map_fd != -1) { - err = 0; - goto out; - } - - err = -ENOENT; + struct bpf_prog_info prog_info; + __u32 map_ids[8]; + __u32 info_len; + int prog_fd; + int err; + int i; + + *values_map_fd = -1; + *ports_map_fd = -1; + + prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (prog_fd < 0) { + fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd)); + return prog_fd; + } + + prog_info = (struct bpf_prog_info){ + .nr_map_ids = 8, + .map_ids = (__u64)(unsigned long)map_ids, + }; + info_len = sizeof(prog_info); + + err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); + if (err != 0) { + fprintf(stderr, "Error: bpf_prog_get_info_by_fd: %s\n", strerror(-err)); + goto out; + } + + if (prog_info.nr_map_ids < 2) { + fprintf(stderr, "Error: Found %u BPF maps, expected at least 2\n", prog_info.nr_map_ids); + err = -ENOENT; + goto out; + } + + for (i = 0; i < prog_info.nr_map_ids; i++) { + struct bpf_map_info map_info = {}; + int map_fd; + + err = bpf_map_get_fd_by_id(map_ids[i]); + if (err < 0) { + fprintf(stderr, "Error: bpf_map_get_fd_by_id: %s\n", strerror(-err)); + goto err_close_map_fds; + } + map_fd = err; + + info_len = sizeof(map_info); + err = 
bpf_map_get_info_by_fd(map_fd, &map_info, &info_len); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_get_info_by_fd: %s\n", strerror(-err)); + close(map_fd); + goto err_close_map_fds; + } + if (strcmp(map_info.name, "values") == 0) { + *values_map_fd = map_fd; + continue; + } + if (strcmp(map_info.name, "allowed_ports") == 0) { + *ports_map_fd = map_fd; + continue; + } + close(map_fd); + } + + if (*values_map_fd != -1 && *ports_map_fd != -1) { + err = 0; + goto out; + } + + err = -ENOENT; err_close_map_fds: - if (*values_map_fd != -1) - close(*values_map_fd); - if (*ports_map_fd != -1) - close(*ports_map_fd); - *values_map_fd = -1; - *ports_map_fd = -1; + if (*values_map_fd != -1) + close(*values_map_fd); + if (*ports_map_fd != -1) + close(*ports_map_fd); + *values_map_fd = -1; + *ports_map_fd = -1; out: - close(prog_fd); - return err; + close(prog_fd); + return err; } int main(int argc, char *argv[]) { - int values_map_fd, ports_map_fd; - __u64 tcpipopts; - __u32 prog_id; - char *ports; - int err = 0; - - parse_options(argc, argv, &ifindex, &prog_id, &tcpipopts, &ports); - - if (prog_id == 0) { - err = bpf_xdp_query_id(ifindex, 0, &prog_id); - if (err < 0) { - fprintf(stderr, "Error: bpf_get_link_xdp_id: %s\n", - strerror(-err)); - goto out; - } - } - - err = syncookie_open_bpf_maps(prog_id, &values_map_fd, &ports_map_fd); - if (err < 0) - goto out; - - if (ports) { - __u16 port_last = 0; - __u32 port_idx = 0; - char *p = ports; - - fprintf(stderr, "Replacing allowed ports\n"); - - while (p && *p != '\0') { - char *token = strsep(&p, ","); - __u16 port; - - port = parse_arg_ul(argv[0], token, UINT16_MAX); - err = bpf_map_update_elem(ports_map_fd, &port_idx, &port, BPF_ANY); - if (err != 0) { - fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); - fprintf(stderr, "Failed to add port %u (index %u)\n", - port, port_idx); - goto out_close_maps; - } - fprintf(stderr, "Added port %u\n", port); - port_idx++; - } - err = 
bpf_map_update_elem(ports_map_fd, &port_idx, &port_last, BPF_ANY); - if (err != 0) { - fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); - fprintf(stderr, "Failed to add the terminator value 0 (index %u)\n", - port_idx); - goto out_close_maps; - } - } - - if (tcpipopts) { - __u32 key = 0; - - fprintf(stderr, "Replacing TCP/IP options\n"); - - err = bpf_map_update_elem(values_map_fd, &key, &tcpipopts, BPF_ANY); - if (err != 0) { - fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); - goto out_close_maps; - } - } + int values_map_fd, ports_map_fd; + __u64 tcpipopts; + __u32 prog_id; + char *ports; + int err = 0; + + parse_options(argc, argv, &ifindex, &prog_id, &tcpipopts, &ports); + + if (prog_id == 0) { + err = bpf_xdp_query_id(ifindex, 0, &prog_id); + if (err < 0) { + fprintf(stderr, "Error: bpf_get_link_xdp_id: %s\n", strerror(-err)); + goto out; + } + } + + err = syncookie_open_bpf_maps(prog_id, &values_map_fd, &ports_map_fd); + if (err < 0) + goto out; + + if (ports) { + __u16 port_last = 0; + __u32 port_idx = 0; + char *p = ports; + + fprintf(stderr, "Replacing allowed ports\n"); + + while (p && *p != '\0') { + char *token = strsep(&p, ","); + __u16 port; + + port = parse_arg_ul(argv[0], token, UINT16_MAX); + err = bpf_map_update_elem(ports_map_fd, &port_idx, &port, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + fprintf(stderr, "Failed to add port %u (index %u)\n", port, port_idx); + goto out_close_maps; + } + fprintf(stderr, "Added port %u\n", port); + port_idx++; + } + err = bpf_map_update_elem(ports_map_fd, &port_idx, &port_last, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + fprintf(stderr, "Failed to add the terminator value 0 (index %u)\n", port_idx); + goto out_close_maps; + } + } + + if (tcpipopts) { + __u32 key = 0; + + fprintf(stderr, "Replacing TCP/IP options\n"); + + err = bpf_map_update_elem(values_map_fd, 
&key, &tcpipopts, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + goto out_close_maps; + } + } out_close_maps: - close(values_map_fd); - close(ports_map_fd); + close(values_map_fd); + close(ports_map_fd); out: - return err == 0 ? 0 : 1; + return err == 0 ? 0 : 1; } diff --git a/ebpf/xdp_synproxy_kern.c b/ebpf/xdp_synproxy_kern.c index a9eec02772cd..f5641b4ca4cb 100644 --- a/ebpf/xdp_synproxy_kern.c +++ b/ebpf/xdp_synproxy_kern.c @@ -19,71 +19,78 @@ #define NSEC_PER_SEC 1000000000L -#define ETH_ALEN 6 -#define ETH_P_IP 0x0800 +#define ETH_ALEN 6 +#define ETH_P_IP 0x0800 #define ETH_P_IPV6 0x86DD #define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3]) -#define IP_DF 0x4000 -#define IP_MF 0x2000 +#define IP_DF 0x4000 +#define IP_MF 0x2000 #define IP_OFFSET 0x1fff #define NEXTHDR_TCP 6 -#define TCPOPT_NOP 1 -#define TCPOPT_EOL 0 -#define TCPOPT_MSS 2 -#define TCPOPT_WINDOW 3 +#define TCPOPT_NOP 1 +#define TCPOPT_EOL 0 +#define TCPOPT_MSS 2 +#define TCPOPT_WINDOW 3 #define TCPOPT_SACK_PERM 4 #define TCPOPT_TIMESTAMP 8 -#define TCPOLEN_MSS 4 -#define TCPOLEN_WINDOW 3 +#define TCPOLEN_MSS 4 +#define TCPOLEN_WINDOW 3 #define TCPOLEN_SACK_PERM 2 #define TCPOLEN_TIMESTAMP 10 -#define TCP_TS_HZ 1000 +#define TCP_TS_HZ 1000 #define TS_OPT_WSCALE_MASK 0xf -#define TS_OPT_SACK (1 << 4) -#define TS_OPT_ECN (1 << 5) -#define TSBITS 6 -#define TSMASK (((__u32)1 << TSBITS) - 1) -#define TCP_MAX_WSCALE 14U +#define TS_OPT_SACK (1 << 4) +#define TS_OPT_ECN (1 << 5) +#define TSBITS 6 +#define TSMASK (((__u32)1 << TSBITS) - 1) +#define TCP_MAX_WSCALE 14U #define IPV4_MAXLEN 60 -#define TCP_MAXLEN 60 +#define TCP_MAXLEN 60 -#define DEFAULT_MSS4 1460 -#define DEFAULT_MSS6 1440 -#define DEFAULT_WSCALE 7 -#define DEFAULT_TTL 64 +#define DEFAULT_MSS4 1460 +#define DEFAULT_MSS6 1440 +#define DEFAULT_WSCALE 7 +#define DEFAULT_TTL 64 #define MAX_ALLOWED_PORTS 8 #define MAX_PACKET_OFF 0xffff -#define swap(a, b) \ - do { typeof(a) 
__tmp = (a); (a) = (b); (b) = __tmp; } while (0) - -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __attribute__((__packed__)) *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ -}) +#define swap(a, b) \ + do { \ + typeof(a) __tmp = (a); \ + (a) = (b); \ + (b) = __tmp; \ + } while (0) + +#define __get_unaligned_t(type, ptr) \ + ({ \ + const struct { \ + type x; \ + } __attribute__((__packed__)) *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ + }) #define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, __u32); - __type(value, __u64); - __uint(max_entries, 2); + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u64); + __uint(max_entries, 2); } values SEC(".maps"); struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, __u32); - __type(value, __u16); - __uint(max_entries, MAX_ALLOWED_PORTS); + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u16); + __uint(max_entries, MAX_ALLOWED_PORTS); } allowed_ports SEC(".maps"); /* Some symbols defined in net/netfilter/nf_conntrack_bpf.c are unavailable in @@ -91,728 +98,702 @@ struct { */ struct bpf_ct_opts___local { - int netns_id; - int error; - __u8 l4proto; - __u8 dir; - __u8 reserved[2]; + int netns_id; + int error; + __u8 l4proto; + __u8 dir; + __u8 reserved[2]; } __attribute__((preserve_access_index)); -extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, - struct bpf_sock_tuple *bpf_tuple, - __u32 len_tuple, - struct bpf_ct_opts___local *opts, - __u32 len_opts) __ksym; +extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, + __u32 len_tuple, struct bpf_ct_opts___local *opts, __u32 len_opts) __ksym; extern void bpf_ct_release(struct nf_conn *ct) __ksym; static __always_inline void swap_eth_addr(__u8 *a, __u8 *b) { - __u8 tmp[ETH_ALEN]; + __u8 tmp[ETH_ALEN]; - __builtin_memcpy(tmp, a, ETH_ALEN); - __builtin_memcpy(a, b, 
ETH_ALEN); - __builtin_memcpy(b, tmp, ETH_ALEN); + __builtin_memcpy(tmp, a, ETH_ALEN); + __builtin_memcpy(a, b, ETH_ALEN); + __builtin_memcpy(b, tmp, ETH_ALEN); } static __always_inline __u16 csum_fold(__u32 csum) { - csum = (csum & 0xffff) + (csum >> 16); - csum = (csum & 0xffff) + (csum >> 16); - return (__u16)~csum; + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + return (__u16)~csum; } -static __always_inline __u16 csum_tcpudp_magic(__u32 saddr, __u32 daddr, - __u32 len, __u8 proto, - __u32 csum) +static __always_inline __u16 csum_tcpudp_magic( + __u32 saddr, __u32 daddr, __u32 len, __u8 proto, __u32 csum) { - __u64 s = csum; + __u64 s = csum; - s += (__u32)saddr; - s += (__u32)daddr; + s += (__u32)saddr; + s += (__u32)daddr; #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - s += proto + len; + s += proto + len; #elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - s += (proto + len) << 8; + s += (proto + len) << 8; #else #error Unknown endian #endif - s = (s & 0xffffffff) + (s >> 32); - s = (s & 0xffffffff) + (s >> 32); + s = (s & 0xffffffff) + (s >> 32); + s = (s & 0xffffffff) + (s >> 32); - return csum_fold((__u32)s); + return csum_fold((__u32)s); } static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr, - const struct in6_addr *daddr, - __u32 len, __u8 proto, __u32 csum) + const struct in6_addr *daddr, __u32 len, __u8 proto, __u32 csum) { - __u64 sum = csum; - int i; + __u64 sum = csum; + int i; #pragma unroll - for (i = 0; i < 4; i++) - sum += (__u32)saddr->in6_u.u6_addr32[i]; + for (i = 0; i < 4; i++) + sum += (__u32)saddr->in6_u.u6_addr32[i]; #pragma unroll - for (i = 0; i < 4; i++) - sum += (__u32)daddr->in6_u.u6_addr32[i]; + for (i = 0; i < 4; i++) + sum += (__u32)daddr->in6_u.u6_addr32[i]; - /* Don't combine additions to avoid 32-bit overflow. */ - sum += bpf_htonl(len); - sum += bpf_htonl(proto); + /* Don't combine additions to avoid 32-bit overflow. 
*/ + sum += bpf_htonl(len); + sum += bpf_htonl(proto); - sum = (sum & 0xffffffff) + (sum >> 32); - sum = (sum & 0xffffffff) + (sum >> 32); + sum = (sum & 0xffffffff) + (sum >> 32); + sum = (sum & 0xffffffff) + (sum >> 32); - return csum_fold((__u32)sum); + return csum_fold((__u32)sum); } static __always_inline __u64 tcp_clock_ns(void) { - return bpf_ktime_get_ns(); + return bpf_ktime_get_ns(); } static __always_inline __u32 tcp_ns_to_ts(__u64 ns) { - return ns / (NSEC_PER_SEC / TCP_TS_HZ); + return ns / (NSEC_PER_SEC / TCP_TS_HZ); } static __always_inline __u32 tcp_clock_ms(void) { - return tcp_ns_to_ts(tcp_clock_ns()); + return tcp_ns_to_ts(tcp_clock_ns()); } struct tcpopt_context { - void *data; - void *data_end; - __be32 *tsecr; - __u8 wscale; - bool option_timestamp; - bool option_sack; - __u32 off; + void *data; + void *data_end; + __be32 *tsecr; + __u8 wscale; + bool option_timestamp; + bool option_sack; + __u32 off; }; static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz) { - __u64 off = ctx->off; - __u8 *data; + __u64 off = ctx->off; + __u8 *data; - /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */ - if (off > MAX_PACKET_OFF - sz) - return NULL; + /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */ + if (off > MAX_PACKET_OFF - sz) + return NULL; - data = ctx->data + off; - barrier_var(data); - if (data + sz >= ctx->data_end) - return NULL; + data = ctx->data + off; + barrier_var(data); + if (data + sz >= ctx->data_end) + return NULL; - ctx->off += sz; - return data; + ctx->off += sz; + return data; } static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) { - __u8 *opcode, *opsize, *wscale, *tsecr; - __u32 off = ctx->off; - - opcode = next(ctx, 1); - if (!opcode) - return 1; - - if (*opcode == TCPOPT_EOL) - return 1; - if (*opcode == TCPOPT_NOP) - return 0; - - opsize = next(ctx, 1); - if (!opsize || *opsize < 2) - return 1; - - switch (*opcode) { - case TCPOPT_WINDOW: - wscale = next(ctx, 
1); - if (!wscale) - return 1; - if (*opsize == TCPOLEN_WINDOW) - ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE; - break; - case TCPOPT_TIMESTAMP: - tsecr = next(ctx, 4); - if (!tsecr) - return 1; - if (*opsize == TCPOLEN_TIMESTAMP) { - ctx->option_timestamp = true; - /* Client's tsval becomes our tsecr. */ - *ctx->tsecr = get_unaligned((__be32 *)tsecr); - } - break; - case TCPOPT_SACK_PERM: - if (*opsize == TCPOLEN_SACK_PERM) - ctx->option_sack = true; - break; - } - - ctx->off = off + *opsize; - - return 0; + __u8 *opcode, *opsize, *wscale, *tsecr; + __u32 off = ctx->off; + + opcode = next(ctx, 1); + if (!opcode) + return 1; + + if (*opcode == TCPOPT_EOL) + return 1; + if (*opcode == TCPOPT_NOP) + return 0; + + opsize = next(ctx, 1); + if (!opsize || *opsize < 2) + return 1; + + switch (*opcode) { + case TCPOPT_WINDOW: + wscale = next(ctx, 1); + if (!wscale) + return 1; + if (*opsize == TCPOLEN_WINDOW) + ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE; + break; + case TCPOPT_TIMESTAMP: + tsecr = next(ctx, 4); + if (!tsecr) + return 1; + if (*opsize == TCPOLEN_TIMESTAMP) { + ctx->option_timestamp = true; + /* Client's tsval becomes our tsecr. 
*/ + *ctx->tsecr = get_unaligned((__be32 *)tsecr); + } + break; + case TCPOPT_SACK_PERM: + if (*opsize == TCPOLEN_SACK_PERM) + ctx->option_sack = true; + break; + } + + ctx->off = off + *opsize; + + return 0; } static int tscookie_tcpopt_parse_batch(__u32 index, void *context) { - int i; + int i; - for (i = 0; i < 7; i++) - if (tscookie_tcpopt_parse(context)) - return 1; - return 0; + for (i = 0; i < 7; i++) + if (tscookie_tcpopt_parse(context)) + return 1; + return 0; } -static __always_inline bool tscookie_init(struct tcphdr *tcp_header, - __u16 tcp_len, __be32 *tsval, - __be32 *tsecr, void *data, void *data_end) +static __always_inline bool tscookie_init(struct tcphdr *tcp_header, __u16 tcp_len, __be32 *tsval, + __be32 *tsecr, void *data, void *data_end) { - struct tcpopt_context loop_ctx = { - .data = data, - .data_end = data_end, - .tsecr = tsecr, - .wscale = TS_OPT_WSCALE_MASK, - .option_timestamp = false, - .option_sack = false, - /* Note: currently verifier would track .off as unbound scalar. - * In case if verifier would at some point get smarter and - * compute bounded value for this var, beware that it might - * hinder bpf_loop() convergence validation. - */ - .off = (__u8 *)(tcp_header + 1) - (__u8 *)data, - }; - u32 cookie; - - bpf_loop(6, tscookie_tcpopt_parse_batch, &loop_ctx, 0); - - if (!loop_ctx.option_timestamp) - return false; - - cookie = tcp_clock_ms() & ~TSMASK; - cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK; - if (loop_ctx.option_sack) - cookie |= TS_OPT_SACK; - if (tcp_header->ece && tcp_header->cwr) - cookie |= TS_OPT_ECN; - *tsval = bpf_htonl(cookie); - - return true; + struct tcpopt_context loop_ctx = { + .data = data, + .data_end = data_end, + .tsecr = tsecr, + .wscale = TS_OPT_WSCALE_MASK, + .option_timestamp = false, + .option_sack = false, + /* Note: currently verifier would track .off as unbound scalar. 
+ * In case if verifier would at some point get smarter and + * compute bounded value for this var, beware that it might + * hinder bpf_loop() convergence validation. + */ + .off = (__u8 *)(tcp_header + 1) - (__u8 *)data, + }; + u32 cookie; + + bpf_loop(6, tscookie_tcpopt_parse_batch, &loop_ctx, 0); + + if (!loop_ctx.option_timestamp) + return false; + + cookie = tcp_clock_ms() & ~TSMASK; + cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK; + if (loop_ctx.option_sack) + cookie |= TS_OPT_SACK; + if (tcp_header->ece && tcp_header->cwr) + cookie |= TS_OPT_ECN; + *tsval = bpf_htonl(cookie); + + return true; } -static __always_inline void values_get_tcpipopts(__u16 *mss, __u8 *wscale, - __u8 *ttl, bool ipv6) +static __always_inline void values_get_tcpipopts(__u16 *mss, __u8 *wscale, __u8 *ttl, bool ipv6) { - __u32 key = 0; - __u64 *value; - - value = bpf_map_lookup_elem(&values, &key); - if (value && *value != 0) { - if (ipv6) - *mss = (*value >> 32) & 0xffff; - else - *mss = *value & 0xffff; - *wscale = (*value >> 16) & 0xf; - *ttl = (*value >> 24) & 0xff; - return; - } - - *mss = ipv6 ? DEFAULT_MSS6 : DEFAULT_MSS4; - *wscale = DEFAULT_WSCALE; - *ttl = DEFAULT_TTL; + __u32 key = 0; + __u64 *value; + + value = bpf_map_lookup_elem(&values, &key); + if (value && *value != 0) { + if (ipv6) + *mss = (*value >> 32) & 0xffff; + else + *mss = *value & 0xffff; + *wscale = (*value >> 16) & 0xf; + *ttl = (*value >> 24) & 0xff; + return; + } + + *mss = ipv6 ? 
DEFAULT_MSS6 : DEFAULT_MSS4; + *wscale = DEFAULT_WSCALE; + *ttl = DEFAULT_TTL; } static __always_inline void values_inc_synacks(void) { - __u32 key = 1; - __u64 *value; + __u32 key = 1; + __u64 *value; - value = bpf_map_lookup_elem(&values, &key); - if (value) - __sync_fetch_and_add(value, 1); + value = bpf_map_lookup_elem(&values, &key); + if (value) + __sync_fetch_and_add(value, 1); } static __always_inline bool check_port_allowed(__u16 port) { - __u32 i; + __u32 i; - for (i = 0; i < MAX_ALLOWED_PORTS; i++) { - __u32 key = i; - __u16 *value; + for (i = 0; i < MAX_ALLOWED_PORTS; i++) { + __u32 key = i; + __u16 *value; - value = bpf_map_lookup_elem(&allowed_ports, &key); + value = bpf_map_lookup_elem(&allowed_ports, &key); - if (!value) - break; - /* 0 is a terminator value. Check it first to avoid matching on - * a forbidden port == 0 and returning true. - */ - if (*value == 0) - break; + if (!value) + break; + /* 0 is a terminator value. Check it first to avoid matching on + * a forbidden port == 0 and returning true. 
+ */ + if (*value == 0) + break; - if (*value == port) - return true; - } + if (*value == port) + return true; + } - return false; + return false; } struct header_pointers { - struct ethhdr *eth; - struct iphdr *ipv4; - struct ipv6hdr *ipv6; - struct tcphdr *tcp; - __u16 tcp_len; + struct ethhdr *eth; + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + struct tcphdr *tcp; + __u16 tcp_len; }; -static __always_inline int tcp_dissect(void *data, void *data_end, - struct header_pointers *hdr) +static __always_inline int tcp_dissect(void *data, void *data_end, struct header_pointers *hdr) { - hdr->eth = data; - if (hdr->eth + 1 > data_end) - return XDP_DROP; - - switch (bpf_ntohs(hdr->eth->h_proto)) { - case ETH_P_IP: - hdr->ipv6 = NULL; - - hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth); - if (hdr->ipv4 + 1 > data_end) - return XDP_DROP; - if (hdr->ipv4->ihl * 4 < sizeof(*hdr->ipv4)) - return XDP_DROP; - if (hdr->ipv4->version != 4) - return XDP_DROP; - - if (hdr->ipv4->protocol != IPPROTO_TCP) - return XDP_PASS; - - hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4; - break; - case ETH_P_IPV6: - hdr->ipv4 = NULL; - - hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth); - if (hdr->ipv6 + 1 > data_end) - return XDP_DROP; - if (hdr->ipv6->version != 6) - return XDP_DROP; - - /* XXX: Extension headers are not supported and could circumvent - * XDP SYN flood protection. - */ - if (hdr->ipv6->nexthdr != NEXTHDR_TCP) - return XDP_PASS; - - hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6); - break; - default: - /* XXX: VLANs will circumvent XDP SYN flood protection. 
*/ - return XDP_PASS; - } - - if (hdr->tcp + 1 > data_end) - return XDP_DROP; - hdr->tcp_len = hdr->tcp->doff * 4; - if (hdr->tcp_len < sizeof(*hdr->tcp)) - return XDP_DROP; - - return XDP_TX; + hdr->eth = data; + if (hdr->eth + 1 > data_end) + return XDP_DROP; + + switch (bpf_ntohs(hdr->eth->h_proto)) { + case ETH_P_IP: + hdr->ipv6 = NULL; + + hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth); + if (hdr->ipv4 + 1 > data_end) + return XDP_DROP; + if (hdr->ipv4->ihl * 4 < sizeof(*hdr->ipv4)) + return XDP_DROP; + if (hdr->ipv4->version != 4) + return XDP_DROP; + + if (hdr->ipv4->protocol != IPPROTO_TCP) + return XDP_PASS; + + hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4; + break; + case ETH_P_IPV6: + hdr->ipv4 = NULL; + + hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth); + if (hdr->ipv6 + 1 > data_end) + return XDP_DROP; + if (hdr->ipv6->version != 6) + return XDP_DROP; + + /* XXX: Extension headers are not supported and could circumvent + * XDP SYN flood protection. + */ + if (hdr->ipv6->nexthdr != NEXTHDR_TCP) + return XDP_PASS; + + hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6); + break; + default: + /* XXX: VLANs will circumvent XDP SYN flood protection. */ + return XDP_PASS; + } + + if (hdr->tcp + 1 > data_end) + return XDP_DROP; + hdr->tcp_len = hdr->tcp->doff * 4; + if (hdr->tcp_len < sizeof(*hdr->tcp)) + return XDP_DROP; + + return XDP_TX; } static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bool xdp) { - struct bpf_ct_opts___local ct_lookup_opts = { - .netns_id = BPF_F_CURRENT_NETNS, - .l4proto = IPPROTO_TCP, - }; - struct bpf_sock_tuple tup = {}; - struct nf_conn *ct; - __u32 tup_size; - - if (hdr->ipv4) { - /* TCP doesn't normally use fragments, and XDP can't reassemble - * them. 
- */ - if ((hdr->ipv4->frag_off & bpf_htons(IP_DF | IP_MF | IP_OFFSET)) != bpf_htons(IP_DF)) - return XDP_DROP; - - tup.ipv4.saddr = hdr->ipv4->saddr; - tup.ipv4.daddr = hdr->ipv4->daddr; - tup.ipv4.sport = hdr->tcp->source; - tup.ipv4.dport = hdr->tcp->dest; - tup_size = sizeof(tup.ipv4); - } else if (hdr->ipv6) { - __builtin_memcpy(tup.ipv6.saddr, &hdr->ipv6->saddr, sizeof(tup.ipv6.saddr)); - __builtin_memcpy(tup.ipv6.daddr, &hdr->ipv6->daddr, sizeof(tup.ipv6.daddr)); - tup.ipv6.sport = hdr->tcp->source; - tup.ipv6.dport = hdr->tcp->dest; - tup_size = sizeof(tup.ipv6); - } else { - /* The verifier can't track that either ipv4 or ipv6 is not - * NULL. - */ - return XDP_ABORTED; - } - - ct = bpf_xdp_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts)); - if (ct) { - unsigned long status = ct->status; - - bpf_ct_release(ct); - if (status & IPS_CONFIRMED) - return XDP_PASS; - } else if (ct_lookup_opts.error != -ENOENT) { - return XDP_ABORTED; - } - - /* error == -ENOENT || !(status & IPS_CONFIRMED) */ - return XDP_TX; + struct bpf_ct_opts___local ct_lookup_opts = { + .netns_id = BPF_F_CURRENT_NETNS, + .l4proto = IPPROTO_TCP, + }; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + __u32 tup_size; + + if (hdr->ipv4) { + /* TCP doesn't normally use fragments, and XDP can't reassemble + * them. + */ + if ((hdr->ipv4->frag_off & bpf_htons(IP_DF | IP_MF | IP_OFFSET)) != bpf_htons(IP_DF)) + return XDP_DROP; + + tup.ipv4.saddr = hdr->ipv4->saddr; + tup.ipv4.daddr = hdr->ipv4->daddr; + tup.ipv4.sport = hdr->tcp->source; + tup.ipv4.dport = hdr->tcp->dest; + tup_size = sizeof(tup.ipv4); + } else if (hdr->ipv6) { + __builtin_memcpy(tup.ipv6.saddr, &hdr->ipv6->saddr, sizeof(tup.ipv6.saddr)); + __builtin_memcpy(tup.ipv6.daddr, &hdr->ipv6->daddr, sizeof(tup.ipv6.daddr)); + tup.ipv6.sport = hdr->tcp->source; + tup.ipv6.dport = hdr->tcp->dest; + tup_size = sizeof(tup.ipv6); + } else { + /* The verifier can't track that either ipv4 or ipv6 is not + * NULL. 
+ */ + return XDP_ABORTED; + } + + ct = bpf_xdp_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts)); + if (ct) { + unsigned long status = ct->status; + + bpf_ct_release(ct); + if (status & IPS_CONFIRMED) + return XDP_PASS; + } else if (ct_lookup_opts.error != -ENOENT) { + return XDP_ABORTED; + } + + /* error == -ENOENT || !(status & IPS_CONFIRMED) */ + return XDP_TX; } -static __always_inline __u8 tcp_mkoptions(__be32 *buf, __be32 *tsopt, __u16 mss, - __u8 wscale) +static __always_inline __u8 tcp_mkoptions(__be32 *buf, __be32 *tsopt, __u16 mss, __u8 wscale) { - __be32 *start = buf; - - *buf++ = bpf_htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); - - if (!tsopt) - return buf - start; - - if (tsopt[0] & bpf_htonl(1 << 4)) - *buf++ = bpf_htonl((TCPOPT_SACK_PERM << 24) | - (TCPOLEN_SACK_PERM << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); - else - *buf++ = bpf_htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); - *buf++ = tsopt[0]; - *buf++ = tsopt[1]; - - if ((tsopt[0] & bpf_htonl(0xf)) != bpf_htonl(0xf)) - *buf++ = bpf_htonl((TCPOPT_NOP << 24) | - (TCPOPT_WINDOW << 16) | - (TCPOLEN_WINDOW << 8) | - wscale); - - return buf - start; + __be32 *start = buf; + + *buf++ = bpf_htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); + + if (!tsopt) + return buf - start; + + if (tsopt[0] & bpf_htonl(1 << 4)) + *buf++ = bpf_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + else + *buf++ = bpf_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + *buf++ = tsopt[0]; + *buf++ = tsopt[1]; + + if ((tsopt[0] & bpf_htonl(0xf)) != bpf_htonl(0xf)) + *buf++ = bpf_htonl( + (TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | wscale); + + return buf - start; } -static __always_inline void tcp_gen_synack(struct tcphdr *tcp_header, - __u32 cookie, __be32 *tsopt, - __u16 mss, __u8 wscale) 
+static __always_inline void tcp_gen_synack( + struct tcphdr *tcp_header, __u32 cookie, __be32 *tsopt, __u16 mss, __u8 wscale) { - void *tcp_options; - - tcp_flag_word(tcp_header) = TCP_FLAG_SYN | TCP_FLAG_ACK; - if (tsopt && (tsopt[0] & bpf_htonl(1 << 5))) - tcp_flag_word(tcp_header) |= TCP_FLAG_ECE; - tcp_header->doff = 5; /* doff is part of tcp_flag_word. */ - swap(tcp_header->source, tcp_header->dest); - tcp_header->ack_seq = bpf_htonl(bpf_ntohl(tcp_header->seq) + 1); - tcp_header->seq = bpf_htonl(cookie); - tcp_header->window = 0; - tcp_header->urg_ptr = 0; - tcp_header->check = 0; /* Calculate checksum later. */ - - tcp_options = (void *)(tcp_header + 1); - tcp_header->doff += tcp_mkoptions(tcp_options, tsopt, mss, wscale); + void *tcp_options; + + tcp_flag_word(tcp_header) = TCP_FLAG_SYN | TCP_FLAG_ACK; + if (tsopt && (tsopt[0] & bpf_htonl(1 << 5))) + tcp_flag_word(tcp_header) |= TCP_FLAG_ECE; + tcp_header->doff = 5; /* doff is part of tcp_flag_word. */ + swap(tcp_header->source, tcp_header->dest); + tcp_header->ack_seq = bpf_htonl(bpf_ntohl(tcp_header->seq) + 1); + tcp_header->seq = bpf_htonl(cookie); + tcp_header->window = 0; + tcp_header->urg_ptr = 0; + tcp_header->check = 0; /* Calculate checksum later. */ + + tcp_options = (void *)(tcp_header + 1); + tcp_header->doff += tcp_mkoptions(tcp_options, tsopt, mss, wscale); } -static __always_inline void tcpv4_gen_synack(struct header_pointers *hdr, - __u32 cookie, __be32 *tsopt) +static __always_inline void tcpv4_gen_synack( + struct header_pointers *hdr, __u32 cookie, __be32 *tsopt) { - __u8 wscale; - __u16 mss; - __u8 ttl; + __u8 wscale; + __u16 mss; + __u8 ttl; - values_get_tcpipopts(&mss, &wscale, &ttl, false); + values_get_tcpipopts(&mss, &wscale, &ttl, false); - swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest); + swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest); - swap(hdr->ipv4->saddr, hdr->ipv4->daddr); - hdr->ipv4->check = 0; /* Calculate checksum later. 
*/ - hdr->ipv4->tos = 0; - hdr->ipv4->id = 0; - hdr->ipv4->ttl = ttl; + swap(hdr->ipv4->saddr, hdr->ipv4->daddr); + hdr->ipv4->check = 0; /* Calculate checksum later. */ + hdr->ipv4->tos = 0; + hdr->ipv4->id = 0; + hdr->ipv4->ttl = ttl; - tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale); + tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale); - hdr->tcp_len = hdr->tcp->doff * 4; - hdr->ipv4->tot_len = bpf_htons(sizeof(*hdr->ipv4) + hdr->tcp_len); + hdr->tcp_len = hdr->tcp->doff * 4; + hdr->ipv4->tot_len = bpf_htons(sizeof(*hdr->ipv4) + hdr->tcp_len); } -static __always_inline void tcpv6_gen_synack(struct header_pointers *hdr, - __u32 cookie, __be32 *tsopt) +static __always_inline void tcpv6_gen_synack( + struct header_pointers *hdr, __u32 cookie, __be32 *tsopt) { - __u8 wscale; - __u16 mss; - __u8 ttl; + __u8 wscale; + __u16 mss; + __u8 ttl; - values_get_tcpipopts(&mss, &wscale, &ttl, true); + values_get_tcpipopts(&mss, &wscale, &ttl, true); - swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest); + swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest); - swap(hdr->ipv6->saddr, hdr->ipv6->daddr); - *(__be32 *)hdr->ipv6 = bpf_htonl(0x60000000); - hdr->ipv6->hop_limit = ttl; + swap(hdr->ipv6->saddr, hdr->ipv6->daddr); + *(__be32 *)hdr->ipv6 = bpf_htonl(0x60000000); + hdr->ipv6->hop_limit = ttl; - tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale); + tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale); - hdr->tcp_len = hdr->tcp->doff * 4; - hdr->ipv6->payload_len = bpf_htons(hdr->tcp_len); + hdr->tcp_len = hdr->tcp->doff * 4; + hdr->ipv6->payload_len = bpf_htons(hdr->tcp_len); } -static __always_inline int syncookie_handle_syn(struct header_pointers *hdr, - void *ctx, - void *data, void *data_end) +static __always_inline int syncookie_handle_syn( + struct header_pointers *hdr, void *ctx, void *data, void *data_end) { - __u32 old_pkt_size, new_pkt_size; - /* Unlike clang 10, clang 11 and 12 generate code that doesn't pass the - * BPF verifier if tsopt is not 
volatile. Volatile forces it to store - * the pointer value and use it directly, otherwise tcp_mkoptions is - * (mis)compiled like this: - * if (!tsopt) - * return buf - start; - * reg = stored_return_value_of_tscookie_init; - * if (reg) - * tsopt = tsopt_buf; - * else - * tsopt = NULL; - * ... - * *buf++ = tsopt[1]; - * It creates a dead branch where tsopt is assigned NULL, but the - * verifier can't prove it's dead and blocks the program. - */ - __be32 * volatile tsopt = NULL; - __be32 tsopt_buf[2] = {}; - __u16 ip_len; - __u32 cookie; - __s64 value; - - /* Checksum is not yet verified, but both checksum failure and TCP - * header checks return XDP_DROP, so the order doesn't matter. - */ - if (hdr->tcp->fin || hdr->tcp->rst) - return XDP_DROP; - - if (hdr->ipv4) { - /* Check the IPv4 and TCP checksums before creating a SYNACK. */ - value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, hdr->ipv4->ihl * 4, 0); - if (value < 0) - return XDP_ABORTED; - if (csum_fold(value) != 0) - return XDP_DROP; /* Bad IPv4 checksum. */ - - value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); - if (value < 0) - return XDP_ABORTED; - if (csum_tcpudp_magic(hdr->ipv4->saddr, hdr->ipv4->daddr, - hdr->tcp_len, IPPROTO_TCP, value) != 0) - return XDP_DROP; /* Bad TCP checksum. */ - - ip_len = sizeof(*hdr->ipv4); - - value = bpf_tcp_raw_gen_syncookie_ipv4(hdr->ipv4, hdr->tcp, - hdr->tcp_len); - } else if (hdr->ipv6) { - /* Check the TCP checksum before creating a SYNACK. */ - value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); - if (value < 0) - return XDP_ABORTED; - if (csum_ipv6_magic(&hdr->ipv6->saddr, &hdr->ipv6->daddr, - hdr->tcp_len, IPPROTO_TCP, value) != 0) - return XDP_DROP; /* Bad TCP checksum. 
*/ - - ip_len = sizeof(*hdr->ipv6); - - value = bpf_tcp_raw_gen_syncookie_ipv6(hdr->ipv6, hdr->tcp, - hdr->tcp_len); - } else { - return XDP_ABORTED; - } - - if (value < 0) - return XDP_ABORTED; - cookie = (__u32)value; - - if (tscookie_init((void *)hdr->tcp, hdr->tcp_len, - &tsopt_buf[0], &tsopt_buf[1], data, data_end)) - tsopt = tsopt_buf; - - /* Check that there is enough space for a SYNACK. It also covers - * the check that the destination of the __builtin_memmove below - * doesn't overflow. - */ - if (data + sizeof(*hdr->eth) + ip_len + TCP_MAXLEN > data_end) - return XDP_ABORTED; - - if (hdr->ipv4) { - if (hdr->ipv4->ihl * 4 > sizeof(*hdr->ipv4)) { - struct tcphdr *new_tcp_header; - - new_tcp_header = data + sizeof(*hdr->eth) + sizeof(*hdr->ipv4); - __builtin_memmove(new_tcp_header, hdr->tcp, sizeof(*hdr->tcp)); - hdr->tcp = new_tcp_header; - - hdr->ipv4->ihl = sizeof(*hdr->ipv4) / 4; - } - - tcpv4_gen_synack(hdr, cookie, tsopt); - } else if (hdr->ipv6) { - tcpv6_gen_synack(hdr, cookie, tsopt); - } else { - return XDP_ABORTED; - } - - /* Recalculate checksums. */ - hdr->tcp->check = 0; - value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); - if (value < 0) - return XDP_ABORTED; - if (hdr->ipv4) { - hdr->tcp->check = csum_tcpudp_magic(hdr->ipv4->saddr, - hdr->ipv4->daddr, - hdr->tcp_len, - IPPROTO_TCP, - value); - - hdr->ipv4->check = 0; - value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, sizeof(*hdr->ipv4), 0); - if (value < 0) - return XDP_ABORTED; - hdr->ipv4->check = csum_fold(value); - } else if (hdr->ipv6) { - hdr->tcp->check = csum_ipv6_magic(&hdr->ipv6->saddr, - &hdr->ipv6->daddr, - hdr->tcp_len, - IPPROTO_TCP, - value); - } else { - return XDP_ABORTED; - } - - /* Set the new packet size. 
*/ - old_pkt_size = data_end - data; - new_pkt_size = sizeof(*hdr->eth) + ip_len + hdr->tcp->doff * 4; - if (bpf_xdp_adjust_tail(ctx, new_pkt_size - old_pkt_size)) - return XDP_ABORTED; - - values_inc_synacks(); - - return XDP_TX; + __u32 old_pkt_size, new_pkt_size; + /* Unlike clang 10, clang 11 and 12 generate code that doesn't pass the + * BPF verifier if tsopt is not volatile. Volatile forces it to store + * the pointer value and use it directly, otherwise tcp_mkoptions is + * (mis)compiled like this: + * if (!tsopt) + * return buf - start; + * reg = stored_return_value_of_tscookie_init; + * if (reg) + * tsopt = tsopt_buf; + * else + * tsopt = NULL; + * ... + * *buf++ = tsopt[1]; + * It creates a dead branch where tsopt is assigned NULL, but the + * verifier can't prove it's dead and blocks the program. + */ + __be32 *volatile tsopt = NULL; + __be32 tsopt_buf[2] = {}; + __u16 ip_len; + __u32 cookie; + __s64 value; + + /* Checksum is not yet verified, but both checksum failure and TCP + * header checks return XDP_DROP, so the order doesn't matter. + */ + if (hdr->tcp->fin || hdr->tcp->rst) + return XDP_DROP; + + if (hdr->ipv4) { + /* Check the IPv4 and TCP checksums before creating a SYNACK. */ + value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, hdr->ipv4->ihl * 4, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_fold(value) != 0) + return XDP_DROP; /* Bad IPv4 checksum. */ + + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_tcpudp_magic( + hdr->ipv4->saddr, hdr->ipv4->daddr, hdr->tcp_len, IPPROTO_TCP, value) != 0) + return XDP_DROP; /* Bad TCP checksum. */ + + ip_len = sizeof(*hdr->ipv4); + + value = bpf_tcp_raw_gen_syncookie_ipv4(hdr->ipv4, hdr->tcp, hdr->tcp_len); + } else if (hdr->ipv6) { + /* Check the TCP checksum before creating a SYNACK. 
*/ + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_ipv6_magic( + &hdr->ipv6->saddr, &hdr->ipv6->daddr, hdr->tcp_len, IPPROTO_TCP, value) != 0) + return XDP_DROP; /* Bad TCP checksum. */ + + ip_len = sizeof(*hdr->ipv6); + + value = bpf_tcp_raw_gen_syncookie_ipv6(hdr->ipv6, hdr->tcp, hdr->tcp_len); + } else { + return XDP_ABORTED; + } + + if (value < 0) + return XDP_ABORTED; + cookie = (__u32)value; + + if (tscookie_init((void *)hdr->tcp, hdr->tcp_len, &tsopt_buf[0], &tsopt_buf[1], data, data_end)) + tsopt = tsopt_buf; + + /* Check that there is enough space for a SYNACK. It also covers + * the check that the destination of the __builtin_memmove below + * doesn't overflow. + */ + if (data + sizeof(*hdr->eth) + ip_len + TCP_MAXLEN > data_end) + return XDP_ABORTED; + + if (hdr->ipv4) { + if (hdr->ipv4->ihl * 4 > sizeof(*hdr->ipv4)) { + struct tcphdr *new_tcp_header; + + new_tcp_header = data + sizeof(*hdr->eth) + sizeof(*hdr->ipv4); + __builtin_memmove(new_tcp_header, hdr->tcp, sizeof(*hdr->tcp)); + hdr->tcp = new_tcp_header; + + hdr->ipv4->ihl = sizeof(*hdr->ipv4) / 4; + } + + tcpv4_gen_synack(hdr, cookie, tsopt); + } else if (hdr->ipv6) { + tcpv6_gen_synack(hdr, cookie, tsopt); + } else { + return XDP_ABORTED; + } + + /* Recalculate checksums. */ + hdr->tcp->check = 0; + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (hdr->ipv4) { + hdr->tcp->check = csum_tcpudp_magic( + hdr->ipv4->saddr, hdr->ipv4->daddr, hdr->tcp_len, IPPROTO_TCP, value); + + hdr->ipv4->check = 0; + value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, sizeof(*hdr->ipv4), 0); + if (value < 0) + return XDP_ABORTED; + hdr->ipv4->check = csum_fold(value); + } else if (hdr->ipv6) { + hdr->tcp->check = csum_ipv6_magic( + &hdr->ipv6->saddr, &hdr->ipv6->daddr, hdr->tcp_len, IPPROTO_TCP, value); + } else { + return XDP_ABORTED; + } + + /* Set the new packet size. 
*/ + old_pkt_size = data_end - data; + new_pkt_size = sizeof(*hdr->eth) + ip_len + hdr->tcp->doff * 4; + if (bpf_xdp_adjust_tail(ctx, new_pkt_size - old_pkt_size)) + return XDP_ABORTED; + + values_inc_synacks(); + + return XDP_TX; } static __always_inline int syncookie_handle_ack(struct header_pointers *hdr) { - int err; + int err; - if (hdr->tcp->rst) - return XDP_DROP; + if (hdr->tcp->rst) + return XDP_DROP; - if (hdr->ipv4) - err = bpf_tcp_raw_check_syncookie_ipv4(hdr->ipv4, hdr->tcp); - else if (hdr->ipv6) - err = bpf_tcp_raw_check_syncookie_ipv6(hdr->ipv6, hdr->tcp); - else - return XDP_ABORTED; - if (err) - return XDP_DROP; + if (hdr->ipv4) + err = bpf_tcp_raw_check_syncookie_ipv4(hdr->ipv4, hdr->tcp); + else if (hdr->ipv6) + err = bpf_tcp_raw_check_syncookie_ipv6(hdr->ipv6, hdr->tcp); + else + return XDP_ABORTED; + if (err) + return XDP_DROP; - return XDP_PASS; + return XDP_PASS; } -static __always_inline int syncookie_part1(void *ctx, void *data, void *data_end, - struct header_pointers *hdr, bool xdp) +static __always_inline int syncookie_part1( + void *ctx, void *data, void *data_end, struct header_pointers *hdr, bool xdp) { - int ret; + int ret; - ret = tcp_dissect(data, data_end, hdr); - if (ret != XDP_TX) - return ret; + ret = tcp_dissect(data, data_end, hdr); + if (ret != XDP_TX) + return ret; - ret = tcp_lookup(ctx, hdr, xdp); - if (ret != XDP_TX) - return ret; + ret = tcp_lookup(ctx, hdr, xdp); + if (ret != XDP_TX) + return ret; - /* Pass to upper stack if port requires no syncookie handling */ - if (!check_port_allowed(bpf_ntohs(hdr->tcp->dest))) - return XDP_PASS; + /* Pass to upper stack if port requires no syncookie handling */ + if (!check_port_allowed(bpf_ntohs(hdr->tcp->dest))) + return XDP_PASS; - /* Packet is TCP and doesn't belong to an established connection. */ + /* Packet is TCP and doesn't belong to an established connection. 
*/ - if ((hdr->tcp->syn ^ hdr->tcp->ack) != 1) - return XDP_DROP; + if ((hdr->tcp->syn ^ hdr->tcp->ack) != 1) + return XDP_DROP; - /* Grow the TCP header to TCP_MAXLEN to be able to pass any hdr->tcp_len - * to bpf_tcp_raw_gen_syncookie_ipv{4,6} and pass the verifier. - */ - if (xdp) { - if (bpf_xdp_adjust_tail(ctx, TCP_MAXLEN - hdr->tcp_len)) - return XDP_ABORTED; - } + /* Grow the TCP header to TCP_MAXLEN to be able to pass any hdr->tcp_len + * to bpf_tcp_raw_gen_syncookie_ipv{4,6} and pass the verifier. + */ + if (xdp) { + if (bpf_xdp_adjust_tail(ctx, TCP_MAXLEN - hdr->tcp_len)) + return XDP_ABORTED; + } - return XDP_TX; + return XDP_TX; } -static __always_inline int syncookie_part2(void *ctx, void *data, void *data_end, - struct header_pointers *hdr) +static __always_inline int syncookie_part2( + void *ctx, void *data, void *data_end, struct header_pointers *hdr) { - if (hdr->ipv4) { - hdr->eth = data; - hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth); - /* IPV4_MAXLEN is needed when calculating checksum. - * At least sizeof(struct iphdr) is needed here to access ihl. - */ - if ((void *)hdr->ipv4 + IPV4_MAXLEN > data_end) - return XDP_ABORTED; - hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4; - } else if (hdr->ipv6) { - hdr->eth = data; - hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth); - hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6); - } else { - return XDP_ABORTED; - } - - if ((void *)hdr->tcp + TCP_MAXLEN > data_end) - return XDP_ABORTED; - - /* We run out of registers, tcp_len gets spilled to the stack, and the - * verifier forgets its min and max values checked above in tcp_dissect. - */ - hdr->tcp_len = hdr->tcp->doff * 4; - if (hdr->tcp_len < sizeof(*hdr->tcp)) - return XDP_ABORTED; - - return hdr->tcp->syn ? syncookie_handle_syn(hdr, ctx, data, data_end) : - syncookie_handle_ack(hdr); + if (hdr->ipv4) { + hdr->eth = data; + hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth); + /* IPV4_MAXLEN is needed when calculating checksum. 
+ * At least sizeof(struct iphdr) is needed here to access ihl. + */ + if ((void *)hdr->ipv4 + IPV4_MAXLEN > data_end) + return XDP_ABORTED; + hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4; + } else if (hdr->ipv6) { + hdr->eth = data; + hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth); + hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6); + } else { + return XDP_ABORTED; + } + + if ((void *)hdr->tcp + TCP_MAXLEN > data_end) + return XDP_ABORTED; + + /* We run out of registers, tcp_len gets spilled to the stack, and the + * verifier forgets its min and max values checked above in tcp_dissect. + */ + hdr->tcp_len = hdr->tcp->doff * 4; + if (hdr->tcp_len < sizeof(*hdr->tcp)) + return XDP_ABORTED; + + return hdr->tcp->syn ? syncookie_handle_syn(hdr, ctx, data, data_end) + : syncookie_handle_ack(hdr); } SEC("xdp") int syncookie_xdp(struct xdp_md *ctx) { - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct header_pointers hdr; - int ret; + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct header_pointers hdr; + int ret; - ret = syncookie_part1(ctx, data, data_end, &hdr, true); - if (ret != XDP_TX) - return ret; + ret = syncookie_part1(ctx, data, data_end, &hdr, true); + if (ret != XDP_TX) + return ret; - data_end = (void *)(long)ctx->data_end; - data = (void *)(long)ctx->data; + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; - return syncookie_part2(ctx, data, data_end, &hdr); + return syncookie_part2(ctx, data, data_end, &hdr); } char _license[] SEC("license") = "GPL"; diff --git a/src/detect-byte-extract.c b/src/detect-byte-extract.c index 117cce597534..5d788bb79b7f 100644 --- a/src/detect-byte-extract.c +++ b/src/detect-byte-extract.c @@ -629,7 +629,7 @@ static int DetectByteExtractSetup(DetectEngineCtx *de_ctx, Signature *s, const c error: DetectByteExtractFree(de_ctx, data); return ret; -} + } /** * \brief Used to free instances of 
DetectByteExtractData. diff --git a/src/detect-bytejump.c b/src/detect-bytejump.c index e04d8a7a94fb..9ad2d0c4fade 100644 --- a/src/detect-bytejump.c +++ b/src/detect-bytejump.c @@ -599,7 +599,7 @@ static int DetectBytejumpSetup(DetectEngineCtx *de_ctx, Signature *s, const char } DetectBytejumpFree(de_ctx, data); return ret; -} + } /** * \brief this function will free memory associated with DetectBytejumpData diff --git a/src/detect-bytemath.c b/src/detect-bytemath.c index 49c2989f7dc9..b2529750a1f9 100644 --- a/src/detect-bytemath.c +++ b/src/detect-bytemath.c @@ -418,7 +418,7 @@ static int DetectByteMathSetup(DetectEngineCtx *de_ctx, Signature *s, const char SCFree(nbytes); DetectByteMathFree(de_ctx, data); return ret; -} + } /** * \brief Used to free instances of DetectByteMathractData. diff --git a/src/detect-bytetest.c b/src/detect-bytetest.c index e637c5999ce6..7d3624e92f27 100644 --- a/src/detect-bytetest.c +++ b/src/detect-bytetest.c @@ -714,7 +714,7 @@ static int DetectBytetestSetup(DetectEngineCtx *de_ctx, Signature *s, const char SCFree(nbytes); DetectBytetestFree(de_ctx, data); return ret; -} + } /** * \brief this function will free memory associated with DetectBytetestData diff --git a/src/detect-engine-build.c b/src/detect-engine-build.c index 604164b21c15..1eb7c1830238 100644 --- a/src/detect-engine-build.c +++ b/src/detect-engine-build.c @@ -1624,7 +1624,7 @@ int CreateGroupedPortList(DetectEngineCtx *de_ctx, DetectPort *port_list, Detect /* when a group's sigs are added to the joingr, we can free it */ gr->next = NULL; DetectPortFree(de_ctx, gr); - /* append */ + /* append */ } else { gr->next = NULL; diff --git a/src/detect-ipproto.c b/src/detect-ipproto.c index e5a0c7969b2f..40788d64446d 100644 --- a/src/detect-ipproto.c +++ b/src/detect-ipproto.c @@ -425,7 +425,7 @@ static int DetectIPProtoSetup(DetectEngineCtx *de_ctx, Signature *s, const char DetectIPProtoFree(de_ctx, data); return -1; -} + } void DetectIPProtoRemoveAllSMs(DetectEngineCtx 
*de_ctx, Signature *s) { diff --git a/src/runmode-af-packet.c b/src/runmode-af-packet.c index cf5839eb4cac..9cdd55326620 100644 --- a/src/runmode-af-packet.c +++ b/src/runmode-af-packet.c @@ -547,7 +547,7 @@ static void *ParseAFPConfig(const char *iface) #else SCLogWarning("%s: XDP filter set but XDP support is not built-in", iface); #endif - } + } if (ConfGetChildValueWithDefault(if_root, if_default, "xdp-syncookie-file", &ebpf_file) != 1) { aconf->xdp_syncookie_file = NULL; @@ -632,8 +632,7 @@ static void *ParseAFPConfig(const char *iface) if (aconf->xdp_syncookie_file) { #ifdef HAVE_PACKET_XDP int ret = EBPFLoadFile(aconf->iface, aconf->xdp_syncookie_file, "xdp", - &aconf->xdp_syncookie_fd, - &aconf->ebpf_t_config); + &aconf->xdp_syncookie_fd, &aconf->ebpf_t_config); switch (ret) { case 1: SCLogInfo("%s: loaded pinned maps from sysfs", iface);