diff --git a/bpf/headers/bpf/bpf_endian.h b/bpf/headers/bpf/bpf_endian.h
new file mode 100644
index 00000000..32623544
--- /dev/null
+++ b/bpf/headers/bpf/bpf_endian.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_ENDIAN__
+#define __BPF_ENDIAN__
+
+/*
+ * Isolate byte #n and put it into byte #m, for __u##b type.
+ * E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64:
+ * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx
+ * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000
+ * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn
+ * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000
+ */
+#define ___bpf_mvb(x, b, n, m) ((__u##b)(x) << (b-(n+1)*8) >> (b-8) << (m*8))
+
+#define ___bpf_swab16(x) ((__u16)(			\
+	___bpf_mvb(x, 16, 0, 1) |			\
+	___bpf_mvb(x, 16, 1, 0)))
+
+#define ___bpf_swab32(x) ((__u32)(			\
+	___bpf_mvb(x, 32, 0, 3) |			\
+	___bpf_mvb(x, 32, 1, 2) |			\
+	___bpf_mvb(x, 32, 2, 1) |			\
+	___bpf_mvb(x, 32, 3, 0)))
+
+#define ___bpf_swab64(x) ((__u64)(			\
+	___bpf_mvb(x, 64, 0, 7) |			\
+	___bpf_mvb(x, 64, 1, 6) |			\
+	___bpf_mvb(x, 64, 2, 5) |			\
+	___bpf_mvb(x, 64, 3, 4) |			\
+	___bpf_mvb(x, 64, 4, 3) |			\
+	___bpf_mvb(x, 64, 5, 2) |			\
+	___bpf_mvb(x, 64, 6, 1) |			\
+	___bpf_mvb(x, 64, 7, 0)))
+
+/* LLVM's BPF target selects the endianness of the CPU
+ * it compiles on, or the user specifies (bpfel/bpfeb),
+ * respectively. The used __BYTE_ORDER__ is defined by
+ * the compiler, we cannot rely on __BYTE_ORDER from
+ * libc headers, since it doesn't reflect the actual
+ * requested byte order.
+ *
+ * Note, LLVM's BPF target has different __builtin_bswapX()
+ * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE
+ * in bpfel and bpfeb case, which means below, that we map
+ * to cpu_to_be16(). We could use it unconditionally in BPF
+ * case, but better not rely on it, so that this header here
+ * can be used from application and BPF program side, which
+ * use different targets.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define __bpf_ntohs(x)			__builtin_bswap16(x)
+# define __bpf_htons(x)			__builtin_bswap16(x)
+# define __bpf_constant_ntohs(x)	___bpf_swab16(x)
+# define __bpf_constant_htons(x)	___bpf_swab16(x)
+# define __bpf_ntohl(x)			__builtin_bswap32(x)
+# define __bpf_htonl(x)			__builtin_bswap32(x)
+# define __bpf_constant_ntohl(x)	___bpf_swab32(x)
+# define __bpf_constant_htonl(x)	___bpf_swab32(x)
+# define __bpf_be64_to_cpu(x)		__builtin_bswap64(x)
+# define __bpf_cpu_to_be64(x)		__builtin_bswap64(x)
+# define __bpf_constant_be64_to_cpu(x)	___bpf_swab64(x)
+# define __bpf_constant_cpu_to_be64(x)	___bpf_swab64(x)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define __bpf_ntohs(x)			(x)
+# define __bpf_htons(x)			(x)
+# define __bpf_constant_ntohs(x)	(x)
+# define __bpf_constant_htons(x)	(x)
+# define __bpf_ntohl(x)			(x)
+# define __bpf_htonl(x)			(x)
+# define __bpf_constant_ntohl(x)	(x)
+# define __bpf_constant_htonl(x)	(x)
+# define __bpf_be64_to_cpu(x)		(x)
+# define __bpf_cpu_to_be64(x)		(x)
+# define __bpf_constant_be64_to_cpu(x)	(x)
+# define __bpf_constant_cpu_to_be64(x)	(x)
+#else
+# error "Fix your compiler's __BYTE_ORDER__?!"
+#endif
+
+#define bpf_htons(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_htons(x) : __bpf_htons(x))
+#define bpf_ntohs(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_ntohs(x) : __bpf_ntohs(x))
+#define bpf_htonl(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_htonl(x) : __bpf_htonl(x))
+#define bpf_ntohl(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_ntohl(x) : __bpf_ntohl(x))
+#define bpf_cpu_to_be64(x)			\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x))
+#define bpf_be64_to_cpu(x)			\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x))
+
+#endif /* __BPF_ENDIAN__ */
\ No newline at end of file
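A minimal usage sketch of this header from BPF C, for context (the helper name and the includes are illustrative, not part of this change): thanks to the __builtin_constant_p() ternary, byte-swapping a literal such as an EtherType folds away at compile time, while a value read from the packet goes through __builtin_bswap16() on little-endian builds and is a no-op on big-endian ones.

    /* Assumes "vmlinux.h" (or equivalent) provides __be16/bool. */
    #include "bpf/bpf_endian.h"

    /* Illustrative helper: compare a network-byte-order EtherType against
     * 802.1Q; the bpf_htons(0x8100) swap is evaluated at compile time. */
    static __always_inline bool is_vlan_proto(__be16 h_proto)
    {
        return h_proto == bpf_htons(0x8100);
    }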
diff --git a/bpf/kprobe_pwru.c b/bpf/kprobe_pwru.c
index 43896598..21bd9fba 100644
--- a/bpf/kprobe_pwru.c
+++ b/bpf/kprobe_pwru.c
@@ -6,6 +6,7 @@
 #include "bpf/bpf_helpers.h"
 #include "bpf/bpf_core_read.h"
 #include "bpf/bpf_tracing.h"
+#include "bpf/bpf_endian.h"
 #include "bpf/bpf_ipv6.h"
 
 #define PRINT_SKB_STR_SIZE 2048
@@ -13,6 +14,7 @@
 
 #define ETH_P_IP 0x800
 #define ETH_P_IPV6 0x86dd
+#define ETH_P_8021Q 0x8100
 
 const static bool TRUE = true;
 
@@ -55,6 +57,12 @@ struct tuple {
 u64 print_skb_id = 0;
 u64 print_shinfo_id = 0;
 
+enum event_type {
+	EVENT_TYPE_KPROBE = 0,
+	EVENT_TYPE_TC = 1,
+	EVENT_TYPE_XDP = 2,
+};
+
 struct event_t {
 	u32 pid;
 	u32 type;
@@ -233,24 +241,19 @@ set_meta(struct sk_buff *skb, struct skb_meta *meta) {
 }
 
 static __always_inline void
-set_tuple(struct sk_buff *skb, struct tuple *tpl) {
-	void *skb_head = BPF_CORE_READ(skb, head);
-	u16 l3_off = BPF_CORE_READ(skb, network_header);
+__set_tuple(struct tuple *tpl, void *data, u16 l3_off, bool is_ipv4) {
 	u16 l4_off;
 
-	struct iphdr *l3_hdr = (struct iphdr *) (skb_head + l3_off);
-	u8 ip_vsn = BPF_CORE_READ_BITFIELD_PROBED(l3_hdr, version);
-
-	if (ip_vsn == 4) {
-		struct iphdr *ip4 = (struct iphdr *) l3_hdr;
+	if (is_ipv4) {
+		struct iphdr *ip4 = (struct iphdr *) (data + l3_off);
 		BPF_CORE_READ_INTO(&tpl->saddr, ip4, saddr);
 		BPF_CORE_READ_INTO(&tpl->daddr, ip4, daddr);
 		tpl->l4_proto = BPF_CORE_READ(ip4, protocol);
 		tpl->l3_proto = ETH_P_IP;
 		l4_off = l3_off + BPF_CORE_READ_BITFIELD_PROBED(ip4, ihl) * 4;
 
-	} else if (ip_vsn == 6) {
-		struct ipv6hdr *ip6 = (struct ipv6hdr *) l3_hdr;
+	} else {
+		struct ipv6hdr *ip6 = (struct ipv6hdr *) (data + l3_off);
 		BPF_CORE_READ_INTO(&tpl->saddr, ip6, saddr);
 		BPF_CORE_READ_INTO(&tpl->daddr, ip6, daddr);
 		tpl->l4_proto = BPF_CORE_READ(ip6, nexthdr); // TODO: ipv6 l4 protocol
@@ -259,16 +262,32 @@ set_tuple(struct sk_buff *skb, struct tuple *tpl) {
 	}
 
 	if (tpl->l4_proto == IPPROTO_TCP) {
-		struct tcphdr *tcp = (struct tcphdr *) (skb_head + l4_off);
+		struct tcphdr *tcp = (struct tcphdr *) (data + l4_off);
 		tpl->sport= BPF_CORE_READ(tcp, source);
 		tpl->dport= BPF_CORE_READ(tcp, dest);
 	} else if (tpl->l4_proto == IPPROTO_UDP) {
-		struct udphdr *udp = (struct udphdr *) (skb_head + l4_off);
+		struct udphdr *udp = (struct udphdr *) (data + l4_off);
 		tpl->sport= BPF_CORE_READ(udp, source);
 		tpl->dport= BPF_CORE_READ(udp, dest);
 	}
 }
 
+static __always_inline void
+set_tuple(struct sk_buff *skb, struct tuple *tpl) {
+	void *skb_head = BPF_CORE_READ(skb, head);
+	u16 l3_off = BPF_CORE_READ(skb, network_header);
+
+	struct iphdr *l3_hdr = (struct iphdr *) (skb_head + l3_off);
+	u8 ip_vsn = BPF_CORE_READ_BITFIELD_PROBED(l3_hdr, version);
+
+	if (ip_vsn != 4 && ip_vsn != 6)
+		return;
+
+	bool is_ipv4 = ip_vsn == 4;
+	__set_tuple(tpl, skb_head, l3_off, is_ipv4);
+}
+
+
 static __always_inline void
 set_skb_btf(struct sk_buff *skb, typeof(print_skb_id) *event_id) {
 #ifdef OUTPUT_SKB
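A quick worked example of the IPv4 L4-offset computation used in __set_tuple() above (stand-alone, illustrative helper only): iphdr.ihl counts 32-bit words, so a header with no options (ihl = 5) puts the TCP/UDP header at l3_off + 20 bytes, and the maximum (ihl = 15) at l3_off + 60.

    /* Same arithmetic as in __set_tuple(), shown in isolation. */
    static __always_inline u16 ipv4_l4_offset(struct iphdr *ip4, u16 l3_off)
    {
        /* ihl = 5 (no options) -> l3_off + 20; ihl = 15 (max) -> l3_off + 60 */
        return l3_off + BPF_CORE_READ_BITFIELD_PROBED(ip4, ihl) * 4;
    }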
@@ -382,7 +401,7 @@ set_output(void *ctx, struct sk_buff *skb, struct event_t *event) {
 static __noinline bool
 handle_everything(struct sk_buff *skb, void *ctx, struct event_t *event, u64 *_stackid) {
 	u8 tracked_by;
-	u64 skb_addr = (u64) skb;
+	u64 skb_addr = (u64) BPF_CORE_READ(skb, head);
 	u64 stackid;
 
 	if (cfg->track_skb_by_stackid)
@@ -437,7 +456,7 @@ kprobe_skb(struct sk_buff *skb, struct pt_regs *ctx, bool has_get_func_ip, u64 *
 	if (!handle_everything(skb, ctx, &event, _stackid))
 		return BPF_OK;
 
-	event.skb_addr = (u64) skb;
+	event.skb_addr = (u64) BPF_CORE_READ(skb, head);
 	event.addr = has_get_func_ip ? bpf_get_func_ip(ctx) : PT_REGS_IP(ctx);
 	event.param_second = PT_REGS_PARM2(ctx);
 	if (CFG.output_caller)
@@ -486,35 +505,44 @@ int kprobe_skb_by_stackid(struct pt_regs *ctx) {
 
 SEC("kprobe/skb_lifetime_termination")
 int kprobe_skb_lifetime_termination(struct pt_regs *ctx) {
-	u64 skb = (u64) PT_REGS_PARM1(ctx);
+	struct sk_buff *skb = (typeof(skb)) PT_REGS_PARM1(ctx);
+	u64 skb_addr = (u64) BPF_CORE_READ(skb, head);
 
-	bpf_map_delete_elem(&skb_addresses, &skb);
+	bpf_map_delete_elem(&skb_addresses, &skb_addr);
 	if (cfg->track_skb_by_stackid) {
 		u64 stackid = get_stackid(ctx);
 		bpf_map_delete_elem(&stackid_skb, &stackid);
-		bpf_map_delete_elem(&skb_stackid, &skb);
+		bpf_map_delete_elem(&skb_stackid, &skb_addr);
 	}
 
 	return BPF_OK;
 }
 
 static __always_inline int
-track_skb_clone(u64 old, u64 new) {
-	if (bpf_map_lookup_elem(&skb_addresses, &old))
-		bpf_map_update_elem(&skb_addresses, &new, &TRUE, BPF_ANY);
+track_skb_clone(struct sk_buff *old, struct sk_buff *new) {
+	u64 skb_addr_old = (u64) BPF_CORE_READ(old, head);
+	u64 skb_addr_new = (u64) BPF_CORE_READ(new, head);
+	if (bpf_map_lookup_elem(&skb_addresses, &skb_addr_old))
+		bpf_map_update_elem(&skb_addresses, &skb_addr_new, &TRUE, BPF_ANY);
 
 	return BPF_OK;
 }
 
 SEC("fexit/skb_clone")
-int BPF_PROG(fexit_skb_clone, u64 old, gfp_t mask, u64 new) {
-	return track_skb_clone(old, new);
+int BPF_PROG(fexit_skb_clone, struct sk_buff *old, gfp_t mask, struct sk_buff *new) {
+	if (new)
+		return track_skb_clone(old, new);
+
+	return BPF_OK;
 }
 
 SEC("fexit/skb_copy")
-int BPF_PROG(fexit_skb_copy, u64 old, gfp_t mask, u64 new) {
-	return track_skb_clone(old, new);
+int BPF_PROG(fexit_skb_copy, struct sk_buff *old, gfp_t mask, struct sk_buff *new) {
+	if (new)
+		return track_skb_clone(old, new);
+
+	return BPF_OK;
 }
 
 SEC("fentry/tc")
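A note on the keying used above: the tracking maps are now keyed by the address of the packet's head buffer rather than by the struct sk_buff pointer. This is what lets the XDP path added below share one key space with the skb paths, assuming the driver builds the skb directly on top of the XDP buffer so that skb->head and xdp_buff.data_hard_start point at the same memory. Sketched as hypothetical helpers (the real code inlines these expressions):

    static __always_inline u64 skb_track_key(struct sk_buff *skb)
    {
        return (u64) BPF_CORE_READ(skb, head);             /* kprobe/tc path */
    }

    static __always_inline u64 xdp_track_key(struct xdp_buff *xdp)
    {
        return (u64) BPF_CORE_READ(xdp, data_hard_start);  /* XDP path */
    }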
@@ -524,8 +552,117 @@ int BPF_PROG(fentry_tc, struct sk_buff *skb) {
 	if (!handle_everything(skb, ctx, &event, NULL))
 		return BPF_OK;
 
-	event.skb_addr = (u64) skb;
+	event.skb_addr = (u64) BPF_CORE_READ(skb, head);
+	event.addr = BPF_PROG_ADDR;
+	event.type = EVENT_TYPE_TC;
+	bpf_map_push_elem(&events, &event, BPF_EXIST);
+
+	return BPF_OK;
+}
+
+
+static __always_inline bool
+filter_xdp_netns(struct xdp_buff *xdp) {
+	if (cfg->netns && BPF_CORE_READ(xdp, rxq, dev, nd_net.net, ns.inum) != cfg->netns)
+		return false;
+
+	return true;
+}
+
+static __always_inline bool
+filter_xdp_ifindex(struct xdp_buff *xdp) {
+	if (cfg->ifindex && BPF_CORE_READ(xdp, rxq, dev, ifindex) != cfg->ifindex)
+		return false;
+
+	return true;
+}
+
+static __always_inline bool
+filter_xdp_meta(struct xdp_buff *xdp) {
+	return filter_xdp_netns(xdp) && filter_xdp_ifindex(xdp);
+}
+
+static __always_inline bool
+filter_xdp_pcap(struct xdp_buff *xdp) {
+	void *data = (void *)(long) BPF_CORE_READ(xdp, data);
+	void *data_end = (void *)(long) BPF_CORE_READ(xdp, data_end);
+	return filter_pcap_ebpf_l2((void *)xdp, (void *)xdp, (void *)xdp, data, data_end);
+}
+
+static __always_inline bool
+filter_xdp(struct xdp_buff *xdp) {
+	return filter_xdp_pcap(xdp) && filter_xdp_meta(xdp);
+}
+
+static __always_inline void
+set_xdp_meta(struct xdp_buff *xdp, struct skb_meta *meta) {
+	struct net_device *dev = BPF_CORE_READ(xdp, rxq, dev);
+	meta->netns = BPF_CORE_READ(dev, nd_net.net, ns.inum);
+	meta->ifindex = BPF_CORE_READ(dev, ifindex);
+	meta->mtu = BPF_CORE_READ(dev, mtu);
+	meta->len = BPF_CORE_READ(xdp, data_end) - BPF_CORE_READ(xdp, data);
+}
+
+static __always_inline void
+set_xdp_tuple(struct xdp_buff *xdp, struct tuple *tpl) {
+	void *data = (void *)(long) BPF_CORE_READ(xdp, data);
+	void *data_end = (void *)(long) BPF_CORE_READ(xdp, data_end);
+	struct ethhdr *eth = (struct ethhdr *) data;
+	u16 l3_off = sizeof(*eth);
+	u16 l4_off;
+
+	__be16 proto = BPF_CORE_READ(eth, h_proto);
+	if (proto == bpf_htons(ETH_P_8021Q)) {
+		struct vlan_hdr *vlan = (struct vlan_hdr *) (eth + 1);
+		proto = BPF_CORE_READ(vlan, h_vlan_encapsulated_proto);
+		l3_off += sizeof(*vlan);
+	}
+	if (proto != bpf_htons(ETH_P_IP) && proto != bpf_htons(ETH_P_IPV6))
+		return;
+
+	bool is_ipv4 = proto == bpf_htons(ETH_P_IP);
+	__set_tuple(tpl, data, l3_off, is_ipv4);
+}
+
+static __always_inline void
+set_xdp_output(void *ctx, struct xdp_buff *xdp, struct event_t *event) {
+	if (cfg->output_meta)
+		set_xdp_meta(xdp, &event->meta);
+
+	if (cfg->output_tuple)
+		set_xdp_tuple(xdp, &event->tuple);
+
+	if (cfg->output_stack)
+		event->print_stack_id = bpf_get_stackid(ctx, &print_stack_map, BPF_F_FAST_STACK_CMP);
+}
+
+SEC("fentry/xdp")
+int BPF_PROG(fentry_xdp, struct xdp_buff *xdp) {
+	u64 skb_addr = (u64) BPF_CORE_READ(xdp, data_hard_start);
+	struct event_t event = {};
+
+	if (cfg->is_set) {
+		if (cfg->track_skb) {
+			if (!bpf_map_lookup_elem(&skb_addresses, &skb_addr)) {
+				if (!filter_xdp(xdp))
+					return BPF_OK;
+
+				bpf_map_update_elem(&skb_addresses, &skb_addr, &TRUE, BPF_ANY);
+			}
+
+		} else if (!filter_xdp(xdp)) {
+			return BPF_OK;
+		}
+
+		set_xdp_output(ctx, xdp, &event);
+	}
+
+	event.pid = bpf_get_current_pid_tgid() >> 32;
+	event.ts = bpf_ktime_get_ns();
+	event.cpu_id = bpf_get_smp_processor_id();
+	event.skb_addr = (u64) skb_addr;
 	event.addr = BPF_PROG_ADDR;
+	event.type = EVENT_TYPE_XDP;
 	bpf_map_push_elem(&events, &event, BPF_EXIST);
 
 	return BPF_OK;
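For reference, the L2 walk that set_xdp_tuple() performs above, written as a plain bounds-checked parser (a sketch only: it assumes the usual <linux/if_vlan.h> layout for struct vlan_hdr and uses direct packet access instead of BPF_CORE_READ; the helper name is illustrative):

    struct vlan_hdr {                        /* assumed layout */
        __be16 h_vlan_TCI;
        __be16 h_vlan_encapsulated_proto;
    };

    /* Returns the L3 offset and writes the inner EtherType, or -1 if the
     * frame is too short to hold the headers. */
    static __always_inline int l3_offset(void *data, void *data_end, __be16 *proto)
    {
        struct ethhdr *eth = data;
        int off = sizeof(*eth);

        if ((void *)(eth + 1) > data_end)
            return -1;

        *proto = eth->h_proto;
        if (*proto == bpf_htons(ETH_P_8021Q)) {
            struct vlan_hdr *vlan = (struct vlan_hdr *)(eth + 1);

            if ((void *)(vlan + 1) > data_end)
                return -1;
            *proto = vlan->h_vlan_encapsulated_proto;
            off += sizeof(*vlan);
        }
        return off;
    }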
diff --git a/internal/libpcap/inject.go b/internal/libpcap/inject.go
index e8955b73..ee5a5cbe 100644
--- a/internal/libpcap/inject.go
+++ b/internal/libpcap/inject.go
@@ -8,6 +8,10 @@ import (
 	"github.com/cloudflare/cbpfc"
 )
 
+func InjectL2Filter(program *ebpf.ProgramSpec, filterExpr string) (err error) {
+	return injectFilter(program, filterExpr, false)
+}
+
 func InjectFilters(program *ebpf.ProgramSpec, filterExpr string) (err error) {
 	if err = injectFilter(program, filterExpr, false); err != nil {
 		return
diff --git a/internal/pwru/bpf_prog.go b/internal/pwru/bpf_prog.go
index 760d559d..25c3ee36 100644
--- a/internal/pwru/bpf_prog.go
+++ b/internal/pwru/bpf_prog.go
@@ -11,6 +11,8 @@ import (
 	"golang.org/x/sys/unix"
 )
 
+var errNotFound = errors.New("not found")
+
 type BpfProgName2Addr map[string]uint64
 
 func listBpfProgs(typ ebpf.ProgramType) ([]*ebpf.Program, error) {
@@ -46,6 +48,14 @@ func getEntryFuncName(prog *ebpf.Program) (string, string, error) {
 		return "", "", fmt.Errorf("failed to get program info: %w", err)
 	}
 
+	_, ok := info.BTFID()
+	if !ok {
+		// An fentry/fexit program can only be attached to another program
+		// annotated with BTF. So if no BTF ID is found, the program is not
+		// annotated with BTF and cannot be traced.
+		return "", "", errNotFound
+	}
+
 	insns, err := info.Instructions()
 	if err != nil {
 		return "", "", fmt.Errorf("failed to get program instructions: %w", err)
@@ -58,5 +68,5 @@ func getEntryFuncName(prog *ebpf.Program) (string, string, error) {
 		}
 	}
 
-	return "", "", fmt.Errorf("no function found in %s bpf prog", info.Name)
+	return "", "", errNotFound
 }
diff --git a/internal/pwru/kprobe.go b/internal/pwru/kprobe.go
index f59cd45f..32cd6722 100644
--- a/internal/pwru/kprobe.go
+++ b/internal/pwru/kprobe.go
@@ -198,7 +198,7 @@ func AttachKprobeMulti(ctx context.Context, bar *pb.ProgressBar, kprobes []Kprob
 	return
 }
 
-func KprobeSkbFuncs(ctx context.Context, funcs Funcs, coll *ebpf.Collection, a2n Addr2Name, useKprobeMulti bool, batch uint) *kprober {
+func NewKprober(ctx context.Context, funcs Funcs, coll *ebpf.Collection, a2n Addr2Name, useKprobeMulti bool, batch uint) *kprober {
 	msg := "kprobe"
 	if useKprobeMulti {
 		msg = "kprobe-multi"
@@ -244,7 +244,7 @@ func KprobeSkbFuncs(ctx context.Context, funcs Funcs, coll *ebpf.Collection, a2n
 	return &k
 }
 
-func KprobeNonSkbFuncs(nonSkbFuncs []string, funcs Funcs, coll *ebpf.Collection) *kprober {
+func NewNonSkbFuncsKprober(nonSkbFuncs []string, funcs Funcs, coll *ebpf.Collection) *kprober {
 	var k kprober
 	k.kprobeBatch = uint(len(nonSkbFuncs))
 
diff --git a/internal/pwru/output.go b/internal/pwru/output.go
index b65182f2..7b70b99b 100644
--- a/internal/pwru/output.go
+++ b/internal/pwru/output.go
@@ -31,6 +31,12 @@ import (
 
 const absoluteTS string = "15:04:05.000"
 
+const (
+	eventTypeKprobe     = 0
+	eventTypeTracingTc  = 1
+	eventTypeTracingXdp = 2
+)
+
 type output struct {
 	flags       *Flags
 	lastSeenSkb map[uint64]uint64 // skb addr => last seen TS
@@ -211,7 +217,6 @@ func (o *output) PrintJson(event *Event) {
 	encoder.SetEscapeHTML(false)
 
 	err := encoder.Encode(d)
-
 	if err != nil {
 		log.Fatalf("Error encoding JSON: %s", err)
 	}
@@ -351,6 +356,15 @@ func getOutFuncName(o *output, event *Event, addr uint64) string {
 		}
 	}
 
+	if event.Type != eventTypeKprobe {
+		switch event.Type {
+		case eventTypeTracingTc:
+			outFuncName += "(tc)"
+		case eventTypeTracingXdp:
+			outFuncName += "(xdp)"
+		}
+	}
+
 	return outFuncName
 }
diff --git a/internal/pwru/tc_tracer.go b/internal/pwru/tracing.go
similarity index 55%
rename from internal/pwru/tc_tracer.go
rename to internal/pwru/tracing.go
index 871a304d..7d9c16b8 100644
--- a/internal/pwru/tc_tracer.go
+++ b/internal/pwru/tracing.go
@@ -14,32 +14,62 @@ import (
 	"golang.org/x/sync/errgroup"
 )
 
-type tcTracer struct {
+type tracing struct {
 	sync.Mutex
 	links []link.Link
+	progs []*ebpf.Program
 }
 
-func (t *tcTracer) close() {
+func (t *tracing) HaveTracing() bool {
 	t.Lock()
 	defer t.Unlock()
 
+	return len(t.links) > 0
+}
+
+func (t *tracing) Detach() {
+	t.Lock()
+	defer t.Unlock()
+
+	t.detach()
+
+	for _, p := range t.progs {
+		_ = p.Close()
+	}
+	t.progs = nil
+}
+
+func (t *tracing) detach() {
+	var errg errgroup.Group
+
 	for _, l := range t.links {
-		_ = l.Close()
+		l := l
+		errg.Go(func() error {
+			_ = l.Close()
+			return nil
+		})
 	}
+
+	_ = errg.Wait()
 }
 
-func (t *tcTracer) addLink(l link.Link) {
+func (t *tracing) addLink(l link.Link) {
 	t.Lock()
 	defer t.Unlock()
 
 	t.links = append(t.links, l)
 }
 
-func (t *tcTracer) trace(spec *ebpf.CollectionSpec,
+func (t *tracing) traceProg(spec *ebpf.CollectionSpec,
 	opts *ebpf.CollectionOptions, prog *ebpf.Program, n2a BpfProgName2Addr,
+	tracingName string,
 ) error {
 	entryFn, name, err := getEntryFuncName(prog)
 	if err != nil {
+		if errors.Is(err, errNotFound) {
+			log.Printf("Skip tracing bpf prog %s because its entry function name cannot be found", prog)
+			return nil
+		}
 		return fmt.Errorf("failed to get entry function name: %w", err)
 	}
 
@@ -54,7 +84,7 @@ func (t *tcTracer) trace(spec *ebpf.CollectionSpec,
 	if !ok {
 		addr, ok = n2a[name]
 		if !ok {
-			return fmt.Errorf("failed to find address for function %s of bpf prog %s", name, prog)
+			return fmt.Errorf("failed to find address for function %s of bpf prog %v", name, prog)
 		}
 	}
 
@@ -65,8 +95,8 @@ func (t *tcTracer) trace(spec *ebpf.CollectionSpec,
 		return fmt.Errorf("failed to rewrite bpf prog addr: %w", err)
 	}
 
-	spec.Programs["fentry_tc"].AttachTarget = prog
-	spec.Programs["fentry_tc"].AttachTo = entryFn
+	spec.Programs[tracingName].AttachTarget = prog
+	spec.Programs[tracingName].AttachTo = entryFn
 	coll, err := ebpf.NewCollectionWithOptions(spec, *opts)
 	if err != nil {
 		var (
@@ -82,7 +112,7 @@ func (t *tcTracer) trace(spec *ebpf.CollectionSpec,
 	defer coll.Close()
 
 	tracing, err := link.AttachTracing(link.TracingOptions{
-		Program: coll.Programs["fentry_tc"],
+		Program: coll.Programs[tracingName],
 	})
 	if err != nil {
 		return fmt.Errorf("failed to attach tracing: %w", err)
@@ -93,12 +123,13 @@ func (t *tcTracer) trace(spec *ebpf.CollectionSpec,
 	return nil
 }
 
-func TraceTC(coll *ebpf.Collection, spec *ebpf.CollectionSpec,
-	opts *ebpf.CollectionOptions, outputSkb bool, outputShinfo bool, n2a BpfProgName2Addr,
-) func() {
-	progs, err := listBpfProgs(ebpf.SchedCLS)
+func (t *tracing) trace(coll *ebpf.Collection, spec *ebpf.CollectionSpec,
+	opts *ebpf.CollectionOptions, outputSkb bool, outputShinfo bool,
+	n2a BpfProgName2Addr, progType ebpf.ProgramType, tracingName string,
+) error {
+	progs, err := listBpfProgs(progType)
 	if err != nil {
-		log.Fatalf("Failed to list TC bpf progs: %v", err)
+		return fmt.Errorf("failed to list bpf progs: %w", err)
 	}
 
 	// Reusing maps from previous collection is to handle the events together
@@ -115,27 +146,50 @@ func TraceTC(coll *ebpf.Collection, spec *ebpf.CollectionSpec,
 	}
 	opts.MapReplacements = replacedMaps
 
-	var tt tcTracer
-	tt.links = make([]link.Link, 0, len(progs))
+	t.links = make([]link.Link, 0, len(progs))
+	t.progs = progs
 
 	var errg errgroup.Group
 	for _, prog := range progs {
 		prog := prog
 		errg.Go(func() error {
-			return tt.trace(spec, opts, prog, n2a)
+			return t.traceProg(spec, opts, prog, n2a, tracingName)
 		})
 	}
 	if err := errg.Wait(); err != nil {
-		log.Fatalf("Failed to trace TC: %v", err)
+		t.Detach()
+		return fmt.Errorf("failed to trace bpf progs: %w", err)
 	}
 
-	return func() {
-		tt.close()
+	return nil
+}
 
-		for _, prog := range progs {
-			_ = prog.Close()
-		}
+func TraceTC(coll *ebpf.Collection, spec *ebpf.CollectionSpec,
+	opts *ebpf.CollectionOptions, outputSkb bool, outputShinfo bool,
+	n2a BpfProgName2Addr,
+) *tracing {
+	log.Printf("Attaching tc-bpf progs...\n")
+
+	var t tracing
+	if err := t.trace(coll, spec, opts, outputSkb, outputShinfo, n2a, ebpf.SchedCLS, "fentry_tc"); err != nil {
+		log.Fatalf("failed to trace TC progs: %v", err)
 	}
+
+	return &t
+}
+
+func TraceXDP(coll *ebpf.Collection, spec *ebpf.CollectionSpec,
+	opts *ebpf.CollectionOptions, outputSkb bool, outputShinfo bool,
+	n2a BpfProgName2Addr,
+) *tracing {
+	log.Printf("Attaching xdp progs...\n")
+
+	var t tracing
+	if err := t.trace(coll, spec, opts, outputSkb, outputShinfo, n2a, ebpf.XDP, "fentry_xdp"); err != nil {
+		log.Fatalf("failed to trace XDP progs: %v", err)
+	}
+
+	return &t
 }
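One assumption worth spelling out about the addr rewrite done in traceProg() above: BPF_PROG_ADDR, which the fentry_tc/fentry_xdp programs record into event.addr, is presumably a rewritable constant in the BPF object that userspace patches before each per-target load (its declaration is not shown in this diff). Roughly:

    /* Assumed declaration in kprobe_pwru.c, not part of this diff: rewritten
     * by userspace per traced program, so each event can be attributed to the
     * specific tc/XDP prog the fentry copy is attached to. */
    const volatile u64 BPF_PROG_ADDR = 0;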
diff --git a/internal/pwru/types.go b/internal/pwru/types.go
index 1ce4a946..414ece52 100644
--- a/internal/pwru/types.go
+++ b/internal/pwru/types.go
@@ -33,6 +33,7 @@ type Flags struct {
 	FilterTrackSkb          bool
 	FilterTrackSkbByStackid bool
 	FilterTraceTc           bool
+	FilterTraceXdp          bool
 	FilterIfname            string
 	FilterPcap              string
 	FilterKprobeBatch       uint
@@ -69,6 +70,7 @@ func (f *Flags) SetFlags() {
 	flag.BoolVar(&f.FilterTrackSkb, "filter-track-skb", false, "trace a packet even if it does not match given filters (e.g., after NAT or tunnel decapsulation)")
 	flag.BoolVar(&f.FilterTrackSkbByStackid, "filter-track-skb-by-stackid", false, "trace a packet even after it is kfreed (e.g., traffic going through bridge)")
 	flag.BoolVar(&f.FilterTraceTc, "filter-trace-tc", false, "trace TC bpf progs")
+	flag.BoolVar(&f.FilterTraceXdp, "filter-trace-xdp", false, "trace XDP bpf progs")
 	flag.StringVar(&f.FilterIfname, "filter-ifname", "", "filter skb ifname in --filter-netns (if not specified, use current netns)")
 	flag.UintVar(&f.FilterKprobeBatch, "filter-kprobe-batch", 10, "batch size for kprobe attaching/detaching")
 	flag.StringVar(&f.OutputTS, "timestamp", "none", "print timestamp per skb (\"current\", \"relative\", \"absolute\", \"none\")")
diff --git a/main.go b/main.go
index e256cb17..a7cc5b7c 100644
--- a/main.go
+++ b/main.go
@@ -92,13 +92,14 @@ func main() {
 	if err != nil {
 		log.Fatalf("Failed to get skb-accepting functions: %s", err)
 	}
-	if len(funcs) <= 0 {
+	if len(funcs) == 0 && !flags.FilterTraceTc && !flags.FilterTraceXdp {
 		log.Fatalf("Cannot find a matching kernel function")
 	}
-	// If --filter-trace-tc, it's to retrieve and print bpf prog's name.
+	// If --filter-trace-tc/--filter-trace-xdp is set, this is used to
+	// retrieve and print the bpf prog's name.
 	addr2name, name2addr, err := pwru.ParseKallsyms(funcs, flags.OutputStack ||
-		len(flags.KMods) != 0 || flags.FilterTraceTc || len(flags.FilterNonSkbFuncs) > 0 ||
-		flags.OutputCaller)
+		len(flags.KMods) != 0 || flags.FilterTraceTc || flags.FilterTraceXdp ||
+		len(flags.FilterNonSkbFuncs) > 0 || flags.OutputCaller)
 	if err != nil {
 		log.Fatalf("Failed to get function addrs: %s", err)
 	}
@@ -130,6 +131,12 @@
 			name == "fexit_skb_copy" {
 			continue
 		}
+		if name == "fentry_xdp" {
+			if err := libpcap.InjectL2Filter(program, flags.FilterPcap); err != nil {
+				log.Fatalf("Failed to inject filter ebpf for %s: %v", name, err)
+			}
+			continue
+		}
 		if err = libpcap.InjectFilters(program, flags.FilterPcap); err != nil {
 			log.Fatalf("Failed to inject filter ebpf for %s: %v", name, err)
 		}
@@ -146,25 +153,33 @@
 	}
 
 	haveFexit := pwru.HaveBPFLinkTracing()
-	if flags.FilterTraceTc && !haveFexit {
-		log.Fatalf("Current kernel does not support fentry/fexit to run with --filter-trace-tc")
+	if (flags.FilterTraceTc || flags.FilterTraceXdp) && !haveFexit {
+		log.Fatalf("Current kernel does not support fentry/fexit to run with --filter-trace-tc/--filter-trace-xdp")
 	}
 
 	// As we know, for every fentry tracing program, there is a corresponding
 	// bpf prog spec with attaching target and attaching function. So, we can
-	// just copy the spec and keep the fentry_tc program spec only in the copied
-	// spec.
-	var bpfSpecFentry *ebpf.CollectionSpec
+	// just copy the spec and keep only the fentry_tc/fentry_xdp program spec
+	// in the copied spec.
+	var bpfSpecFentryTc *ebpf.CollectionSpec
 	if flags.FilterTraceTc {
-		bpfSpecFentry = bpfSpec.Copy()
-		bpfSpecFentry.Programs = map[string]*ebpf.ProgramSpec{
-			"fentry_tc": bpfSpec.Programs["fentry_tc"],
+		bpfSpecFentryTc = bpfSpec.Copy()
+		bpfSpecFentryTc.Programs = map[string]*ebpf.ProgramSpec{
+			"fentry_tc": bpfSpecFentryTc.Programs["fentry_tc"],
+		}
+	}
+	var bpfSpecFentryXdp *ebpf.CollectionSpec
+	if flags.FilterTraceXdp {
+		bpfSpecFentryXdp = bpfSpec.Copy()
+		bpfSpecFentryXdp.Programs = map[string]*ebpf.ProgramSpec{
+			"fentry_xdp": bpfSpecFentryXdp.Programs["fentry_xdp"],
 		}
 	}
 
-	// fentry_tc is not used in the kprobe/kprobe-multi cases. So, it should be
-	// deleted from the spec.
+	// fentry_tc and fentry_xdp are not used in the kprobe/kprobe-multi cases.
+	// So, they should be deleted from the spec.
 	delete(bpfSpec.Programs, "fentry_tc")
+	delete(bpfSpec.Programs, "fentry_xdp")
 
 	// If not tracking skb, deleting the skb-tracking programs to reduce loading
 	// time.
@@ -191,9 +206,22 @@ func main() {
 	}
 	defer coll.Close()
 
+	traceTc := false
 	if flags.FilterTraceTc {
-		close := pwru.TraceTC(coll, bpfSpecFentry, &opts, flags.OutputSkb, flags.OutputShinfo, name2addr)
-		defer close()
+		t := pwru.TraceTC(coll, bpfSpecFentryTc, &opts, flags.OutputSkb, flags.OutputShinfo, name2addr)
+		defer t.Detach()
+		traceTc = t.HaveTracing()
+	}
+
+	traceXdp := false
+	if flags.FilterTraceXdp {
+		t := pwru.TraceXDP(coll, bpfSpecFentryXdp, &opts, flags.OutputSkb, flags.OutputShinfo, name2addr)
+		defer t.Detach()
+		traceXdp = t.HaveTracing()
+	}
+
+	if !traceTc && !traceXdp && len(funcs) == 0 {
+		log.Fatalf("No kprobe/tc-bpf/xdp to trace!")
 	}
 
 	if flags.FilterTrackSkb || flags.FilterTrackSkbByStackid {
@@ -202,12 +230,12 @@ func main() {
 	}
 
 	if nonSkbFuncs := flags.FilterNonSkbFuncs; len(nonSkbFuncs) != 0 {
-		k := pwru.KprobeNonSkbFuncs(nonSkbFuncs, funcs, coll)
+		k := pwru.NewNonSkbFuncsKprober(nonSkbFuncs, funcs, coll)
 		defer k.DetachKprobes()
 	}
 
 	if len(funcs) != 0 {
-		k := pwru.KprobeSkbFuncs(ctx, funcs, coll, addr2name, useKprobeMulti, flags.FilterKprobeBatch)
+		k := pwru.NewKprober(ctx, funcs, coll, addr2name, useKprobeMulti, flags.FilterKprobeBatch)
 		defer k.DetachKprobes()
 	}