From 9e3697bf7f84d9b0585d22e63950ba8839fbd5cb Mon Sep 17 00:00:00 2001 From: Nikola Grcevski <6207777+grcevski@users.noreply.github.com> Date: Fri, 24 Jan 2025 08:56:17 -0500 Subject: [PATCH] Fix context propagation edge cases (#1572) --- bpf/protocol_http.h | 13 +- bpf/tc_common.h | 1 + bpf/tc_sock.h | 52 ++- bpf/tc_tracer.c | 8 + bpf/tc_tracer_l7.h | 177 +++++---- .../ebpf/generictracer/bpf_arm64_bpfel.o | 4 +- .../generictracer/bpf_debug_arm64_bpfel.o | 4 +- .../ebpf/generictracer/bpf_debug_x86_bpfel.o | 4 +- .../ebpf/generictracer/bpf_tp_arm64_bpfel.o | 4 +- .../generictracer/bpf_tp_debug_arm64_bpfel.o | 4 +- .../generictracer/bpf_tp_debug_x86_bpfel.o | 4 +- .../ebpf/generictracer/bpf_tp_x86_bpfel.o | 4 +- .../ebpf/generictracer/bpf_x86_bpfel.o | 4 +- .../ebpf/gotracer/bpf_debug_x86_bpfel.o | 2 +- .../ebpf/httptracer/bpf_arm64_bpfel.o | 4 +- .../ebpf/httptracer/bpf_debug_arm64_bpfel.o | 4 +- .../ebpf/httptracer/bpf_debug_x86_bpfel.o | 4 +- pkg/internal/ebpf/httptracer/bpf_x86_bpfel.o | 4 +- pkg/internal/ebpf/tctracer/bpf_arm64_bpfel.o | 4 +- .../ebpf/tctracer/bpf_debug_arm64_bpfel.o | 4 +- .../ebpf/tctracer/bpf_debug_x86_bpfel.o | 4 +- pkg/internal/ebpf/tctracer/bpf_x86_bpfel.o | 4 +- pkg/internal/ebpf/tracer_linux.go | 4 +- test/ebpf/tc_split_packets/main.c | 356 ++++++++++++++++++ 24 files changed, 540 insertions(+), 137 deletions(-) create mode 100644 test/ebpf/tc_split_packets/main.c diff --git a/bpf/protocol_http.h b/bpf/protocol_http.h index dc377d3ab..74357a738 100644 --- a/bpf/protocol_http.h +++ b/bpf/protocol_http.h @@ -90,8 +90,6 @@ static __always_inline void http_get_or_create_trace_info(http_connection_metada //make_tp_string(tp_buf, &tp_p->tp); //bpf_dbg_printk("tp: %s", tp_buf); - u8 tp_in_headers = false; - if (k_bpf_traceparent_enabled) { // The below buffer scan can be expensive on high volume of requests. We make it optional // for customers to enable it. Off by default. @@ -113,7 +111,6 @@ static __always_inline void http_get_or_create_trace_info(http_connection_metada unsigned char *res = bpf_strstr_tp_loop(buf, buf_len); if (res) { - tp_in_headers = true; bpf_dbg_printk("Found traceparent %s", res); unsigned char *t_id = extract_trace_id(res); unsigned char *s_id = extract_span_id(res); @@ -137,11 +134,11 @@ static __always_inline void http_get_or_create_trace_info(http_connection_metada if (meta) { u32 type = trace_type_from_meta(meta); set_trace_info_for_connection(conn, type, tp_p); - // If the user code setup traceparent manually, don't interfere and add - // something else with TC L7 - if (meta->type == EVENT_HTTP_CLIENT && tp_in_headers) { - return; - } + // TODO: If the user code setup traceparent manually, don't interfere and add + // something else with TC L7. The main challenge is that with kprobes, the + // sock_msg program has already punched a hole in the HTTP headers and has made + // the HTTP header invalid. We need to add more smarts there or pull the + // sock msg information here and mark it so that we don't override the span_id. server_or_client_trace(meta->type, conn, tp_p); } } diff --git a/bpf/tc_common.h b/bpf/tc_common.h index a94b3269f..258d7539e 100644 --- a/bpf/tc_common.h +++ b/bpf/tc_common.h @@ -7,6 +7,7 @@ enum { MAX_INLINE_LEN = 0x3ff }; const char TP[] = "Traceparent: 00-00000000000000000000000000000000-0000000000000000-01\r\n"; +const char INV_TP[] = "W3C-BeylaID: 00-00000000000000000000000000000000-0000000000000000-01\r\n"; const u32 EXTEND_SIZE = sizeof(TP) - 1; const char TP_PREFIX[] = "Traceparent: "; const u32 TP_PREFIX_SIZE = sizeof(TP_PREFIX) - 1; diff --git a/bpf/tc_sock.h b/bpf/tc_sock.h index bad720bd7..050b3aa74 100644 --- a/bpf/tc_sock.h +++ b/bpf/tc_sock.h @@ -16,6 +16,8 @@ // A map of sockets which we track with sock_ops. The sock_msg // program subscribes to this map and runs for each new socket // activity +// The map size must be max u16 to avoid accidentally losing +// the socket information struct { __uint(type, BPF_MAP_TYPE_SOCKHASH); __uint(max_entries, SOCKOPS_MAP_SIZE); @@ -34,11 +36,13 @@ typedef struct tc_http_ctx { // A map that keeps all the HTTP packets we've extended with // the sock_msg program and that Traffic Control needs to write to. +// The map size must be max u16 to avoid accidentally overwriting +// prior information of a live extended header. struct tc_http_ctx_map { __uint(type, BPF_MAP_TYPE_LRU_HASH); __type(key, u32); __type(value, struct tc_http_ctx); - __uint(max_entries, 10240); + __uint(max_entries, SOCKOPS_MAP_SIZE); } tc_http_ctx_map SEC(".maps"); // Memory buffer and a map bellow as temporary storage for @@ -209,7 +213,10 @@ static __always_inline u8 protocol_detector(struct sk_msg_md *msg, // actual 'Traceparent:...' string. if (is_http_request_buf((const unsigned char *)msg_buf.buf)) { bpf_dbg_printk("Setting up request to be extended"); - bpf_map_update_elem(&msg_buffers, &e_key, &msg_buf, BPF_ANY); + if (bpf_map_update_elem(&msg_buffers, &e_key, &msg_buf, BPF_ANY)) { + // fail if we can't setup a msg buffer + return 0; + } return 1; } @@ -266,19 +273,36 @@ int beyla_packet_extender(struct sk_msg_md *msg) { if (newline_pos >= 0) { newline_pos++; - // Push extends the packet with empty space and sets up the - // metadata for Traffic Control to finish the writing - if (!bpf_msg_push_data(msg, newline_pos, EXTEND_SIZE, 0)) { - tc_http_ctx_t ctx = { - .offset = newline_pos, - .seen = 0, - .written = 0, - }; - u32 port = msg->local_port; - - bpf_map_update_elem(&tc_http_ctx_map, &port, &ctx, BPF_ANY); + tc_http_ctx_t ctx = { + .offset = newline_pos, + .seen = 0, + .written = 0, + }; + u32 port = msg->local_port; + + // We first attempt to register the metadata for TC to work with. If we + // fail, we shouldn't expand the packet! + long failed = bpf_map_update_elem(&tc_http_ctx_map, &port, &ctx, BPF_ANY); + + if (!failed) { + // Push extends the packet with empty space and sets up the + // metadata for Traffic Control to finish the writing. If we + // fail (non-zero return value), we delete the metadata. + if (bpf_msg_push_data(msg, newline_pos, EXTEND_SIZE, 0)) { + // We two things to disable this context, we set the written + // and seen to their max value to disable the TC code, and then + // we also attempt delete. This is to ensure that we still have + // disabled the TC code if delete failed. + if (bpf_map_delete_elem(&tc_http_ctx_map, &port)) { + tc_http_ctx_t *bad_ctx = bpf_map_lookup_elem(&tc_http_ctx_map, &port); + if (bad_ctx) { + bad_ctx->written = EXTEND_SIZE; + bad_ctx->seen = bad_ctx->offset; + } + } + bpf_dbg_printk("offset to split %d", newline_pos); + } } - bpf_dbg_printk("offset to split %d", newline_pos); } } diff --git a/bpf/tc_tracer.c b/bpf/tc_tracer.c index f4d7fbab1..eabbff22f 100644 --- a/bpf/tc_tracer.c +++ b/bpf/tc_tracer.c @@ -134,6 +134,14 @@ int beyla_app_egress(struct __sk_buff *skb) { if (tp->valid) { encode_data_in_ip_options(skb, &conn, &tcp, tp, &e_key); } + } else { + u32 s_port = e_key.s_port; + tc_http_ctx_t *ctx = (tc_http_ctx_t *)bpf_map_lookup_elem(&tc_http_ctx_map, &s_port); + + if (ctx) { + bpf_dbg_printk("No trace-map info, filling up the hole setup by sk_msg"); + write_traceparent(skb, &tcp, &e_key, ctx, (unsigned char *)INV_TP); + } } return TC_ACT_UNSPEC; diff --git a/bpf/tc_tracer_l7.h b/bpf/tc_tracer_l7.h index 3222e09a1..540b25554 100644 --- a/bpf/tc_tracer_l7.h +++ b/bpf/tc_tracer_l7.h @@ -110,7 +110,8 @@ static __always_inline unsigned char *tp_buf_mem(tp_info_t *tp) { } static __always_inline void l7_app_ctx_cleanup(egress_key_t *e_key) { - bpf_map_delete_elem(&tc_http_ctx_map, &e_key->s_port); + u32 s_port = e_key->s_port; + bpf_map_delete_elem(&tc_http_ctx_map, &s_port); bpf_map_delete_elem(&outgoing_trace_map, e_key); } @@ -129,6 +130,99 @@ static __always_inline struct bpf_sock *lookup_sock_from_tuple(struct __sk_buff return 0; } +static __always_inline int write_traceparent(struct __sk_buff *skb, + protocol_info_t *tcp, + egress_key_t *e_key, + tc_http_ctx_t *ctx, + unsigned char *tp_buf) { + u32 tot_len = (u64)ctx_data_end(skb) - (u64)ctx_data(skb); + u32 packet_size = tot_len - tcp->hdr_len; + bpf_dbg_printk("Writing traceparent packet_size %d, offset %d, tot_len %d", + packet_size, + ctx->offset, + tot_len); + bpf_dbg_printk("seen %d, written %d", ctx->seen, ctx->written); + + if (packet_size > 0) { + u32 len = 0; + u32 off = tcp->hdr_len; + // picked a value large enough to support TCP headers + bpf_clamp_umax(off, 128); + void *start = ctx_data(skb) + off; + + // We haven't seen enough bytes coming through from the start + // until we did the split (at ctx->offset is where we injected) + // the empty space. + if (ctx->seen < ctx->offset) { + // Diff = How much more before we cross offset + u32 diff = ctx->offset - ctx->seen; + // We received less or equal bytes to what we want to + // reach ctx->offset, i.e the split point. + if (diff > packet_size) { + ctx->seen += packet_size; + return 0; + } else { + // We went over the split point, calculate how much can we + // write, but cap it to the max size = 70 bytes. + bpf_clamp_umax(diff, 2048); + start += diff; + ctx->seen = ctx->offset; + len = packet_size - diff; + bpf_clamp_umax(len, EXTEND_SIZE); + } + } else { + // Fast path. We are exactly at the offset, we've written + // nothing of the 'Traceparent: ...' text yet and the packet + // is exactly 70 bytes. + if (ctx->written == 0 && packet_size == EXTEND_SIZE) { + if ((start + EXTEND_SIZE) <= ctx_data_end(skb)) { + __builtin_memcpy(start, tp_buf, EXTEND_SIZE); + bpf_dbg_printk("Set the string fast_path!"); + ctx->written = EXTEND_SIZE; + l7_app_ctx_cleanup(e_key); + return 0; + } + } + + // Nope, we've written some bytes in another packet and we + // are not done writing yet. + if (ctx->written < EXTEND_SIZE) { + len = EXTEND_SIZE - ctx->written; + bpf_clamp_umax(len, EXTEND_SIZE); + + if (len > packet_size) { + len = packet_size; + } + } else { + // We've written everything already, just clean up + l7_app_ctx_cleanup(e_key); + return 0; + } + } + + if (len > 0) { + u32 tp_off = ctx->written; + // Keeps verifier happy + bpf_clamp_umax(tp_off, EXTEND_SIZE); + bpf_clamp_umax(len, EXTEND_SIZE); + + if ((start + len) <= ctx_data_end(skb)) { + buf_memcpy((char *)start, (char *)tp_buf + tp_off, len, ctx_data_end(skb)); + bpf_dbg_printk("Set the string off = %d, len = %d!", tp_off, len); + } + + ctx->written += len; + // If we've written the full string this time around + // cleanup the metadata. + if (ctx->written >= EXTEND_SIZE) { + l7_app_ctx_cleanup(e_key); + } + } + } + + return 0; +} + // This function does two things: // 1. It adds sockets in the socket hash map which have already been // established and we see them for the first time in Traffic Control, i.e @@ -194,9 +288,7 @@ static __always_inline int l7_app_egress(struct __sk_buff *skb, } } - u32 tot_len = (u64)ctx_data_end(skb) - (u64)ctx_data(skb); - bpf_dbg_printk( - "egress, tot_len %d, s_port %d, data_start %d", tot_len, conn->s_port, tcp->hdr_len); + bpf_dbg_printk("egress, s_port %d, data_start %d", conn->s_port, tcp->hdr_len); unsigned char *tp_buf = tp_buf_mem(&tp->tp); @@ -211,82 +303,7 @@ static __always_inline int l7_app_egress(struct __sk_buff *skb, // the case, seems plausible, but then the code below tries to handle any // split. The 'fast path' handles the exact split as above. if (ctx) { - u32 packet_size = tot_len - tcp->hdr_len; - bpf_dbg_printk("Found it! packet_size %d, offset %d", packet_size, ctx->offset); - bpf_dbg_printk("seen %d, written %d", ctx->seen, ctx->written); - - if (packet_size > 0) { - u32 len = 0; - u32 off = tcp->hdr_len; - // picked a value large enough to support TCP headers - bpf_clamp_umax(off, 128); - void *start = ctx_data(skb) + off; - - // We haven't seen enough bytes coming through from the start - // until we did the split (at ctx->offset is where we injected) - // the empty space. - if (ctx->seen < ctx->offset) { - // Diff = How much more before we cross offset - u32 diff = ctx->offset - ctx->seen; - // We received less or equal bytes to what we want to - // reach ctx->offset, i.e the split point. - if (diff <= packet_size) { - ctx->seen += packet_size; - return 0; - } else { - // We went over the split point, calculate how much can we - // write, but cap it to the max size = 70 bytes. - len = packet_size - diff; - bpf_clamp_umax(len, EXTEND_SIZE); - } - } else { - // Fast path. We are exactly at the offset, we've written - // nothing of the 'Traceparent: ...' text yet and the packet - // is exactly 70 bytes. - if (ctx->written == 0 && packet_size == EXTEND_SIZE) { - if ((start + EXTEND_SIZE) <= ctx_data_end(skb)) { - __builtin_memcpy(start, tp_buf, EXTEND_SIZE); - bpf_dbg_printk("Set the string fast_path!"); - l7_app_ctx_cleanup(e_key); - return 0; - } - } - - // Nope, we've written some bytes in another packet and we - // are not done writing yet. - if (ctx->written < EXTEND_SIZE) { - len = EXTEND_SIZE - ctx->written; - bpf_clamp_umax(len, EXTEND_SIZE); - - if (len > packet_size) { - len = packet_size; - } - } else { - // We've written everything already, just clean up - l7_app_ctx_cleanup(e_key); - return 0; - } - } - - if (len > 0) { - u32 tp_off = ctx->written; - // Keeps verifier happy - bpf_clamp_umax(tp_off, EXTEND_SIZE); - bpf_clamp_umax(len, EXTEND_SIZE); - - if ((start + len) <= ctx_data_end(skb)) { - buf_memcpy((char *)start, (char *)tp_buf + tp_off, len, ctx_data_end(skb)); - bpf_dbg_printk("Set the string off = %d, len = %d!", tp_off, len); - } - - ctx->written += len; - // If we've written the full string this time around - // cleanup the metadata. - if (ctx->written >= EXTEND_SIZE) { - l7_app_ctx_cleanup(e_key); - } - } - } + return write_traceparent(skb, tcp, e_key, ctx, tp_buf); } return 0; diff --git a/pkg/internal/ebpf/generictracer/bpf_arm64_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_arm64_bpfel.o index 87bbc6d81..a674b18d9 100644 --- a/pkg/internal/ebpf/generictracer/bpf_arm64_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44713caef7e0cfe0c53f182832ef91e1d592e9e3cd580736d88b82c201437833 -size 646992 +oid sha256:e5d54f883eb57b6630df13dc26bd6839ce611590c0053a33bdbd0bd3bad38f5b +size 647128 diff --git a/pkg/internal/ebpf/generictracer/bpf_debug_arm64_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_debug_arm64_bpfel.o index 7334f8a0c..04f7121e0 100644 --- a/pkg/internal/ebpf/generictracer/bpf_debug_arm64_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_debug_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01b90e99757de549ef754e1947054514d74df04334a926785883b5d84176bb13 -size 1086440 +oid sha256:a052f35a52b4a5571a242a9d3101b3ad4bb23926e72f481b46929ae67350033c +size 1086576 diff --git a/pkg/internal/ebpf/generictracer/bpf_debug_x86_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_debug_x86_bpfel.o index d388107d4..0a7d28734 100644 --- a/pkg/internal/ebpf/generictracer/bpf_debug_x86_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_debug_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9348601fe73c88e173cfc085dd9128775dc0f9b2dca697f5af8266ca44d812d5 -size 1086008 +oid sha256:5e6f8229a2a852cd88e7051c16c2acfd5910d77f48a75d2c6e7cca4cb5e4449a +size 1086144 diff --git a/pkg/internal/ebpf/generictracer/bpf_tp_arm64_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_tp_arm64_bpfel.o index dd853a369..91f040a96 100644 --- a/pkg/internal/ebpf/generictracer/bpf_tp_arm64_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_tp_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:22f8ff432348893f8743a94e119890b9d6979a99bf498e63c41274be5ffcfe5a -size 662184 +oid sha256:4462d9fe9cfd5c3bfa4366f864922aa10f9f0fceeec16f6c071d49f544865f1f +size 661648 diff --git a/pkg/internal/ebpf/generictracer/bpf_tp_debug_arm64_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_tp_debug_arm64_bpfel.o index d5093ea8b..e494cca5c 100644 --- a/pkg/internal/ebpf/generictracer/bpf_tp_debug_arm64_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_tp_debug_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c6d98af82ad7c4ab7f8e3088bd30b595c92e688616bc6be4cc5ced2bf3c09b87 -size 1105264 +oid sha256:e86c073d4e68641cb7eea8cceb326d5613cd3e5ce616f5240d700e708ab21193 +size 1104944 diff --git a/pkg/internal/ebpf/generictracer/bpf_tp_debug_x86_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_tp_debug_x86_bpfel.o index 3c5c8c4a7..71c13b7e4 100644 --- a/pkg/internal/ebpf/generictracer/bpf_tp_debug_x86_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_tp_debug_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0bc58348b1ef375f67b6a2e73d36a3656128bc669f63215e36142f6872392db5 -size 1104832 +oid sha256:1e1d36722e76b98a46f2066bdf0078c9f7df5083b1b7d25929004885dbb8d674 +size 1104520 diff --git a/pkg/internal/ebpf/generictracer/bpf_tp_x86_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_tp_x86_bpfel.o index 7252da255..8416357cf 100644 --- a/pkg/internal/ebpf/generictracer/bpf_tp_x86_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_tp_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4ec6044908a59fb5921cd3765dad56469dee7f1501fb328f185033873d88cf31 -size 661576 +oid sha256:4771d98f6691399d407cf2fe5655561687e54b81ebb5aa083b476752b278efe1 +size 661032 diff --git a/pkg/internal/ebpf/generictracer/bpf_x86_bpfel.o b/pkg/internal/ebpf/generictracer/bpf_x86_bpfel.o index 989447df3..409cc557f 100644 --- a/pkg/internal/ebpf/generictracer/bpf_x86_bpfel.o +++ b/pkg/internal/ebpf/generictracer/bpf_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:14abb5039bbcbc844dcc5b4458e44135db4d15a84127b911da8ab65928dc1a65 -size 646368 +oid sha256:a3d88b415f455471352194891bbe114659377856cc465faa1cd5f78e77a6d994 +size 646512 diff --git a/pkg/internal/ebpf/gotracer/bpf_debug_x86_bpfel.o b/pkg/internal/ebpf/gotracer/bpf_debug_x86_bpfel.o index e79fda777..a2b95f608 100644 --- a/pkg/internal/ebpf/gotracer/bpf_debug_x86_bpfel.o +++ b/pkg/internal/ebpf/gotracer/bpf_debug_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f77109cf9b4ebd2c64619c76405a381c1a3b529fb5a3f382f4b05110be50ef81 +oid sha256:6717be37737a28cd6830ba4731e190ecab9133746277218ecf7d21cf907e0f27 size 943472 diff --git a/pkg/internal/ebpf/httptracer/bpf_arm64_bpfel.o b/pkg/internal/ebpf/httptracer/bpf_arm64_bpfel.o index 3151aa74a..8e71a6f90 100644 --- a/pkg/internal/ebpf/httptracer/bpf_arm64_bpfel.o +++ b/pkg/internal/ebpf/httptracer/bpf_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a3ff765845b4f3aabce4ad69f6cd15a93de2909b0b5b40e137d0504bc777ea9 -size 43088 +oid sha256:4ee73e5c26ba0ed1e47349039ee34848e7d1e0169b0edb790ca829b354d13861 +size 43240 diff --git a/pkg/internal/ebpf/httptracer/bpf_debug_arm64_bpfel.o b/pkg/internal/ebpf/httptracer/bpf_debug_arm64_bpfel.o index 86263825d..dc6544ee7 100644 --- a/pkg/internal/ebpf/httptracer/bpf_debug_arm64_bpfel.o +++ b/pkg/internal/ebpf/httptracer/bpf_debug_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0abbd5cad95609e163550388185b552c0d521e8c8e30d606cbc678c7a7be6c1f -size 43328 +oid sha256:6d4cbdddf692871df0ce00ceb6d3affa904bb4d296ba3d0c4934176ff25620e4 +size 43480 diff --git a/pkg/internal/ebpf/httptracer/bpf_debug_x86_bpfel.o b/pkg/internal/ebpf/httptracer/bpf_debug_x86_bpfel.o index 6ef1e1209..6505a1172 100644 --- a/pkg/internal/ebpf/httptracer/bpf_debug_x86_bpfel.o +++ b/pkg/internal/ebpf/httptracer/bpf_debug_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1f1a24355bfcfce4e63be60e88737624f5435d0f910f6825c382cff619d9e669 -size 43208 +oid sha256:1d1c2f4f78da66f689ce4db8a2b84de56e5481b2ee993599e81ec91c7e0ee3b2 +size 43368 diff --git a/pkg/internal/ebpf/httptracer/bpf_x86_bpfel.o b/pkg/internal/ebpf/httptracer/bpf_x86_bpfel.o index 913abd24d..b12eb7621 100644 --- a/pkg/internal/ebpf/httptracer/bpf_x86_bpfel.o +++ b/pkg/internal/ebpf/httptracer/bpf_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cd3d07881b23186a82efba2ce07829843490a2f9a6f503ceab163ed13fb546b2 -size 42968 +oid sha256:bfb2a6f8b71e03f28afdb2acf26c1f0cb6835b57c5a287f63a4549d294c5512e +size 43128 diff --git a/pkg/internal/ebpf/tctracer/bpf_arm64_bpfel.o b/pkg/internal/ebpf/tctracer/bpf_arm64_bpfel.o index 88140315e..422fedff7 100644 --- a/pkg/internal/ebpf/tctracer/bpf_arm64_bpfel.o +++ b/pkg/internal/ebpf/tctracer/bpf_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cacf4ce26d1d3dbea69bf254e8114f7249b4271b5892ec208b90e3c41668b2ae -size 221368 +oid sha256:69624056dd6df7074341c94ac88cee3ffd1f7090aecefb3a7de49972964f37e5 +size 229224 diff --git a/pkg/internal/ebpf/tctracer/bpf_debug_arm64_bpfel.o b/pkg/internal/ebpf/tctracer/bpf_debug_arm64_bpfel.o index eb6a350a2..69bd293ee 100644 --- a/pkg/internal/ebpf/tctracer/bpf_debug_arm64_bpfel.o +++ b/pkg/internal/ebpf/tctracer/bpf_debug_arm64_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b896b19786477f7125710c6f6102d44d10d5df6a9a3b583bc88f0fe300badc0e -size 402904 +oid sha256:5d0ab9aadfc5096a5e7b388aa02f7a00180bbaaeab08237148e86470691991bd +size 419256 diff --git a/pkg/internal/ebpf/tctracer/bpf_debug_x86_bpfel.o b/pkg/internal/ebpf/tctracer/bpf_debug_x86_bpfel.o index fc6efede1..5d662784b 100644 --- a/pkg/internal/ebpf/tctracer/bpf_debug_x86_bpfel.o +++ b/pkg/internal/ebpf/tctracer/bpf_debug_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4193325d60be433316960c023602d30c449e2b3d5283e6dc3721a4ca9b10abc -size 404208 +oid sha256:d5a76352071de6954ddc853466ef30d572744a0c16995d8dc6ad127bce83ff7d +size 420568 diff --git a/pkg/internal/ebpf/tctracer/bpf_x86_bpfel.o b/pkg/internal/ebpf/tctracer/bpf_x86_bpfel.o index 31a9b93f5..b4a416b7d 100644 --- a/pkg/internal/ebpf/tctracer/bpf_x86_bpfel.o +++ b/pkg/internal/ebpf/tctracer/bpf_x86_bpfel.o @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b7ed24b958ccef86b4f0be7113684ed232c6c96a32558c53d8e556ee38d6935e -size 222680 +oid sha256:421a18d31e803831c70dfae0c9b2670071756e7c18cea445ba5463ea84dc014f +size 230536 diff --git a/pkg/internal/ebpf/tracer_linux.go b/pkg/internal/ebpf/tracer_linux.go index fe2329b5a..fb5202973 100644 --- a/pkg/internal/ebpf/tracer_linux.go +++ b/pkg/internal/ebpf/tracer_linux.go @@ -162,10 +162,10 @@ func (pt *ProcessTracer) loadTracer(p Tracer, log *slog.Logger) error { err := pt.loadAndAssign(p) if err != nil && strings.Contains(err.Error(), "unknown func bpf_probe_write_user") { - plog.Warn("Failed to enable distributed tracing context-propagation on a " + + plog.Warn("Failed to enable Go write memory distributed tracing context-propagation on a " + "Linux Kernel without write memory support. " + "To avoid seeing this message, please ensure you have correctly mounted /sys/kernel/security. " + - "and ensure beyla has the SYS_ADMIN linux capability" + + "and ensure beyla has the SYS_ADMIN linux capability. " + "For more details set BEYLA_LOG_LEVEL=DEBUG.") common.IntegrityModeOverride = true diff --git a/test/ebpf/tc_split_packets/main.c b/test/ebpf/tc_split_packets/main.c new file mode 100644 index 000000000..8d96c6530 --- /dev/null +++ b/test/ebpf/tc_split_packets/main.c @@ -0,0 +1,356 @@ +/** + * Tests the following function, adapted from it: + * static __always_inline write_traceparent(struct __sk_buff *skb, + protocol_info_t *tcp, + egress_key_t *e_key, + tc_http_ctx_t *ctx, + unsigned char *tp_buf) { + */ + +#include +#include +#include + +typedef struct sk_buf { + int tot_len; + char data[256]; +} sk_buf_t; + +typedef struct tc_http_ctx { + unsigned int offset; // where inside the original packet we saw '\n` + unsigned int seen; // how many bytes we've seen before the offset + unsigned int written; // how many of the Traceparent field we've written +} tc_http_ctx_t; + +const int hdr_len = 12; + +const unsigned char TP[] = + "Traceparent: 12-34567891234567891234567891234567-8912345678912345-67\r\n"; +const unsigned int EXTEND_SIZE = sizeof(TP) - 1; + +void bpf_clamp_umax(unsigned int *off, unsigned int val) { + if (*off > val) { + *off = val; + } +} + +int write_traceparent(sk_buf_t *skb, tc_http_ctx_t *ctx, + unsigned char *tp_buf) { + unsigned int tot_len = skb->tot_len; + unsigned int packet_size = tot_len - hdr_len; + printf("Writing traceparent packet_size %d, offset %d, tot_len %d\n", + packet_size, ctx->offset, tot_len); + printf("seen %d, written %d\n", ctx->seen, ctx->written); + + if (packet_size > 0) { + unsigned int len = 0; + unsigned int off = hdr_len; + // picked a value large enough to support TCP headers + bpf_clamp_umax(&off, 128); + char *start = skb->data + off; + + // We haven't seen enough bytes coming through from the start + // until we did the split (at ctx->offset is where we injected) + // the empty space. + if (ctx->seen < ctx->offset) { + // Diff = How much more before we cross offset + unsigned int diff = ctx->offset - ctx->seen; + // We received less or equal bytes to what we want to + // reach ctx->offset, i.e the split point. + if (diff > packet_size) { + ctx->seen += packet_size; + return 0; + } else { + start += diff; + ctx->seen = ctx->offset; + // We went over the split point, calculate how much can we + // write, but cap it to the max size = 70 bytes. + len = packet_size - diff; + bpf_clamp_umax(&len, EXTEND_SIZE); + } + } else { + // Fast path. We are exactly at the offset, we've written + // nothing of the 'Traceparent: ...' text yet and the packet + // is exactly 70 bytes. + if (ctx->written == 0 && packet_size == EXTEND_SIZE) { + if ((start + EXTEND_SIZE) <= &skb->data[255]) { + memcpy(start, tp_buf, EXTEND_SIZE); + printf("Set the string fast_path!\n"); + + return 0; + } + } + + // Nope, we've written some bytes in another packet and we + // are not done writing yet. + if (ctx->written < EXTEND_SIZE) { + len = EXTEND_SIZE - ctx->written; + bpf_clamp_umax(&len, EXTEND_SIZE); + + if (len > packet_size) { + len = packet_size; + } + } else { + // We've written everything already, just clean up + return 0; + } + } + + if (len > 0) { + unsigned int tp_off = ctx->written; + // Keeps verifier happy + bpf_clamp_umax(&tp_off, EXTEND_SIZE); + bpf_clamp_umax(&len, EXTEND_SIZE); + + if ((start + len) <= &skb->data[255]) { + memcpy((char *)start, (char *)tp_buf + tp_off, len); + printf("Set the string off = %d, len = %d!\n", tp_off, len); + } + + ctx->written += len; + // If we've written the full string this time around + // cleanup the metadata. + if (ctx->written >= EXTEND_SIZE) { + return 0; + } + } + } + + return 1; +} + +void assert_equals(int expected, int actual, const char *message) { + if (expected != actual) { + fprintf(stderr, "Assertion failed: %s\nExpected: %d\nActual: %d\n", message, + expected, actual); + exit(EXIT_FAILURE); + } +} + +void s_assert_equals(char *expected, char *actual, int len, + const char *message) { + if (strncmp(expected, actual, len)) { + fprintf(stderr, "Assertion failed: %s\nExpected: %s\nActual: %s\n", message, + expected, actual); + exit(EXIT_FAILURE); + } +} + +int test1() { + sk_buf_t skb1 = {0}; + skb1.tot_len = hdr_len; + + sk_buf_t skb2 = {0}; + skb2.tot_len = hdr_len + 10; + + sk_buf_t skb3 = {0}; + skb3.tot_len = hdr_len + 5; + + sk_buf_t skb4 = {0}; + skb4.tot_len = hdr_len + 80; + + tc_http_ctx_t ctx = { + .offset = 17, + .seen = 0, + .written = 0, + }; + + printf("Test 1\n"); + + write_traceparent(&skb1, &ctx, (unsigned char *)TP); + assert_equals(0, ctx.seen, "empty packet didn't move anything"); + + write_traceparent(&skb2, &ctx, (unsigned char *)TP); + assert_equals(10, ctx.seen, "seen 10 up"); + assert_equals(0, ctx.written, "written 0"); + + write_traceparent(&skb3, &ctx, (unsigned char *)TP); + assert_equals(15, ctx.seen, "seen 5 up"); + assert_equals(0, ctx.written, "written 0"); + + write_traceparent(&skb4, &ctx, (unsigned char *)TP); + assert_equals(ctx.offset, ctx.seen, "seen 2 up"); + assert_equals(EXTEND_SIZE, ctx.written, "written extend size"); + s_assert_equals((unsigned char *)TP, &skb4.data[ctx.offset - 15 + hdr_len], + EXTEND_SIZE, "data is in the right location and equal"); + + printf("Worked!\n"); +} + +int test2() { + sk_buf_t skb1 = {0}; + skb1.tot_len = hdr_len; + + sk_buf_t skb2 = {0}; + skb2.tot_len = hdr_len + 10; + + sk_buf_t skb3 = {0}; + skb3.tot_len = hdr_len + 25; + + sk_buf_t skb4 = {0}; + skb4.tot_len = hdr_len + 80; + + tc_http_ctx_t ctx = { + .offset = 17, + .seen = 0, + .written = 0, + }; + + printf("Test 2\n"); + + write_traceparent(&skb1, &ctx, (unsigned char *)TP); + assert_equals(0, ctx.seen, "empty packet didn't move anything"); + + write_traceparent(&skb2, &ctx, (unsigned char *)TP); + assert_equals(10, ctx.seen, "seen 10 up"); + assert_equals(0, ctx.written, "written 0"); + + write_traceparent(&skb3, &ctx, (unsigned char *)TP); + assert_equals(ctx.offset, ctx.seen, "seen 25 up"); + assert_equals(25 - ctx.offset + 10, ctx.written, "written 0"); + s_assert_equals((unsigned char *)TP, &skb3.data[ctx.offset - 10 + hdr_len], + ctx.written, "data is in the right location and equal"); + + unsigned int prev_written = ctx.written; + + write_traceparent(&skb4, &ctx, (unsigned char *)TP); + assert_equals(ctx.offset, ctx.seen, "seen remains the same"); + assert_equals(EXTEND_SIZE, ctx.written, "written extend size"); + s_assert_equals((unsigned char *)TP + prev_written, &skb4.data[hdr_len], + EXTEND_SIZE - prev_written, + "data is in the right location and equal"); + + printf("Worked!\n"); +} + +int test3() { + sk_buf_t skb1 = {0}; + skb1.tot_len = hdr_len + 80; + + sk_buf_t skb2 = {0}; + skb2.tot_len = hdr_len + 80; + + tc_http_ctx_t ctx = { + .offset = 17, + .seen = 0, + .written = 0, + }; + + printf("Test 3\n"); + + write_traceparent(&skb1, &ctx, (unsigned char *)TP); + assert_equals(ctx.offset, ctx.seen, "seen remains the same"); + assert_equals(80 - ctx.offset, ctx.written, "written extend size"); + s_assert_equals((unsigned char *)TP, &skb1.data[hdr_len + ctx.offset], + 80 - ctx.offset, "data is in the right location and equal"); + + unsigned int prev_written = ctx.written; + + write_traceparent(&skb2, &ctx, (unsigned char *)TP); + assert_equals(ctx.offset, ctx.seen, "seen remains the same"); + assert_equals(EXTEND_SIZE, ctx.written, "written extend size"); + s_assert_equals((unsigned char *)TP + prev_written, &skb2.data[hdr_len], + EXTEND_SIZE - prev_written, + "data is in the right location and equal"); + + printf("Worked!\n"); +} + +int test4() { + sk_buf_t skb1 = {0}; + skb1.tot_len = hdr_len + 17; + + sk_buf_t skb2 = {0}; + skb2.tot_len = hdr_len + 80; + + tc_http_ctx_t ctx = { + .offset = 17, + .seen = 0, + .written = 0, + }; + + printf("Test 4\n"); + + write_traceparent(&skb1, &ctx, (unsigned char *)TP); + assert_equals(ctx.offset, ctx.seen, "seen remains the same"); + assert_equals(0, ctx.written, "written 0"); + + write_traceparent(&skb2, &ctx, (unsigned char *)TP); + assert_equals(ctx.offset, ctx.seen, "seen remains the same"); + assert_equals(EXTEND_SIZE, ctx.written, "written extend size"); + s_assert_equals((unsigned char *)TP, &skb2.data[hdr_len], EXTEND_SIZE, + "data is in the right location and equal"); + + printf("Worked!\n"); +} + +int test5() { + sk_buf_t skb1 = {0}; + skb1.tot_len = hdr_len + 150; + + tc_http_ctx_t ctx = { + .offset = 17, + .seen = 0, + .written = 0, + }; + + printf("Test 5\n"); + + write_traceparent(&skb1, &ctx, (unsigned char *)TP); + assert_equals(ctx.offset, ctx.seen, "seen remains the same"); + assert_equals(EXTEND_SIZE, ctx.written, "written extend size"); + s_assert_equals((unsigned char *)TP, &skb1.data[hdr_len + ctx.offset], + EXTEND_SIZE, "data is in the right location and equal"); + + printf("Worked!\n"); +} + +int test6() { + sk_buf_t skb1 = {0}; + skb1.tot_len = hdr_len; + + sk_buf_t skb2 = {0}; + skb2.tot_len = hdr_len + 10; + + sk_buf_t skb3 = {0}; + skb3.tot_len = hdr_len + 7; + + sk_buf_t skb4 = {0}; + skb4.tot_len = hdr_len + 80; + + tc_http_ctx_t ctx = { + .offset = 17, + .seen = 0, + .written = 0, + }; + + printf("Test 6\n"); + + write_traceparent(&skb1, &ctx, (unsigned char *)TP); + assert_equals(0, ctx.seen, "empty packet didn't move anything"); + + write_traceparent(&skb2, &ctx, (unsigned char *)TP); + assert_equals(10, ctx.seen, "seen 10 up"); + assert_equals(0, ctx.written, "written 0"); + + write_traceparent(&skb3, &ctx, (unsigned char *)TP); + assert_equals(ctx.offset, ctx.seen, "seen 7 up"); + assert_equals(0, ctx.written, "written 0"); + + write_traceparent(&skb4, &ctx, (unsigned char *)TP); + assert_equals(ctx.offset, ctx.seen, "seen 0 up"); + assert_equals(EXTEND_SIZE, ctx.written, "written extend size"); + s_assert_equals((unsigned char *)TP, &skb4.data[hdr_len], EXTEND_SIZE, + "data is in the right location and equal"); + + printf("Worked!\n"); +} + +int main(int argc, char **argv) { + test1(); + test2(); + test3(); + test4(); + test5(); + test6(); +}