Skip to content

Commit

Permalink
Add a path for BPF-accelerated async signal emulation.
Browse files Browse the repository at this point in the history
Starting in kernel 6.10 BPF filters can choose whether or not to trigger
the SIGIO behavior for a perf event that becomes readable. We combine that
with a hardware breakpoint and a BPF filter that matches the GPRs to produce
an accelerated internal breakpoint type that can fast forward through loop
iterations to deliver async signals. On one trace this reduced rr's replay
overhead by 94%.

This adds a runtime dependency on libbpf and a compile-time dependency on
clang with the BPF target (clang --target bpf). rr also needs CAP_BPF and
CAP_PERFMON to use this feature. Because of all that, this isn't yet suitable
for wide use, so it is gated behind an opt-in CMake feature, usebpf. Set
-Dusebpf=ON to test it.
  • Loading branch information
khuey committed May 15, 2024
1 parent b7c3913 commit 743aafe
Show file tree
Hide file tree
Showing 8 changed files with 226 additions and 16 deletions.
26 changes: 25 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,16 @@ set(REQUIRED_LIBS
zlib
)

# Optional BPF acceleration for async-signal delivery during replay.
# Requires libbpf at runtime and clang with BPF target support at build
# time; rr additionally needs CAP_BPF and CAP_PERFMON to use the feature.
option(usebpf "Enable bpf acceleration" OFF)

if(usebpf)
  add_definitions(-DUSEBPF=1)
  # libbpf is resolved via the pkg-config loop over REQUIRED_LIBS below.
  list(APPEND REQUIRED_LIBS libbpf)
endif()

foreach(required_lib ${REQUIRED_LIBS})
string(TOUPPER ${required_lib} PKG)
if(NOT SKIP_PKGCONFIG)
Expand Down Expand Up @@ -679,6 +689,19 @@ post_build_executable(rr)
set(RR_BIN rr)
add_dependencies(rr Generated)

if(usebpf)
  set(BPF_FILTER_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/bpf/async_event_filter.c)
  set(BPF_FILTER_OBJ ${CMAKE_CURRENT_BINARY_DIR}/share/rr/async_event_filter.o)

  # Compile the BPF filter with clang's BPF backend. Create the output
  # directory first: clang will not create it and would otherwise fail on
  # a clean build tree.
  add_custom_command(OUTPUT ${BPF_FILTER_OBJ}
    DEPENDS ${BPF_FILTER_SRC}
    COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/share/rr
    COMMAND clang -g -target bpf -Wall -O2 -c ${BPF_FILTER_SRC} -o ${BPF_FILTER_OBJ}
    VERBATIM)

  install(FILES ${BPF_FILTER_OBJ}
    DESTINATION ${CMAKE_INSTALL_DATADIR}/rr)

  # Ensure the filter object is (re)built whenever rr itself is built.
  add_custom_target(BPF DEPENDS ${BPF_FILTER_OBJ})
  add_dependencies(rr BPF)
endif()

option(strip "Strip debug info from rr binary")

set(RR_MAIN_LINKER_FLAGS ${LINKER_FLAGS})
Expand Down Expand Up @@ -711,12 +734,13 @@ endif()
target_link_libraries(rr
${CMAKE_DL_LIBS}
${ZLIB_LDFLAGS}
${LIBBPF_LDFLAGS}
brotli
)

if(staticlibs)
# Urgh ... this might not work for everyone, but there doesn't seem to be
# a way to persuade pkg-confing/pkg_check_modules to produce the right flags
# a way to persuade pkg-config/pkg_check_modules to produce the right flags
target_link_libraries(rr -L/home/roc/lib -l:libcapnp.a -l:libkj.a)
# Note that this works for both clang++ and g++
set(RR_MAIN_LINKER_FLAGS "-static-libstdc++ ${RR_MAIN_LINKER_FLAGS}")
Expand Down
99 changes: 99 additions & 0 deletions src/PerfCounters.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
#include <unistd.h>
#include <time.h>

#ifdef USEBPF
#include <bpf/libbpf.h>
#include <linux/hw_breakpoint.h>
#endif

#include <algorithm>
#include <fstream>
#include <limits>
Expand Down Expand Up @@ -954,6 +959,7 @@ void PerfCounters::close() {
fd_minus_ticks_measure.close();
fd_useless_counter.close();
fd_ticks_in_transaction.close();
fd_async_signal_accelerator.close();
}

Ticks PerfCounters::stop(Task* t, Error* error) {
Expand All @@ -980,6 +986,7 @@ Ticks PerfCounters::stop(Task* t, Error* error) {
if (pt_state) {
infallible_perf_event_disable_if_open(pt_state->pt_perf_event_fd);
}
infallible_perf_event_disable_if_open(fd_async_signal_accelerator);
}
return ticks;
}
Expand Down Expand Up @@ -1090,4 +1097,96 @@ Ticks PerfCounters::read_ticks(Task* t, Error* error) {
return ret;
}

#ifdef USEBPF
/**
 * Try to arm a BPF-filtered hardware execution breakpoint at regs.ip() that
 * only raises its signal when all GPRs match |regs|, letting the kernel fast
 * forward through non-matching hits of the same $ip.
 * Returns true if the accelerated breakpoint is armed and enabled, false if
 * BPF acceleration is unavailable (missing filter object, kernel support,
 * or capabilities).
 */
bool PerfCounters::accelerate_async_signal(const Registers& regs) {
  // Process-wide one-time BPF setup, shared by all PerfCounters instances.
  // initialized: 0 = not attempted, -1 = attempted and failed, 1 = ready.
  static int initialized;
  static struct perf_event_attr attr;
  static int bpf_prog_fd;
  static struct user_regs_struct* bpf_regs;

  if (!fd_async_signal_accelerator.is_open()) {
    if (!initialized) {
      // Assume failure until everything below succeeds.
      initialized = -1;

      attr.type = PERF_TYPE_BREAKPOINT;
      attr.size = sizeof(attr);
      attr.bp_type = HW_BREAKPOINT_X;
      attr.bp_len = sizeof(long);
      attr.sample_period = 1;
      attr.sample_type = PERF_SAMPLE_IP;
      attr.pinned = 1;
      attr.exclude_kernel = 1;
      attr.exclude_hv = 1;
      attr.wakeup_events = 1;
      attr.precise_ip = 3;
      attr.disabled = 1;

      // With DIRECT_ERRS, libbpf returns errors directly (e.g. encoded in
      // the pointer) instead of setting errno, hence the <= 0 check below.
      libbpf_set_strict_mode(LIBBPF_STRICT_DIRECT_ERRS);
      string path = resource_path() + "share/rr/async_event_filter.o";
      struct bpf_object* obj = bpf_object__open(path.c_str());
      if ((intptr_t)obj <= 0) {
        return false;
      }
      if (bpf_object__load(obj) < 0) {
        bpf_object__close(obj);
        return false;
      }
      // NB: do NOT wrap this fd in a ScopedFd here; a temporary ScopedFd
      // would close the map fd at the end of the statement, before the
      // mmap below can use it. The fd stays owned by |obj|.
      int bpf_map_fd = bpf_object__find_map_fd_by_name(obj, "registers");
      if (bpf_map_fd < 0) {
        bpf_object__close(obj);
        return false;
      }
      struct bpf_program* prog = bpf_program__next(NULL, obj);
      if (!prog) {
        bpf_object__close(obj);
        return false;
      }
      bpf_prog_fd = bpf_program__fd(prog);
      if (bpf_prog_fd < 0) {
        bpf_object__close(obj);
        return false;
      }

      // Map the BPF_F_MMAPABLE "registers" array so the expected GPR
      // values can be published to the filter without a syscall per use.
      bpf_regs = (struct user_regs_struct*)
          mmap(NULL, 4096, PROT_READ | PROT_WRITE,
               MAP_SHARED, bpf_map_fd, 0);
      if (bpf_regs == MAP_FAILED) {
        // mmap reports failure with MAP_FAILED ((void*)-1), not NULL.
        bpf_regs = nullptr;
        bpf_object__close(obj);
        return false;
      }

      // On success |obj| is deliberately leaked: the program and map fds
      // it owns must stay open for the rest of the process lifetime.
      initialized = 1;
    } else if (initialized < 0) {
      return false;
    }

    attr.bp_addr = 0;
    fd_async_signal_accelerator = start_counter(tid, -1, &attr);

    // Route the SIGIO-style notification to this tracee thread only.
    struct f_owner_ex own;
    own.type = F_OWNER_TID;
    own.pid = tid;
    if (fcntl(fd_async_signal_accelerator, F_SETOWN_EX, &own)) {
      FATAL() << "Failed to SETOWN_EX bpf-accelerated breakpoint fd";
    }

    make_counter_async(fd_async_signal_accelerator, SIGTRAP);

    if (ioctl(fd_async_signal_accelerator, PERF_EVENT_IOC_SET_BPF, bpf_prog_fd)) {
      FATAL() << "Failed PERF_EVENT_IOC_SET_BPF";
    }
  }

  if (!fd_async_signal_accelerator.is_open()) {
    return false;
  }

  // Move the (possibly reused) breakpoint to the target $ip.
  attr.bp_addr = regs.ip().register_value();
  if (ioctl(fd_async_signal_accelerator, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &attr)) {
    FATAL() << "Failed PERF_EVENT_IOC_MODIFY_ATTRIBUTES";
  }

  // Publish the full GPR set for the BPF filter to compare against.
  auto r = regs.get_ptrace();
  memcpy(bpf_regs, &r, sizeof(struct user_regs_struct));

  infallible_perf_event_enable_if_open(fd_async_signal_accelerator);
  return true;
}
#endif

} // namespace rr
15 changes: 15 additions & 0 deletions src/PerfCounters.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

namespace rr {

class Registers;
class Task;

enum TicksSemantics {
Expand Down Expand Up @@ -175,6 +176,17 @@ class PerfCounters {
*/
static void start_pt_copy_thread();

/**
 * Try to use BPF to accelerate async signal processing: arm a BPF-filtered
 * hardware breakpoint that only signals when the GPRs match |regs|.
 * Returns true if the accelerated breakpoint was armed, false if the
 * feature is unavailable. The non-USEBPF stub always returns false so
 * callers fall back to software breakpoints.
 */
#ifdef USEBPF
bool accelerate_async_signal(const Registers& regs);
#else
bool accelerate_async_signal(const Registers&) {
return false;
}
#endif

private:
template <typename Arch> void reset_arch_extras();

Expand Down Expand Up @@ -212,6 +224,9 @@ class PerfCounters {
// aarch64 specific counter to detect use of ll/sc instructions
ScopedFd fd_strex_counter;

// BPF-enabled hardware breakpoint for fast async signal emulation.
ScopedFd fd_async_signal_accelerator;

std::unique_ptr<PTState> pt_state;

TicksSemantics ticks_semantics_;
Expand Down
35 changes: 23 additions & 12 deletions src/ReplaySession.cc
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,7 @@ Completion ReplaySession::emulate_async_signal(
* be dealt with. */
bool pending_SIGTRAP = false;
bool did_set_internal_breakpoints = false;
bool did_set_bpf_breakpoint = false;
RunCommand SIGTRAP_run_command = RUN_CONTINUE;

/* Step 2: more slowly, find our way to the target ticks and
Expand Down Expand Up @@ -1042,25 +1043,29 @@ Completion ReplaySession::emulate_async_signal(
// breakpoint instruction in the tracee would have triggered a
// deterministic signal instead of an async one.
// So we must have hit our internal breakpoint.
ASSERT(t, did_set_internal_breakpoints);
ASSERT(t, did_set_internal_breakpoints || did_set_bpf_breakpoint);
// We didn't do an internal singlestep, and if we'd done a
// user-requested singlestep we would have hit the above case.
ASSERT(t, !trap_reasons.singlestep);
if (t->ip().undo_executed_bkpt(t->arch()) == in_syscallbuf_syscall_hook) {
t->vm()->remove_breakpoint(ip, BKPT_INTERNAL);
t->vm()->remove_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
t->move_ip_before_breakpoint();
return COMPLETE;
if (did_set_internal_breakpoints) {
if (t->ip().undo_executed_bkpt(t->arch()) == in_syscallbuf_syscall_hook) {
t->vm()->remove_breakpoint(ip, BKPT_INTERNAL);
t->vm()->remove_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
t->move_ip_before_breakpoint();
return COMPLETE;
}
ASSERT(t, regs.ip() == t->ip().undo_executed_bkpt(t->arch()));
}
ASSERT(t, regs.ip() == t->ip().undo_executed_bkpt(t->arch()));
/* Case (1) above: cover the tracks of
* our internal breakpoint, and go
* check again if we're at the
* target. */
LOG(debug) << " trap was for target $ip";

pending_SIGTRAP = false;
t->move_ip_before_breakpoint();
if (did_set_internal_breakpoints) {
t->move_ip_before_breakpoint();
}
/* We just backed up the $ip, but
* rewound it over an |int $3|
* instruction, which couldn't have
Expand Down Expand Up @@ -1093,6 +1098,7 @@ Completion ReplaySession::emulate_async_signal(
}
did_set_internal_breakpoints = false;
}
did_set_bpf_breakpoint = false;

if (at_target) {
/* Case (2) above: done. */
Expand All @@ -1117,11 +1123,16 @@ Completion ReplaySession::emulate_async_signal(
* no slower than single-stepping our way to
* the target execution point. */
LOG(debug) << " breaking on target $ip";
t->vm()->add_breakpoint(ip, BKPT_INTERNAL);
if (in_syscallbuf_syscall_hook) {
t->vm()->add_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
if (is_x86_string_instruction_at(t, ip) || !t->hpc.accelerate_async_signal(regs)) {
t->vm()->add_breakpoint(ip, BKPT_INTERNAL);

if (in_syscallbuf_syscall_hook) {
t->vm()->add_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
}
did_set_internal_breakpoints = true;
} else {
did_set_bpf_breakpoint = true;
}
did_set_internal_breakpoints = true;
continue_or_step(t, constraints, RESUME_UNLIMITED_TICKS);
SIGTRAP_run_command = constraints.command;
} else {
Expand Down
3 changes: 3 additions & 0 deletions src/Task.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1387,6 +1387,9 @@ TrapReasons Task::compute_trap_reasons() {
<< " expected breakpoint at " << ip_at_breakpoint << ", got siginfo "
<< si;
}
// If we got a SIGTRAP via a FASYNC signal it must be our bpf-enabled
// hardware breakpoint.
reasons.breakpoint |= si.si_code == SI_SIGIO;
}
return reasons;
}
Expand Down
50 changes: 50 additions & 0 deletions src/bpf/async_event_filter.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include <bpf/bpf_helpers.h>
#include <stdint.h>

/* Number of 64-bit slots in struct pt_regs; one map entry per slot. */
const uint32_t REGISTER_COUNT = sizeof(struct pt_regs)/sizeof(uint64_t);

/* The GPR values rr wants to match. rr mmaps this BPF_F_MMAPABLE array
 * from userspace and fills it with the target register state; keys are
 * offsetof(struct pt_regs, reg) / sizeof(uint64_t). */
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, REGISTER_COUNT);
__uint(map_flags, BPF_F_MMAPABLE);
__type(key, uint32_t);
__type(value, uint64_t);
} registers SEC(".maps");

SEC("perf_event")
/* Perf-event BPF filter. Return nonzero to let the perf event deliver its
 * signal, zero to suppress it. The hardware breakpoint fires on every hit
 * of the target $ip; the signal is only delivered when all checked GPRs
 * equal the values rr published in |registers|. */
int match_registers(struct bpf_perf_event_data* event) {
/* Compare one pt_regs field against its slot in |registers|. The key must
 * be an addressable variable and the NULL check on the lookup result is
 * required by the BPF verifier. A failed lookup fails safe by delivering
 * the signal (return 1). */
#define CHECK_REG(name) \
{ \
const uint32_t i = offsetof(struct pt_regs, name) / sizeof(uint64_t); \
uint64_t* reg = bpf_map_lookup_elem(&registers, &i); \
if (!reg) { \
return 1; \
} \
if (event->regs.name != *reg) { \
return 0; \
} \
}

CHECK_REG(r15)
CHECK_REG(r14)
CHECK_REG(r13)
CHECK_REG(r12)
CHECK_REG(rbp)
CHECK_REG(rbx)
CHECK_REG(r11)
CHECK_REG(r10)
CHECK_REG(r9)
CHECK_REG(r8)
CHECK_REG(rax)
CHECK_REG(rcx)
CHECK_REG(rdx)
CHECK_REG(rsi)
CHECK_REG(rdi)
CHECK_REG(rip)

return 1;
}

/* License declaration required for the helpers this program uses. */
char _license[] SEC("license") = "GPL";
6 changes: 3 additions & 3 deletions src/fast_forward.cc
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ static int fallible_read_byte(Task* t, remote_ptr<uint8_t> ip) {
return byte;
}

bool is_string_instruction_at(Task* t, remote_code_ptr ip) {
bool is_x86_string_instruction_at(Task* t, remote_code_ptr ip) {
bool found_rep = false;
remote_ptr<uint8_t> bare_ip = ip.to_data_ptr<uint8_t>();
while (true) {
Expand Down Expand Up @@ -447,7 +447,7 @@ bool maybe_at_or_after_x86_string_instruction(Task* t) {
return false;
}

return is_string_instruction_at(t, t->ip()) ||
return is_x86_string_instruction_at(t, t->ip()) ||
is_string_instruction_before(t, t->ip());
}

Expand All @@ -456,7 +456,7 @@ bool at_x86_string_instruction(Task* t) {
return false;
}

return is_string_instruction_at(t, t->ip());
return is_x86_string_instruction_at(t, t->ip());
}

} // namespace rr
8 changes: 8 additions & 0 deletions src/fast_forward.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@ bool maybe_at_or_after_x86_string_instruction(Task* t);
/* Return true if the instruction at t->ip() is a string instruction */
bool at_x86_string_instruction(Task* t);

#if defined(__i386__) || defined(__x86_64__)
/* Returns true if the instruction at |ip| is an x86 string instruction.
 * Implemented in fast_forward.cc. */
bool is_x86_string_instruction_at(Task* t, remote_code_ptr ip);
#else
/* No string instructions exist on non-x86 architectures. |inline| is
 * required here: a non-inline function definition in a header violates
 * the ODR and causes multiple-definition link errors when this header is
 * included from more than one translation unit. */
inline bool is_x86_string_instruction_at(Task*, remote_code_ptr) {
  return false;
}
#endif

} // namespace rr

#endif // RR_FAST_FORWARD_H_

0 comments on commit 743aafe

Please sign in to comment.