diff --git a/CMakeLists.txt b/CMakeLists.txt
index 22ef4ff3c36..55436b4d7de 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -225,6 +225,16 @@ set(REQUIRED_LIBS
   zlib
 )
 
+option(usebpf "Enable bpf acceleration")
+
+if(usebpf)
+  add_definitions(-DUSEBPF=1)
+  set(REQUIRED_LIBS
+    ${REQUIRED_LIBS}
+    libbpf
+  )
+endif(usebpf)
+
 foreach(required_lib ${REQUIRED_LIBS})
   string(TOUPPER ${required_lib} PKG)
   if(NOT SKIP_PKGCONFIG)
@@ -679,6 +689,19 @@ post_build_executable(rr)
 set(RR_BIN rr)
 add_dependencies(rr Generated)
 
+if(usebpf)
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/share/rr/async_event_filter.o
+                     DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/bpf/async_event_filter.c
+                     COMMAND clang -g -target bpf -Wall -O2 -c ${CMAKE_CURRENT_SOURCE_DIR}/src/bpf/async_event_filter.c -o ${CMAKE_CURRENT_BINARY_DIR}/share/rr/async_event_filter.o)
+
+  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/share/rr/async_event_filter.o
+          DESTINATION ${CMAKE_INSTALL_DATADIR}/rr)
+
+  add_custom_target(BPF DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/share/rr/async_event_filter.o)
+
+  add_dependencies(rr BPF)
+endif()
+
 option(strip "Strip debug info from rr binary")
 
 set(RR_MAIN_LINKER_FLAGS ${LINKER_FLAGS})
@@ -711,12 +734,13 @@ endif()
 target_link_libraries(rr
   ${CMAKE_DL_LIBS}
   ${ZLIB_LDFLAGS}
+  ${LIBBPF_LDFLAGS}
   brotli
 )
 
 if(staticlibs)
   # Urgh ... this might not work for everyone, but there doesn't seem to be
-  # a way to persuade pkg-confing/pkg_check_modules to produce the right flags
+  # a way to persuade pkg-config/pkg_check_modules to produce the right flags
   target_link_libraries(rr -L/home/roc/lib -l:libcapnp.a -l:libkj.a)
   # Note that this works for both clang++ and g++
   set(RR_MAIN_LINKER_FLAGS "-static-libstdc++ ${RR_MAIN_LINKER_FLAGS}")
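As a quick sanity check for reviewers, the object emitted by the add_custom_command above can be opened and loaded with a few lines of libbpf. This is only an illustrative sketch, not part of the change (the file name check_bpf.c and the relative path are assumptions); build with cc check_bpf.c -lbpf:

/* Hypothetical standalone check that the BPF object produced above passes
 * the kernel verifier. Loading requires root or CAP_BPF + CAP_PERFMON. */
#include <errno.h>
#include <stdio.h>
#include <string.h>

#include <bpf/libbpf.h>

int main(void) {
  /* With libbpf >= 1.0 a failure is reported as NULL with errno set. */
  struct bpf_object* obj = bpf_object__open("share/rr/async_event_filter.o");
  if (!obj) {
    fprintf(stderr, "open failed: %s\n", strerror(errno));
    return 1;
  }
  if (bpf_object__load(obj) < 0) {
    fprintf(stderr, "load failed: %s\n", strerror(errno));
    return 1;
  }
  puts("async_event_filter.o loads OK");
  bpf_object__close(obj);
  return 0;
}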
"registers")); + if (bpf_map_fd < 0) { + return false; + } + struct bpf_program* prog = bpf_program__next(NULL, obj); + if (!prog) { + return false; + } + bpf_prog_fd = bpf_program__fd(prog); + if (bpf_prog_fd < 0) { + return false; + } + + bpf_regs = (struct user_regs_struct*) + mmap(NULL, 4096, PROT_READ | PROT_WRITE, + MAP_SHARED, bpf_map_fd, 0); + if (!bpf_regs) { + return false; + } + + initialized = 1; + } else if (initialized < 0) { + return false; + } + + attr.bp_addr = 0; + fd_async_signal_accelerator = start_counter(tid, -1, &attr); + + struct f_owner_ex own; + own.type = F_OWNER_TID; + own.pid = tid; + if (fcntl(fd_async_signal_accelerator, F_SETOWN_EX, &own)) { + FATAL() << "Failed to SETOWN_EX bpf-accelerated breakpoint fd"; + } + + make_counter_async(fd_async_signal_accelerator, SIGTRAP); + + if (ioctl(fd_async_signal_accelerator, PERF_EVENT_IOC_SET_BPF, bpf_prog_fd)) { + FATAL() << "Failed PERF_EVENT_IOC_SET_BPF"; + } + } + + if (!fd_async_signal_accelerator.is_open()) { + return false; + } + + attr.bp_addr = regs.ip().register_value(); + if (ioctl(fd_async_signal_accelerator, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &attr)) { + FATAL() << "Failed PERF_EVENT_IOC_MODIFY_ATTRIBUTES"; + } + + auto r = regs.get_ptrace(); + memcpy(bpf_regs, &r, sizeof(struct user_regs_struct)); + + infallible_perf_event_enable_if_open(fd_async_signal_accelerator); + return true; +} +#endif + } // namespace rr diff --git a/src/PerfCounters.h b/src/PerfCounters.h index 744c9d6daad..f8bdc64cad6 100644 --- a/src/PerfCounters.h +++ b/src/PerfCounters.h @@ -21,6 +21,7 @@ namespace rr { +class Registers; class Task; enum TicksSemantics { @@ -175,6 +176,17 @@ class PerfCounters { */ static void start_pt_copy_thread(); + /** + * Try to use BPF to accelerate async signal processing + */ +#ifdef USEBPF + bool accelerate_async_signal(const Registers& regs); +#else + bool accelerate_async_signal(const Registers&) { + return false; + } +#endif + private: template void reset_arch_extras(); @@ -212,6 +224,9 @@ class PerfCounters { // aarch64 specific counter to detect use of ll/sc instructions ScopedFd fd_strex_counter; + // BPF-enabled hardware breakpoint for fast async signal emulation. + ScopedFd fd_async_signal_accelerator; + std::unique_ptr pt_state; TicksSemantics ticks_semantics_; diff --git a/src/ReplaySession.cc b/src/ReplaySession.cc index 0f041270e31..7fa20683e9e 100644 --- a/src/ReplaySession.cc +++ b/src/ReplaySession.cc @@ -982,6 +982,7 @@ Completion ReplaySession::emulate_async_signal( * be dealt with. */ bool pending_SIGTRAP = false; bool did_set_internal_breakpoints = false; + bool did_set_bpf_breakpoint = false; RunCommand SIGTRAP_run_command = RUN_CONTINUE; /* Step 2: more slowly, find our way to the target ticks and @@ -1042,17 +1043,19 @@ Completion ReplaySession::emulate_async_signal( // breakpoint instruction in the tracee would have triggered a // deterministic signal instead of an async one. // So we must have hit our internal breakpoint. - ASSERT(t, did_set_internal_breakpoints); + ASSERT(t, did_set_internal_breakpoints || did_set_bpf_breakpoint); // We didn't do an internal singlestep, and if we'd done a // user-requested singlestep we would have hit the above case. 
diff --git a/src/ReplaySession.cc b/src/ReplaySession.cc
index 0f041270e31..7fa20683e9e 100644
--- a/src/ReplaySession.cc
+++ b/src/ReplaySession.cc
@@ -982,6 +982,7 @@ Completion ReplaySession::emulate_async_signal(
    * be dealt with. */
  bool pending_SIGTRAP = false;
  bool did_set_internal_breakpoints = false;
+  bool did_set_bpf_breakpoint = false;
  RunCommand SIGTRAP_run_command = RUN_CONTINUE;
 
  /* Step 2: more slowly, find our way to the target ticks and
@@ -1042,17 +1043,19 @@ Completion ReplaySession::emulate_async_signal(
        // breakpoint instruction in the tracee would have triggered a
        // deterministic signal instead of an async one.
        // So we must have hit our internal breakpoint.
-        ASSERT(t, did_set_internal_breakpoints);
+        ASSERT(t, did_set_internal_breakpoints || did_set_bpf_breakpoint);
        // We didn't do an internal singlestep, and if we'd done a
        // user-requested singlestep we would have hit the above case.
        ASSERT(t, !trap_reasons.singlestep);
-        if (t->ip().undo_executed_bkpt(t->arch()) == in_syscallbuf_syscall_hook) {
-          t->vm()->remove_breakpoint(ip, BKPT_INTERNAL);
-          t->vm()->remove_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
-          t->move_ip_before_breakpoint();
-          return COMPLETE;
+        if (did_set_internal_breakpoints) {
+          if (t->ip().undo_executed_bkpt(t->arch()) == in_syscallbuf_syscall_hook) {
+            t->vm()->remove_breakpoint(ip, BKPT_INTERNAL);
+            t->vm()->remove_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
+            t->move_ip_before_breakpoint();
+            return COMPLETE;
+          }
+          ASSERT(t, regs.ip() == t->ip().undo_executed_bkpt(t->arch()));
        }
-        ASSERT(t, regs.ip() == t->ip().undo_executed_bkpt(t->arch()));
        /* Case (1) above: cover the tracks of
         * our internal breakpoint, and go
         * check again if we're at the
@@ -1060,7 +1063,9 @@ Completion ReplaySession::emulate_async_signal(
        LOG(debug) << "  trap was for target $ip";
 
        pending_SIGTRAP = false;
-        t->move_ip_before_breakpoint();
+        if (did_set_internal_breakpoints) {
+          t->move_ip_before_breakpoint();
+        }
        /* We just backed up the $ip, but
         * rewound it over an |int $3|
         * instruction, which couldn't have
@@ -1093,6 +1098,7 @@ Completion ReplaySession::emulate_async_signal(
      }
      did_set_internal_breakpoints = false;
    }
+    did_set_bpf_breakpoint = false;
 
    if (at_target) {
      /* Case (2) above: done. */
@@ -1117,11 +1123,16 @@ Completion ReplaySession::emulate_async_signal(
       * no slower than single-stepping our way to
       * the target execution point. */
      LOG(debug) << "  breaking on target $ip";
-      t->vm()->add_breakpoint(ip, BKPT_INTERNAL);
-      if (in_syscallbuf_syscall_hook) {
-        t->vm()->add_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
+      if (is_x86_string_instruction_at(t, ip) || !t->hpc.accelerate_async_signal(regs)) {
+        t->vm()->add_breakpoint(ip, BKPT_INTERNAL);
+
+        if (in_syscallbuf_syscall_hook) {
+          t->vm()->add_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
+        }
+        did_set_internal_breakpoints = true;
+      } else {
+        did_set_bpf_breakpoint = true;
      }
-      did_set_internal_breakpoints = true;
      continue_or_step(t, constraints, RESUME_UNLIMITED_TICKS);
      SIGTRAP_run_command = constraints.command;
    } else {
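Context for the Task.cc hunk below: the accelerator fd is put into FASYNC mode, owned by the specific tracee thread, with SIGTRAP as the notification signal, so a breakpoint hit that passes the BPF filter arrives as an asynchronous SIGTRAP that compute_trap_reasons() can recognize by its si_code. A sketch of that setup, assuming make_counter_async() in PerfCounters.cc performs the equivalent O_ASYNC/F_SETSIG calls (the helper name here is illustrative):

#define _GNU_SOURCE /* F_SETOWN_EX, F_SETSIG */
#include <fcntl.h>
#include <signal.h>
#include <sys/types.h>

/* Route async notifications for perf_fd to thread tid as SIGTRAP. */
static int deliver_as_sigtrap(int perf_fd, pid_t tid) {
  struct f_owner_ex own = { .type = F_OWNER_TID, .pid = tid };
  if (fcntl(perf_fd, F_SETOWN_EX, &own) < 0 ||
      fcntl(perf_fd, F_SETFL, O_ASYNC) < 0 ||
      fcntl(perf_fd, F_SETSIG, SIGTRAP) < 0) {
    return -1;
  }
  return 0;
}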
diff --git a/src/Task.cc b/src/Task.cc
index 8952929dc94..fbb962668ad 100644
--- a/src/Task.cc
+++ b/src/Task.cc
@@ -1356,6 +1356,9 @@ TrapReasons Task::compute_trap_reasons() {
          << " expected breakpoint at " << ip_at_breakpoint << ", got siginfo "
          << si;
    }
+    // If we got a SIGTRAP via a FASYNC signal it must be our bpf-enabled
+    // hardware breakpoint.
+    reasons.breakpoint |= si.si_code == SI_SIGIO;
  }
  return reasons;
 }
diff --git a/src/bpf/async_event_filter.c b/src/bpf/async_event_filter.c
new file mode 100644
index 00000000000..bd5d5cbb3c0
--- /dev/null
+++ b/src/bpf/async_event_filter.c
@@ -0,0 +1,50 @@
+#include <stdint.h>
+#include <linux/bpf.h>
+#include <linux/bpf_perf_event.h>
+#include <bpf/bpf_helpers.h>
+
+const uint32_t REGISTER_COUNT = sizeof(struct pt_regs)/sizeof(uint64_t);
+
+struct {
+  __uint(type, BPF_MAP_TYPE_ARRAY);
+  __uint(max_entries, REGISTER_COUNT);
+  __uint(map_flags, BPF_F_MMAPABLE);
+  __type(key, uint32_t);
+  __type(value, uint64_t);
+} registers SEC(".maps");
+
+SEC("perf_event")
+int match_registers(struct bpf_perf_event_data* event) {
+#define CHECK_REG(name)                                                    \
+  {                                                                        \
+    const uint32_t i = offsetof(struct pt_regs, name) / sizeof(uint64_t);  \
+    uint64_t* reg = bpf_map_lookup_elem(&registers, &i);                   \
+    if (!reg) {                                                            \
+      return 1;                                                            \
+    }                                                                      \
+    if (event->regs.name != *reg) {                                        \
+      return 0;                                                            \
+    }                                                                      \
+  }
+
+  CHECK_REG(r15)
+  CHECK_REG(r14)
+  CHECK_REG(r13)
+  CHECK_REG(r12)
+  CHECK_REG(rbp)
+  CHECK_REG(rbx)
+  CHECK_REG(r11)
+  CHECK_REG(r10)
+  CHECK_REG(r9)
+  CHECK_REG(r8)
+  CHECK_REG(rax)
+  CHECK_REG(rcx)
+  CHECK_REG(rdx)
+  CHECK_REG(rsi)
+  CHECK_REG(rdi)
+  CHECK_REG(rip)
+
+  return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/src/fast_forward.cc b/src/fast_forward.cc
index 1eeeb86dc1e..30531c60ee2 100644
--- a/src/fast_forward.cc
+++ b/src/fast_forward.cc
@@ -404,7 +404,7 @@ static int fallible_read_byte(Task* t, remote_ptr<uint8_t> ip) {
  return byte;
 }
 
-bool is_string_instruction_at(Task* t, remote_code_ptr ip) {
+bool is_x86_string_instruction_at(Task* t, remote_code_ptr ip) {
  bool found_rep = false;
  remote_ptr<uint8_t> bare_ip = ip.to_data_ptr<uint8_t>();
 
  while (true) {
@@ -447,7 +447,7 @@ bool maybe_at_or_after_x86_string_instruction(Task* t) {
    return false;
  }
 
-  return is_string_instruction_at(t, t->ip()) ||
+  return is_x86_string_instruction_at(t, t->ip()) ||
         is_string_instruction_before(t, t->ip());
 }
 
@@ -456,7 +456,7 @@ bool at_x86_string_instruction(Task* t) {
    return false;
  }
 
-  return is_string_instruction_at(t, t->ip());
+  return is_x86_string_instruction_at(t, t->ip());
 }
 
 } // namespace rr
diff --git a/src/fast_forward.h b/src/fast_forward.h
index 944d93c8ffa..c0398a6889d 100644
--- a/src/fast_forward.h
+++ b/src/fast_forward.h
@@ -60,6 +60,14 @@ bool maybe_at_or_after_x86_string_instruction(Task* t);
 /* Return true if the instruction at t->ip() is a string instruction */
 bool at_x86_string_instruction(Task* t);
 
+#if defined(__i386__) || defined(__x86_64__)
+bool is_x86_string_instruction_at(Task* t, remote_code_ptr ip);
+#else
+inline bool is_x86_string_instruction_at(Task*, remote_code_ptr) {
+  return false;
+}
+#endif
+
 } // namespace rr
 
 #endif // RR_FAST_FORWARD_H_
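Finally, a hedged sketch of the userspace side of the BPF_F_MMAPABLE array map declared above: because the map is mmap-able, rr can publish the expected register values with a plain memcpy (as accelerate_async_signal() does) instead of one bpf(2) syscall per register. bpf_map_create() needs libbpf >= 0.7; the helper below is illustrative only, since the real map comes from the loaded object file:

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

#include <bpf/bpf.h> /* bpf_map_create, LIBBPF_OPTS */

/* Create an mmap-able array map shaped like the "registers" map above and
 * return a writable pointer to its backing storage. */
static uint64_t* map_registers(uint32_t n_regs, int* out_map_fd) {
  LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
  int fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "registers", sizeof(uint32_t),
                          sizeof(uint64_t), n_regs, &opts);
  if (fd < 0) {
    return NULL;
  }
  /* 4096 matches the mapping length used in accelerate_async_signal(). */
  void* p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  if (p == MAP_FAILED) {
    return NULL;
  }
  *out_map_fd = fd;
  return (uint64_t*)p;
}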