diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0327d605641..15158548e33 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -237,6 +237,16 @@ if(NOT ANDROID)
   add_definitions(-DZSTD=1)
 endif()
 
+option(bpf "Enable bpf acceleration")
+
+if(bpf)
+  add_definitions(-DBPF=1)
+  set(REQUIRED_LIBS
+    ${REQUIRED_LIBS}
+    libbpf
+  )
+endif(bpf)
+
 foreach(required_lib ${REQUIRED_LIBS})
   string(TOUPPER ${required_lib} PKG)
   if(NOT SKIP_PKGCONFIG)
@@ -692,6 +702,19 @@ post_build_executable(rr)
 set(RR_BIN rr)
 add_dependencies(rr Generated)
 
+if(bpf)
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/share/rr/async_event_filter.o
+                     DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/bpf/async_event_filter.c
+                     COMMAND clang -g -target bpf -Wall -O2 -c ${CMAKE_CURRENT_SOURCE_DIR}/src/bpf/async_event_filter.c -o ${CMAKE_CURRENT_BINARY_DIR}/share/rr/async_event_filter.o)
+
+  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/share/rr/async_event_filter.o
+          DESTINATION ${CMAKE_INSTALL_DATADIR}/rr)
+
+  add_custom_target(BPF DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/share/rr/async_event_filter.o)
+
+  add_dependencies(rr BPF)
+endif()
+
 option(strip "Strip debug info from rr binary")
 
 set(RR_MAIN_LINKER_FLAGS ${LINKER_FLAGS})
@@ -724,6 +747,7 @@ endif()
 
 target_link_libraries(rr
   ${CMAKE_DL_LIBS}
   ${ZLIB_LDFLAGS}
+  ${LIBBPF_LDFLAGS}
   brotli
 )
@@ -733,7 +757,7 @@ endif()
 
 if(staticlibs)
   # Urgh ... this might not work for everyone, but there doesn't seem to be
-  # a way to persuade pkg-confing/pkg_check_modules to produce the right flags
+  # a way to persuade pkg-config/pkg_check_modules to produce the right flags
   target_link_libraries(rr -L/home/roc/lib -l:libcapnp.a -l:libkj.a)
   # Note that this works for both clang++ and g++
   set(RR_MAIN_LINKER_FLAGS "-static-libstdc++ ${RR_MAIN_LINKER_FLAGS}")
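The build changes above do two things: the first hunk adds a `bpf` option that pulls libbpf in as a required package, and the second compiles src/bpf/async_event_filter.c with clang's BPF backend into share/rr/async_event_filter.o, installed alongside rr's other support files. For context, the sketch below shows what consuming such an object looks like at runtime: libbpf loads it into the kernel, and because its maps are declared BPF_F_MMAPABLE they can be mapped straight into the tracer's address space. This is a hedged sketch, not rr's code: the map names "registers" and "skips" come from the BPF source at the end of this patch, `load_filter_sketch` is a hypothetical helper, and the `!obj` check assumes libbpf 1.0 strict error conventions (the rr code below handles the legacy ERR_PTR convention instead).

    /* Hypothetical standalone loader, not part of the patch. */
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <bpf/libbpf.h>

    int load_filter_sketch(const char* path) {
      struct bpf_object* obj = bpf_object__open(path);
      if (!obj || bpf_object__load(obj) < 0) {
        return -1; /* loading typically needs root or CAP_BPF/CAP_PERFMON */
      }

      int regs_fd = bpf_object__find_map_fd_by_name(obj, "registers");
      int skips_fd = bpf_object__find_map_fd_by_name(obj, "skips");
      if (regs_fd < 0 || skips_fd < 0) {
        return -1;
      }

      /* BPF_F_MMAPABLE array maps behave like ordinary shared memory:
       * no bpf() syscall is needed to read or write entries. */
      uint64_t* regs = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                            MAP_SHARED, regs_fd, 0);
      uint64_t* skips = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                             MAP_SHARED, skips_fd, 0);
      if (regs == MAP_FAILED || skips == MAP_FAILED) {
        return -1;
      }

      regs[0] = 0; /* the tracer writes the expected register values here */
      printf("loaded %s; skips so far: %llu\n", path,
             (unsigned long long)skips[0]);
      return 0;
    }

PerfCounters.cc below implements essentially this flow, with full error handling and a process-wide singleton.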
diff --git a/src/PerfCounters.cc b/src/PerfCounters.cc
index 85ef2fd9c7b..294776e6558 100644
--- a/src/PerfCounters.cc
+++ b/src/PerfCounters.cc
@@ -17,6 +17,11 @@
 #include
 #include
 
+#ifdef BPF
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#endif
+
 #include
 #include
 #include
@@ -981,6 +986,7 @@ void PerfCounters::close() {
   fd_minus_ticks_measure.close();
   fd_useless_counter.close();
   fd_ticks_in_transaction.close();
+  fd_async_signal_accelerator.close();
 }
 
 Ticks PerfCounters::stop(Task* t, Error* error) {
@@ -1007,6 +1013,7 @@ Ticks PerfCounters::stop(Task* t, Error* error) {
     if (pt_state) {
       infallible_perf_event_disable_if_open(pt_state->pt_perf_event_fd);
     }
+    infallible_perf_event_disable_if_open(fd_async_signal_accelerator);
   }
   return ticks;
 }
@@ -1120,4 +1127,183 @@ Ticks PerfCounters::read_ticks(Task* t, Error* error) {
   return ret;
 }
 
+#ifdef BPF
+class BpfAccelerator {
+public:
+  static std::shared_ptr<BpfAccelerator> get_or_create();
+
+  ScopedFd create_counter(pid_t tid);
+  void match_regs_and_open_counter(const Registers& regs, ScopedFd& counter);
+  uint64_t skips() const {
+    return *bpf_skips;
+  }
+
+  // Can't be private because of make_shared.
+  BpfAccelerator(struct bpf_object* bpf_obj, int bpf_prog_fd,
+                 user_regs_struct* bpf_regs, uint64_t* bpf_skips)
+    : bpf_obj(bpf_obj), bpf_prog_fd(bpf_prog_fd), bpf_regs(bpf_regs), bpf_skips(bpf_skips)
+  {}
+
+  ~BpfAccelerator() {
+    munmap(bpf_skips, 4096);
+    munmap(bpf_regs, 4096);
+    bpf_object__close(bpf_obj);
+  }
+
+private:
+  static std::shared_ptr<BpfAccelerator> singleton;
+
+  struct perf_event_attr attr;
+  struct bpf_object* bpf_obj;
+  // Not a ScopedFd because the bpf_object maintains ownership.
+  int bpf_prog_fd;
+  user_regs_struct* bpf_regs;
+  uint64_t* bpf_skips;
+};
+
+std::shared_ptr<BpfAccelerator> BpfAccelerator::singleton;
+
+/* static */ std::shared_ptr<BpfAccelerator> BpfAccelerator::get_or_create() {
+  static int initialized;
+  if (BpfAccelerator::singleton) {
+    return BpfAccelerator::singleton;
+  }
+
+  if (!initialized) {
+    initialized = -1;
+
+    libbpf_set_strict_mode(LIBBPF_STRICT_DIRECT_ERRS);
+    string path = resource_path() + "share/rr/async_event_filter.o";
+    struct bpf_object* obj = bpf_object__open(path.c_str());
+    if ((intptr_t)obj <= 0) {
+      LOG(error) << "Failed to find bpf at " << path;
+      return nullptr;
+    }
+    if (bpf_object__load(obj) < 0) {
+      LOG(error) << "Failed to load bpf at " << path << " into the kernel. Do we have permissions?";
+      bpf_object__close(obj);
+      return nullptr;
+    }
+    int bpf_map_fd = bpf_object__find_map_fd_by_name(obj, "registers");
+    if (bpf_map_fd < 0) {
+      CLEAN_FATAL() << "rr's bpf at " << path << " is corrupt";
+      return nullptr;
+    }
+    struct bpf_program* prog = bpf_program__next(NULL, obj);
+    if (!prog) {
+      CLEAN_FATAL() << "rr's bpf at " << path << " is corrupt";
+      return nullptr;
+    }
+    int bpf_prog_fd = bpf_program__fd(prog);
+    if (bpf_prog_fd < 0) {
+      CLEAN_FATAL() << "rr's bpf at " << path << " is corrupt";
+      return nullptr;
+    }
+
+    auto bpf_regs = (struct user_regs_struct*)
+        mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+             MAP_SHARED, bpf_map_fd, 0);
+    if (bpf_regs == MAP_FAILED) {
+      CLEAN_FATAL() << "Failed to mmap bpf maps";
+      return nullptr;
+    }
+
+    bpf_map_fd = bpf_object__find_map_fd_by_name(obj, "skips");
+    if (bpf_map_fd < 0) {
+      CLEAN_FATAL() << "rr's bpf at " << path << " is corrupt";
+      return nullptr;
+    }
+
+    auto bpf_skips = (uint64_t*)
+        mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+             MAP_SHARED, bpf_map_fd, 0);
+    if (bpf_skips == MAP_FAILED) {
+      CLEAN_FATAL() << "Failed to mmap bpf maps";
+      return nullptr;
+    }
+
+    BpfAccelerator::singleton =
+        std::make_shared<BpfAccelerator>(obj, bpf_prog_fd, bpf_regs, bpf_skips);
+    memset(&singleton->attr, 0, sizeof(singleton->attr));
+    singleton->attr.type = PERF_TYPE_BREAKPOINT;
+    singleton->attr.size = sizeof(attr);
+    singleton->attr.bp_type = HW_BREAKPOINT_X;
+    singleton->attr.bp_len = sizeof(long);
+    singleton->attr.sample_period = 1;
+    singleton->attr.sample_type = PERF_SAMPLE_IP;
+    singleton->attr.pinned = 1;
+    singleton->attr.exclude_kernel = 1;
+    singleton->attr.exclude_hv = 1;
+    singleton->attr.wakeup_events = 1;
+    singleton->attr.precise_ip = 3;
+    singleton->attr.disabled = 1;
+    initialized = 1;
+  }
+
+  return BpfAccelerator::singleton;
+}
+
+ScopedFd BpfAccelerator::create_counter(pid_t tid) {
+  attr.bp_addr = 0;
+  ScopedFd fd = start_counter(tid, -1, &attr);
+
+  struct f_owner_ex own;
+  own.type = F_OWNER_TID;
+  own.pid = tid;
+  if (fcntl(fd, F_SETOWN_EX, &own)) {
+    FATAL() << "Failed to SETOWN_EX bpf-accelerated breakpoint fd";
+  }
+
+  make_counter_async(fd, SIGTRAP);
+
+  if (ioctl(fd, PERF_EVENT_IOC_SET_BPF, bpf_prog_fd)) {
+    FATAL() << "Failed PERF_EVENT_IOC_SET_BPF";
+  }
+
+  return fd;
+}
+
+void BpfAccelerator::match_regs_and_open_counter(const Registers& regs, ScopedFd& fd) {
+  attr.bp_addr = regs.ip().register_value();
+  if (ioctl(fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &attr)) {
+    FATAL() << "Failed PERF_EVENT_IOC_MODIFY_ATTRIBUTES";
+  }
+
+  auto r = regs.get_ptrace();
+  memcpy(bpf_regs, &r, sizeof(struct user_regs_struct));
+  *bpf_skips = 0;
+
+  infallible_perf_event_enable_if_open(fd);
+}
+
+bool PerfCounters::accelerate_async_signal(const Registers& regs) {
+  if (!fd_async_signal_accelerator.is_open()) {
+    if (!bpf) {
+      bpf = BpfAccelerator::get_or_create();
+    }
+
+    if (!bpf) {
+      return false;
+    }
+
+    fd_async_signal_accelerator = bpf->create_counter(tid);
+  }
+
+  if (!fd_async_signal_accelerator.is_open()) {
+    return false;
+  }
+
+  bpf->match_regs_and_open_counter(regs, fd_async_signal_accelerator);
+  return true;
+}
+
+uint64_t PerfCounters::bpf_skips() const {
+  if (!bpf) {
+    return 0;
+  }
+
+  return bpf->skips();
+}
+#endif
+
 } // namespace rr
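The heart of the change is `create_counter()`: a hardware execute breakpoint delivered as an asynchronous SIGTRAP, with a BPF program attached that votes on every hit. Reduced to the underlying kernel facilities, it reads roughly like the sketch below. Again this is hypothetical code, not rr's: `start_counter()` and `make_counter_async()` are rr's existing helpers, collapsed here into raw syscalls, and error checks are elided.

    /* breakpoint_sketch: a hedged reduction of create_counter(). */
    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <signal.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <sys/types.h>
    #include <unistd.h>
    #include <linux/hw_breakpoint.h>
    #include <linux/perf_event.h>

    int breakpoint_sketch(pid_t tid, uint64_t target_ip, int bpf_prog_fd) {
      /* 1. A hardware execute breakpoint that samples on every hit. */
      struct perf_event_attr attr;
      memset(&attr, 0, sizeof(attr));
      attr.type = PERF_TYPE_BREAKPOINT;
      attr.size = sizeof(attr);
      attr.bp_type = HW_BREAKPOINT_X;
      attr.bp_addr = target_ip;
      attr.bp_len = sizeof(long);
      attr.sample_period = 1;
      attr.exclude_kernel = 1;
      int fd = syscall(SYS_perf_event_open, &attr, tid, -1, -1, 0);

      /* 2. Route the overflow as an async SIGTRAP to that thread only. */
      struct f_owner_ex own = { .type = F_OWNER_TID, .pid = tid };
      fcntl(fd, F_SETOWN_EX, &own);
      fcntl(fd, F_SETSIG, SIGTRAP);
      fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);

      /* 3. Gate the overflow with the filter: when the BPF program
       * returns 0 the kernel drops the sample, so no SIGTRAP is sent
       * and the tracee keeps running at full speed. */
      ioctl(fd, PERF_EVENT_IOC_SET_BPF, bpf_prog_fd);
      return fd;
    }

The payoff shows up in ReplaySession.cc below: instead of trapping on every arrival at the target $ip and comparing registers in userspace, rr takes at most one trap, at the arrival whose full register state matches.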
diff --git a/src/PerfCounters.h b/src/PerfCounters.h
index 744c9d6daad..c9982f6a1dd 100644
--- a/src/PerfCounters.h
+++ b/src/PerfCounters.h
@@ -21,7 +21,9 @@
 
 namespace rr {
 
+class Registers;
 class Task;
+class BpfAccelerator;
 
 enum TicksSemantics {
   TICKS_RETIRED_CONDITIONAL_BRANCHES,
@@ -175,6 +177,21 @@ class PerfCounters {
    */
   static void start_pt_copy_thread();
 
+  /**
+   * Try to use BPF to accelerate async signal processing.
+   */
+#ifdef BPF
+  bool accelerate_async_signal(const Registers& regs);
+  uint64_t bpf_skips() const;
+#else
+  bool accelerate_async_signal(const Registers&) {
+    return false;
+  }
+  uint64_t bpf_skips() const {
+    return 0;
+  }
+#endif
+
 private:
   template <typename Arch> void reset_arch_extras();
 
@@ -212,6 +229,11 @@ class PerfCounters {
   // aarch64 specific counter to detect use of ll/sc instructions
   ScopedFd fd_strex_counter;
 
+  // BPF-enabled hardware breakpoint for fast async signal emulation.
+  ScopedFd fd_async_signal_accelerator;
+
+  std::shared_ptr<BpfAccelerator> bpf;
+
   std::unique_ptr<PTState> pt_state;
 
   TicksSemantics ticks_semantics_;
diff --git a/src/ReplaySession.cc b/src/ReplaySession.cc
index 40da2d5d009..6c42c896a56 100644
--- a/src/ReplaySession.cc
+++ b/src/ReplaySession.cc
@@ -982,6 +982,7 @@ Completion ReplaySession::emulate_async_signal(
    * be dealt with. */
   bool pending_SIGTRAP = false;
   bool did_set_internal_breakpoints = false;
+  bool did_set_bpf_breakpoint = false;
   RunCommand SIGTRAP_run_command = RUN_CONTINUE;
 
   /* Step 2: more slowly, find our way to the target ticks and
@@ -1042,17 +1043,21 @@ Completion ReplaySession::emulate_async_signal(
       // breakpoint instruction in the tracee would have triggered a
       // deterministic signal instead of an async one.
       // So we must have hit our internal breakpoint.
-      ASSERT(t, did_set_internal_breakpoints);
+      ASSERT(t, did_set_internal_breakpoints || did_set_bpf_breakpoint);
       // We didn't do an internal singlestep, and if we'd done a
       // user-requested singlestep we would have hit the above case.
       ASSERT(t, !trap_reasons.singlestep);
-      if (t->ip().undo_executed_bkpt(t->arch()) == in_syscallbuf_syscall_hook) {
-        t->vm()->remove_breakpoint(ip, BKPT_INTERNAL);
-        t->vm()->remove_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
-        t->move_ip_before_breakpoint();
-        return COMPLETE;
+      if (did_set_internal_breakpoints) {
+        if (t->ip().undo_executed_bkpt(t->arch()) == in_syscallbuf_syscall_hook) {
+          t->vm()->remove_breakpoint(ip, BKPT_INTERNAL);
+          t->vm()->remove_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
+          t->move_ip_before_breakpoint();
+          return COMPLETE;
+        }
+        ASSERT(t, regs.ip() == t->ip().undo_executed_bkpt(t->arch()));
+      } else {
+        LOG(debug) << "  fast-forwarded through " << t->hpc.bpf_skips() << " breakpoint hits with bpf";
       }
-      ASSERT(t, regs.ip() == t->ip().undo_executed_bkpt(t->arch()));
       /* Case (1) above: cover the tracks of
        * our internal breakpoint, and go
        * check again if we're at the
@@ -1060,7 +1065,9 @@ Completion ReplaySession::emulate_async_signal(
       LOG(debug) << "    trap was for target $ip";
       pending_SIGTRAP = false;
 
-      t->move_ip_before_breakpoint();
+      if (did_set_internal_breakpoints) {
+        t->move_ip_before_breakpoint();
+      }
       /* We just backed up the $ip, but
        * rewound it over an |int $3|
        * instruction, which couldn't have
@@ -1093,6 +1100,7 @@ Completion ReplaySession::emulate_async_signal(
         }
         did_set_internal_breakpoints = false;
       }
+      did_set_bpf_breakpoint = false;
 
       if (at_target) {
         /* Case (2) above: done. */
@@ -1117,11 +1125,16 @@ Completion ReplaySession::emulate_async_signal(
        * no slower than single-stepping our way to
        * the target execution point. */
       LOG(debug) << "  breaking on target $ip";
-      t->vm()->add_breakpoint(ip, BKPT_INTERNAL);
-      if (in_syscallbuf_syscall_hook) {
-        t->vm()->add_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
+      if (is_x86_string_instruction_at(t, ip) || !t->hpc.accelerate_async_signal(regs)) {
+        t->vm()->add_breakpoint(ip, BKPT_INTERNAL);
+
+        if (in_syscallbuf_syscall_hook) {
+          t->vm()->add_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
+        }
+        did_set_internal_breakpoints = true;
+      } else {
+        did_set_bpf_breakpoint = true;
       }
-      did_set_internal_breakpoints = true;
       continue_or_step(t, constraints, RESUME_UNLIMITED_TICKS);
       SIGTRAP_run_command = constraints.command;
     } else {
diff --git a/src/Task.cc b/src/Task.cc
index 5ffae6915ea..728668133b8 100644
--- a/src/Task.cc
+++ b/src/Task.cc
@@ -1387,6 +1387,9 @@ TrapReasons Task::compute_trap_reasons() {
                << " expected breakpoint at " << ip_at_breakpoint
                << ", got siginfo " << si;
     }
+    // If we got a SIGTRAP via a FASYNC signal it must be our bpf-enabled
+    // hardware breakpoint.
+    reasons.breakpoint |= si.si_code == SI_SIGIO;
   }
   return reasons;
 }
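Two details above are worth unpacking. First, the Task.cc hunk: a SIGTRAP delivered through the F_SETSIG/FASYNC fd-ownership path arrives with si_code SI_SIGIO rather than a TRAP_* code, so compute_trap_reasons() must learn to classify it as a breakpoint hit. Second, match_regs_and_open_counter() memcpys a ptrace-side user_regs_struct over the map that the BPF program (next file) indexes by offsetof(struct pt_regs, reg) / sizeof(uint64_t); that only lines up because on x86-64 the two structs order their general-purpose registers identically. A compile-time spot check of that layout assumption (hypothetical, not part of the patch, x86-64 only):

    /* layout_check.c: verify pt_regs and user_regs_struct agree on
     * general-purpose register offsets on x86-64. */
    #include <stddef.h>
    #include <sys/user.h>    /* struct user_regs_struct (ptrace layout) */
    #include <asm/ptrace.h>  /* userspace struct pt_regs (kernel layout) */

    #define SAME_OFFSET(reg)                                   \
      _Static_assert(offsetof(struct pt_regs, reg) ==          \
                     offsetof(struct user_regs_struct, reg),   \
                     "pt_regs/user_regs_struct disagree on " #reg)

    SAME_OFFSET(r15);
    SAME_OFFSET(rbp);
    SAME_OFFSET(rax);
    SAME_OFFSET(rdi);
    SAME_OFFSET(rsp);

Note that the filter below compares the sixteen general-purpose registers but not rip, which is already pinned by bp_addr, and not eflags or the segment registers.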
diff --git a/src/bpf/async_event_filter.c b/src/bpf/async_event_filter.c
new file mode 100644
index 00000000000..7ee2f997b49
--- /dev/null
+++ b/src/bpf/async_event_filter.c
@@ -0,0 +1,65 @@
+/* -*- Mode: C++; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */
+
+#include <linux/bpf.h>
+#include <linux/bpf_perf_event.h>
+#include <bpf/bpf_helpers.h>
+#include <stdint.h>
+
+const uint32_t REGISTER_COUNT = sizeof(struct pt_regs)/sizeof(uint64_t);
+
+struct {
+  __uint(type, BPF_MAP_TYPE_ARRAY);
+  __uint(max_entries, REGISTER_COUNT);
+  __uint(map_flags, BPF_F_MMAPABLE);
+  __type(key, uint32_t);
+  __type(value, uint64_t);
+} registers SEC(".maps");
+
+struct {
+  __uint(type, BPF_MAP_TYPE_ARRAY);
+  __uint(max_entries, 1);
+  __uint(map_flags, BPF_F_MMAPABLE);
+  __type(key, uint32_t);
+  __type(value, uint64_t);
+} skips SEC(".maps");
+
+SEC("perf_event")
+int match_registers(struct bpf_perf_event_data* event) {
+#define CHECK_REG(name)                                                      \
+  do {                                                                       \
+    const uint32_t i = offsetof(struct pt_regs, name) / sizeof(uint64_t);    \
+    uint64_t* reg = bpf_map_lookup_elem(&registers, &i);                     \
+    if (!reg) {                                                              \
+      return 1;                                                              \
+    }                                                                        \
+    if (event->regs.name != *reg) {                                          \
+      const uint32_t j = 0;                                                  \
+      uint64_t* s = bpf_map_lookup_elem(&skips, &j);                         \
+      if (s) {                                                               \
+        *s += 1;                                                             \
+      }                                                                      \
+      return 0;                                                              \
+    }                                                                        \
+  } while(0)
+
+  CHECK_REG(r15);
+  CHECK_REG(r14);
+  CHECK_REG(r13);
+  CHECK_REG(r12);
+  CHECK_REG(rbp);
+  CHECK_REG(rbx);
+  CHECK_REG(r11);
+  CHECK_REG(r10);
+  CHECK_REG(r9);
+  CHECK_REG(r8);
+  CHECK_REG(rax);
+  CHECK_REG(rcx);
+  CHECK_REG(rdx);
+  CHECK_REG(rsi);
+  CHECK_REG(rdi);
+  CHECK_REG(rsp);
+
+  return 1;
+}
+
+char _license[] SEC("license") = "Dual MIT/GPL";
diff --git a/src/fast_forward.cc b/src/fast_forward.cc
index 1eeeb86dc1e..4bc371055eb 100644
--- a/src/fast_forward.cc
+++ b/src/fast_forward.cc
@@ -404,7 +404,8 @@ static int fallible_read_byte(Task* t, remote_ptr<uint8_t> ip) {
   return byte;
 }
 
-bool is_string_instruction_at(Task* t, remote_code_ptr ip) {
+#if defined(__i386__) || defined(__x86_64__)
+bool is_x86_string_instruction_at(Task* t, remote_code_ptr ip) {
   bool found_rep = false;
   remote_ptr<uint8_t> bare_ip = ip.to_data_ptr();
   while (true) {
@@ -421,6 +422,7 @@ bool is_string_instruction_at(Task* t, remote_code_ptr ip) {
     ++bare_ip;
   }
 }
+#endif
 
 static bool is_string_instruction_before(Task* t, remote_code_ptr ip) {
   remote_ptr<uint8_t> bare_ip = ip.to_data_ptr();
@@ -447,7 +449,7 @@ bool maybe_at_or_after_x86_string_instruction(Task* t) {
     return false;
   }
 
-  return is_string_instruction_at(t, t->ip()) ||
+  return is_x86_string_instruction_at(t, t->ip()) ||
          is_string_instruction_before(t, t->ip());
 }
 
@@ -456,7 +458,7 @@ bool at_x86_string_instruction(Task* t) {
     return false;
   }
 
-  return is_string_instruction_at(t, t->ip());
+  return is_x86_string_instruction_at(t, t->ip());
 }
 
 } // namespace rr
diff --git a/src/fast_forward.h b/src/fast_forward.h
index 944d93c8ffa..18620689b98 100644
--- a/src/fast_forward.h
+++ b/src/fast_forward.h
@@ -60,6 +60,14 @@ bool maybe_at_or_after_x86_string_instruction(Task* t);
 /* Return true if the instruction at t->ip() is a string instruction */
 bool at_x86_string_instruction(Task* t);
 
+#if defined(__i386__) || defined(__x86_64__)
+bool is_x86_string_instruction_at(Task* t, remote_code_ptr ip);
+#else
+inline bool is_x86_string_instruction_at(Task*, remote_code_ptr) {
+  return false;
+}
+#endif
+
 } // namespace rr
 
 #endif // RR_FAST_FORWARD_H_
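A closing note on failure modes: everything degrades gracefully. With the bpf CMake option off, the #else stubs in PerfCounters.h return false and ReplaySession keeps using the classic int3 breakpoints; the same fallback happens at runtime when the object file is missing or the kernel refuses to load it (the "Do we have permissions?" path above). The fast_forward.h change follows the same pattern: on non-x86 architectures is_x86_string_instruction_at() is a constant-false inline, so the new call site compiles everywhere. Since loading a BPF program and attaching it to a perf event is typically gated for unprivileged users, a rough availability probe (hypothetical, rr itself does not do this) is to read the relevant sysctls:

    /* probe_sketch.c: print the sysctls that commonly gate this
     * feature for non-root users. */
    #include <stdio.h>

    static long read_sysctl(const char* path) {
      FILE* f = fopen(path, "r");
      if (!f) {
        return -1;
      }
      long v = -1;
      if (fscanf(f, "%ld", &v) != 1) {
        v = -1;
      }
      fclose(f);
      return v;
    }

    int main(void) {
      printf("kernel.perf_event_paranoid = %ld\n",
             read_sysctl("/proc/sys/kernel/perf_event_paranoid"));
      printf("kernel.unprivileged_bpf_disabled = %ld\n",
             read_sysctl("/proc/sys/kernel/unprivileged_bpf_disabled"));
      return 0;
    }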