
Add a path for BPF-accelerated async signal emulation. #3731

Merged: 1 commit, Jun 26, 2024
26 changes: 25 additions & 1 deletion CMakeLists.txt
@@ -237,6 +237,16 @@ if(NOT ANDROID)
add_definitions(-DZSTD=1)
endif()

option(bpf "Enable bpf acceleration")

if(bpf)
add_definitions(-DBPF=1)
set(REQUIRED_LIBS
${REQUIRED_LIBS}
libbpf
)
endif(bpf)

foreach(required_lib ${REQUIRED_LIBS})
string(TOUPPER ${required_lib} PKG)
if(NOT SKIP_PKGCONFIG)
@@ -692,6 +702,19 @@ post_build_executable(rr)
set(RR_BIN rr)
add_dependencies(rr Generated)

if(bpf)
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/share/rr/async_event_filter.o
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/bpf/async_event_filter.c
COMMAND clang -g -target bpf -Wall -O2 -c ${CMAKE_CURRENT_SOURCE_DIR}/src/bpf/async_event_filter.c -o ${CMAKE_CURRENT_BINARY_DIR}/share/rr/async_event_filter.o)

install(FILES ${CMAKE_CURRENT_BINARY_DIR}/share/rr/async_event_filter.o
DESTINATION ${CMAKE_INSTALL_DATADIR}/rr)

add_custom_target(BPF DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/share/rr/async_event_filter.o)

add_dependencies(rr BPF)
endif()

option(strip "Strip debug info from rr binary")

set(RR_MAIN_LINKER_FLAGS ${LINKER_FLAGS})
@@ -724,6 +747,7 @@ endif()
target_link_libraries(rr
${CMAKE_DL_LIBS}
${ZLIB_LDFLAGS}
${LIBBPF_LDFLAGS}
brotli
)

@@ -733,7 +757,7 @@ endif()

if(staticlibs)
# Urgh ... this might not work for everyone, but there doesn't seem to be
# a way to persuade pkg-confing/pkg_check_modules to produce the right flags
# a way to persuade pkg-config/pkg_check_modules to produce the right flags
target_link_libraries(rr -L/home/roc/lib -l:libcapnp.a -l:libkj.a)
# Note that this works for both clang++ and g++
set(RR_MAIN_LINKER_FLAGS "-static-libstdc++ ${RR_MAIN_LINKER_FLAGS}")
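
For reference, enabling the new path at configure time would look something like the following; this invocation is not from the PR, and the source directory layout is assumed:

# Hypothetical invocation: -Dbpf=ON turns on -DBPF=1, adds libbpf to the
# pkg-config requirements, and compiles the filter with clang -target bpf.
cmake -Dbpf=ON /path/to/rr
make rr
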
186 changes: 186 additions & 0 deletions src/PerfCounters.cc
@@ -17,6 +17,11 @@
#include <unistd.h>
#include <time.h>

#ifdef BPF
#include <bpf/libbpf.h>
#include <linux/hw_breakpoint.h>
#endif

#include <algorithm>
#include <fstream>
#include <limits>
@@ -981,6 +986,7 @@ void PerfCounters::close() {
fd_minus_ticks_measure.close();
fd_useless_counter.close();
fd_ticks_in_transaction.close();
fd_async_signal_accelerator.close();
}

@@ -1007,6 +1013,7 @@ Ticks PerfCounters::stop(Task* t, Error* error) {
if (pt_state) {
infallible_perf_event_disable_if_open(pt_state->pt_perf_event_fd);
}
infallible_perf_event_disable_if_open(fd_async_signal_accelerator);
}
return ticks;
}
@@ -1120,4 +1127,183 @@ Ticks PerfCounters::read_ticks(Task* t, Error* error) {
return ret;
}

#ifdef BPF
class BpfAccelerator {
public:
static std::shared_ptr<BpfAccelerator> get_or_create();
Collaborator: I was thinking we could just create one BpfAccelerator in ReplaySession and copy the reference when we clone ReplaySessions so we don't need a static variable here.

Collaborator (author): I'm not convinced this is a great idea. It means moving BpfAccelerator into the header so ReplaySession can get at it. Is that really better than a static singleton?
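
A rough sketch of the suggested alternative, with invented names: ReplaySession would own the reference and hand it to clones, at the cost of exposing at least a declaration of BpfAccelerator to the header.

#include <memory>

class BpfAccelerator;  // would need to be visible to ReplaySession

class ReplaySession {
public:
  std::shared_ptr<ReplaySession> clone() {
    auto session = std::make_shared<ReplaySession>();
    // Clones share the already-loaded accelerator; no static needed.
    session->bpf_accelerator_ = bpf_accelerator_;
    return session;
  }

private:
  std::shared_ptr<BpfAccelerator> bpf_accelerator_;
};

The tradeoff is visibility: the singleton keeps BpfAccelerator entirely private to PerfCounters.cc, at the cost of process-global state.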


ScopedFd create_counter(pid_t tid);
void match_regs_and_open_counter(const Registers& regs, ScopedFd& counter);
uint64_t skips() const {
return *bpf_skips;
}

// Can't be private because of make_shared.
BpfAccelerator(struct bpf_object* bpf_obj, int bpf_prog_fd,
user_regs_struct* bpf_regs, uint64_t* bpf_skips)
: bpf_obj(bpf_obj), bpf_prog_fd(bpf_prog_fd), bpf_regs(bpf_regs), bpf_skips(bpf_skips)
{}

~BpfAccelerator() {
munmap(bpf_skips, 4096);
munmap(bpf_regs, 4096);
bpf_object__close(bpf_obj);
}

private:
static std::shared_ptr<BpfAccelerator> singleton;

struct perf_event_attr attr;
struct bpf_object* bpf_obj;
// Not a ScopedFd because the bpf_object maintains ownership.
int bpf_prog_fd;
user_regs_struct* bpf_regs;
uint64_t* bpf_skips;
};

std::shared_ptr<BpfAccelerator> BpfAccelerator::singleton;

/* static */ std::shared_ptr<BpfAccelerator> BpfAccelerator::get_or_create() {
static int initialized;
if (BpfAccelerator::singleton) {
return BpfAccelerator::singleton;
}

if (!initialized) {
initialized = -1;

libbpf_set_strict_mode(LIBBPF_STRICT_DIRECT_ERRS);
string path = resource_path() + "share/rr/async_event_filter.o";
struct bpf_object* obj = bpf_object__open(path.c_str());
if ((intptr_t)obj <= 0) {
LOG(error) << "Failed to find bpf at " << path;
return nullptr;
}
if (bpf_object__load(obj) < 0) {
LOG(error) << "Failed to load bpf at " << path << " into the kernel. Do we have permissions?";
bpf_object__close(obj);
return nullptr;
}
int bpf_map_fd = bpf_object__find_map_fd_by_name(obj, "registers");
if (bpf_map_fd < 0) {
CLEAN_FATAL() << "rr's bpf at " << path << " is corrupt";
return nullptr;
}
struct bpf_program* prog = bpf_program__next(NULL, obj);
if (!prog) {
CLEAN_FATAL() << "rr's bpf at " << path << " is corrupt";
return nullptr;
}
int bpf_prog_fd = bpf_program__fd(prog);
if (bpf_prog_fd < 0) {
CLEAN_FATAL() << "rr's bpf at " << path << " is corrupt";
return nullptr;
}

auto bpf_regs = (struct user_regs_struct*)
mmap(NULL, 4096, PROT_READ | PROT_WRITE,
MAP_SHARED, bpf_map_fd, 0);
if (bpf_regs == MAP_FAILED) {
CLEAN_FATAL() << "Failed to mmap bpf maps";
return nullptr;
}

bpf_map_fd = bpf_object__find_map_fd_by_name(obj, "skips");
if (bpf_map_fd < 0) {
CLEAN_FATAL() << "rr's bpf at " << path << " is corrupt";
return nullptr;
}

auto bpf_skips = (uint64_t*)
mmap(NULL, 4096, PROT_READ | PROT_WRITE,
MAP_SHARED, bpf_map_fd, 0);
if (bpf_skips == MAP_FAILED) {
CLEAN_FATAL() << "Failed to mmap bpf maps";
return nullptr;
}

BpfAccelerator::singleton =
std::make_shared<BpfAccelerator>(obj, bpf_prog_fd, bpf_regs, bpf_skips);
memset(&singleton->attr, 0, sizeof(singleton->attr));
singleton->attr.type = PERF_TYPE_BREAKPOINT;
singleton->attr.size = sizeof(attr);
singleton->attr.bp_type = HW_BREAKPOINT_X;
singleton->attr.bp_len = sizeof(long);
singleton->attr.sample_period = 1;
singleton->attr.sample_type = PERF_SAMPLE_IP;
singleton->attr.pinned = 1;
singleton->attr.exclude_kernel = 1;
singleton->attr.exclude_hv = 1;
singleton->attr.wakeup_events = 1;
singleton->attr.precise_ip = 3;
singleton->attr.disabled = 1;
initialized = 1;
}

return BpfAccelerator::singleton;
}

ScopedFd BpfAccelerator::create_counter(pid_t tid) {
attr.bp_addr = 0;
ScopedFd fd = start_counter(tid, -1, &attr);

struct f_owner_ex own;
own.type = F_OWNER_TID;
own.pid = tid;
if (fcntl(fd, F_SETOWN_EX, &own)) {
FATAL() << "Failed to SETOWN_EX bpf-accelerated breakpoint fd";
}

make_counter_async(fd, SIGTRAP);

if (ioctl(fd, PERF_EVENT_IOC_SET_BPF, bpf_prog_fd)) {
FATAL() << "Failed PERF_EVENT_IOC_SET_BPF";
}

return fd;
}

void BpfAccelerator::match_regs_and_open_counter(const Registers& regs, ScopedFd& fd) {
attr.bp_addr = regs.ip().register_value();
if (ioctl(fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &attr)) {
FATAL() << "Failed PERF_EVENT_IOC_MODIFY_ATTRIBUTES";
}

auto r = regs.get_ptrace();
memcpy(bpf_regs, &r, sizeof(struct user_regs_struct));
*bpf_skips = 0;

infallible_perf_event_enable_if_open(fd);
}

bool PerfCounters::accelerate_async_signal(const Registers& regs) {
if (!fd_async_signal_accelerator.is_open()) {
if (!bpf) {
bpf = BpfAccelerator::get_or_create();
}

if (!bpf) {
return false;
}

fd_async_signal_accelerator = bpf->create_counter(tid);
}

if (!fd_async_signal_accelerator.is_open()) {
return false;
}

bpf->match_regs_and_open_counter(regs, fd_async_signal_accelerator);
return true;
}

uint64_t PerfCounters::bpf_skips() const {
if (!bpf) {
return 0;
}

return bpf->skips();
}
#endif

} // namespace rr
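
The BPF program itself (src/bpf/async_event_filter.c) is not shown in this diff. As a sketch only, assuming it is a perf_event program filtering an execute breakpoint, it could look roughly like this; the map names "registers" and "skips" come from the C++ code above, while everything else (x86-64 register names, the exact comparison set) is assumed:

// Sketch of a plausible async_event_filter.c, NOT the file from this PR.
// A perf_event BPF program runs on each hardware-breakpoint hit: returning 0
// suppresses the overflow handling (so no FASYNC SIGTRAP reaches rr), while
// returning 1 lets the signal through. Mismatched hits are counted in "skips".
#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include <linux/ptrace.h>
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

// Single-slot BPF_F_MMAPABLE arrays so rr can mmap() them, matching the
// mmap(..., bpf_map_fd, 0) calls in BpfAccelerator::get_or_create(). The
// value layout is assumed compatible with the user_regs_struct that
// match_regs_and_open_counter() memcpy()s in.
struct {
  __uint(type, BPF_MAP_TYPE_ARRAY);
  __uint(map_flags, BPF_F_MMAPABLE);
  __uint(max_entries, 1);
  __type(key, __u32);
  __type(value, struct pt_regs);
} registers SEC(".maps");

struct {
  __uint(type, BPF_MAP_TYPE_ARRAY);
  __uint(map_flags, BPF_F_MMAPABLE);
  __uint(max_entries, 1);
  __type(key, __u32);
  __type(value, __u64);
} skips SEC(".maps");

SEC("perf_event")
int async_event_filter(struct bpf_perf_event_data* ctx) {
  __u32 zero = 0;
  struct pt_regs* target = bpf_map_lookup_elem(&registers, &zero);
  __u64* skipped = bpf_map_lookup_elem(&skips, &zero);
  if (!target || !skipped)
    return 1;  // fail open: deliver the signal and let rr sort it out

  // The real filter presumably compares the full GPR set; a few
  // representative x86-64 registers stand in for it here.
  if (ctx->regs.rip == target->rip && ctx->regs.rsp == target->rsp &&
      ctx->regs.rax == target->rax)
    return 1;  // register state matches the target: take the SIGTRAP

  __sync_fetch_and_add(skipped, 1);  // read back via BpfAccelerator::skips()
  return 0;  // wrong state: swallow this hit and keep executing
}

This is what makes the ReplaySession.cc change below cheap: instead of trapping to rr on every hit of the target $ip, the tracee only traps once the whole register state matches.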
22 changes: 22 additions & 0 deletions src/PerfCounters.h
@@ -21,7 +21,9 @@

namespace rr {

class Registers;
class Task;
class BpfAccelerator;

enum TicksSemantics {
TICKS_RETIRED_CONDITIONAL_BRANCHES,
@@ -175,6 +177,21 @@ class PerfCounters {
*/
static void start_pt_copy_thread();

/**
* Try to use BPF to accelerate async signal processing
*/
#ifdef BPF
bool accelerate_async_signal(const Registers& regs);
uint64_t bpf_skips() const;
#else
bool accelerate_async_signal(const Registers&) {
return false;
}
uint64_t bpf_skips() const {
return 0;
}
#endif

private:
template <typename Arch> void reset_arch_extras();

@@ -212,6 +229,11 @@ class PerfCounters {
// aarch64 specific counter to detect use of ll/sc instructions
ScopedFd fd_strex_counter;

// BPF-enabled hardware breakpoint for fast async signal emulation.
ScopedFd fd_async_signal_accelerator;

std::shared_ptr<BpfAccelerator> bpf;

std::unique_ptr<PTState> pt_state;

TicksSemantics ticks_semantics_;
37 changes: 25 additions & 12 deletions src/ReplaySession.cc
@@ -982,6 +982,7 @@ Completion ReplaySession::emulate_async_signal(
* be dealt with. */
bool pending_SIGTRAP = false;
bool did_set_internal_breakpoints = false;
bool did_set_bpf_breakpoint = false;
RunCommand SIGTRAP_run_command = RUN_CONTINUE;

/* Step 2: more slowly, find our way to the target ticks and
@@ -1042,25 +1043,31 @@
// breakpoint instruction in the tracee would have triggered a
// deterministic signal instead of an async one.
// So we must have hit our internal breakpoint.
ASSERT(t, did_set_internal_breakpoints);
ASSERT(t, did_set_internal_breakpoints || did_set_bpf_breakpoint);
// We didn't do an internal singlestep, and if we'd done a
// user-requested singlestep we would have hit the above case.
ASSERT(t, !trap_reasons.singlestep);
if (t->ip().undo_executed_bkpt(t->arch()) == in_syscallbuf_syscall_hook) {
t->vm()->remove_breakpoint(ip, BKPT_INTERNAL);
t->vm()->remove_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
t->move_ip_before_breakpoint();
return COMPLETE;
if (did_set_internal_breakpoints) {
if (t->ip().undo_executed_bkpt(t->arch()) == in_syscallbuf_syscall_hook) {
t->vm()->remove_breakpoint(ip, BKPT_INTERNAL);
t->vm()->remove_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
t->move_ip_before_breakpoint();
return COMPLETE;
}
ASSERT(t, regs.ip() == t->ip().undo_executed_bkpt(t->arch()));
} else {
LOG(debug) << " fast-forwarded through " << t->hpc.bpf_skips() << " breakpoint hits with bpf";
}
ASSERT(t, regs.ip() == t->ip().undo_executed_bkpt(t->arch()));
/* Case (1) above: cover the tracks of
* our internal breakpoint, and go
* check again if we're at the
* target. */
LOG(debug) << " trap was for target $ip";

pending_SIGTRAP = false;
t->move_ip_before_breakpoint();
if (did_set_internal_breakpoints) {
t->move_ip_before_breakpoint();
}
/* We just backed up the $ip, but
* rewound it over an |int $3|
* instruction, which couldn't have
@@ -1093,6 +1100,7 @@
}
did_set_internal_breakpoints = false;
}
did_set_bpf_breakpoint = false;

if (at_target) {
/* Case (2) above: done. */
@@ -1117,11 +1125,16 @@
* no slower than single-stepping our way to
* the target execution point. */
LOG(debug) << " breaking on target $ip";
t->vm()->add_breakpoint(ip, BKPT_INTERNAL);
if (in_syscallbuf_syscall_hook) {
t->vm()->add_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
if (is_x86_string_instruction_at(t, ip) || !t->hpc.accelerate_async_signal(regs)) {
t->vm()->add_breakpoint(ip, BKPT_INTERNAL);

if (in_syscallbuf_syscall_hook) {
t->vm()->add_breakpoint(in_syscallbuf_syscall_hook, BKPT_INTERNAL);
}
did_set_internal_breakpoints = true;
} else {
did_set_bpf_breakpoint = true;
}
did_set_internal_breakpoints = true;
continue_or_step(t, constraints, RESUME_UNLIMITED_TICKS);
SIGTRAP_run_command = constraints.command;
} else {
3 changes: 3 additions & 0 deletions src/Task.cc
@@ -1387,6 +1387,9 @@ TrapReasons Task::compute_trap_reasons() {
<< " expected breakpoint at " << ip_at_breakpoint << ", got siginfo "
<< si;
}
// If we got a SIGTRAP via a FASYNC signal it must be our bpf-enabled
// hardware breakpoint.
reasons.breakpoint |= si.si_code == SI_SIGIO;
}
return reasons;
}