From cd6d95e41aae5a7048f624cca38b74b195e705f2 Mon Sep 17 00:00:00 2001 From: Sidharth Kshatriya Date: Sat, 28 Sep 2024 14:09:25 +0530 Subject: [PATCH] When using `PERF_RECORD_SWITCH`, allow ContextSwitchEvent initialization to fail The tracee may have died unexpectedly or been killed. This commit allows the `cont_race` test to consistently pass when the test is run under all the following conditions: (a) kernel.perf_event_paranoid = 2 (b) kernel supports PERF_RECORD_SWITCH (Linux kernel >= 6.10) (c) syscallbuf is enabled (so here we're talking only about `cont_race` and NOT `cont_race-no-syscallbuf` which doesn't need switch events anyways) Without this commit, I noticed the occasional failure in (syscallbuf enabled) `cont_race`: [FATAL src/PerfCounterBuffers.cc:32:allocate() errno: EBADF] Can't allocate memory for PT DATA area --- src/ContextSwitchEvent.cc | 7 +++++-- src/ContextSwitchEvent.h | 2 +- src/PerfCounterBuffers.cc | 21 ++++++++++++++++++--- src/PerfCounterBuffers.h | 2 +- src/RecordTask.cc | 6 +++++- 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/src/ContextSwitchEvent.cc b/src/ContextSwitchEvent.cc index 94aecab83af..3004bb4ca44 100644 --- a/src/ContextSwitchEvent.cc +++ b/src/ContextSwitchEvent.cc @@ -144,12 +144,15 @@ ContextSwitchEventStrategy ContextSwitchEvent::strategy() { return strat; } -void ContextSwitchEvent::init(ScopedFd tracee_fd) { +bool ContextSwitchEvent::init(ScopedFd tracee_fd) { tracee_fd_ = std::move(tracee_fd); if (strategy() == ContextSwitchEventStrategy::STRATEGY_RECORD_SWITCH) { mmap_buffer = make_unique(); - mmap_buffer->allocate(tracee_fd_, page_size(), 0); + bool ok = false; + mmap_buffer->allocate(tracee_fd_, page_size(), 0, &ok); + return ok; } + return true; } void ContextSwitchEvent::drain_events() { diff --git a/src/ContextSwitchEvent.h b/src/ContextSwitchEvent.h index 7753059f818..e9d13635238 100644 --- a/src/ContextSwitchEvent.h +++ b/src/ContextSwitchEvent.h @@ -47,7 +47,7 @@ namespace rr { */ class ContextSwitchEvent { public: - void init(ScopedFd tracee_fd); + bool init(ScopedFd tracee_fd); ScopedFd& tracee_fd() { return tracee_fd_; } diff --git a/src/PerfCounterBuffers.cc b/src/PerfCounterBuffers.cc index 68dc16b7ad4..6e6c8a201f7 100644 --- a/src/PerfCounterBuffers.cc +++ b/src/PerfCounterBuffers.cc @@ -23,13 +23,22 @@ void PerfCounterBuffers::destroy() { } void PerfCounterBuffers::allocate(ScopedFd& perf_event_fd, - uint64_t buffer_size, uint64_t aux_size) { + uint64_t buffer_size, uint64_t aux_size, bool *ok) { this->buffer_size = buffer_size; + if (ok) { + *ok = true; + } void* base = mmap(NULL, page_size() + buffer_size, PROT_READ | PROT_WRITE, MAP_SHARED, perf_event_fd, 0); if (base == MAP_FAILED) { - FATAL() << "Can't allocate memory for PT DATA area"; + const auto msg = "Can't allocate memory for PT DATA area"; + if (!ok) { + FATAL() << msg; + } + LOG(warn) << msg; + *ok = false; + return; } mmap_header = static_cast(base); @@ -40,7 +49,13 @@ void PerfCounterBuffers::allocate(ScopedFd& perf_event_fd, void* aux = mmap(NULL, mmap_header->aux_size, PROT_READ | PROT_WRITE, MAP_SHARED, perf_event_fd, mmap_header->aux_offset); if (aux == MAP_FAILED) { - FATAL() << "Can't allocate memory for PT AUX area"; + const auto msg = "Can't allocate memory for PT AUX area"; + if (!ok) { + FATAL() << msg; + } + LOG(warn) << msg; + *ok = false; + return; } mmap_aux_buffer = static_cast(aux); } diff --git a/src/PerfCounterBuffers.h b/src/PerfCounterBuffers.h index d1ae4892ebb..19f96ef038f 100644 --- a/src/PerfCounterBuffers.h +++ b/src/PerfCounterBuffers.h @@ -32,7 +32,7 @@ class PerfCounterBuffers { buffer_size(0), packet_in_use(false) {} ~PerfCounterBuffers() { destroy(); } - void allocate(ScopedFd& perf_event_fd, uint64_t buffer_size, uint64_t aux_size); + void allocate(ScopedFd& perf_event_fd, uint64_t buffer_size, uint64_t aux_size, bool *ok = nullptr); void destroy(); bool allocated() const { return mmap_header != nullptr; } diff --git a/src/RecordTask.cc b/src/RecordTask.cc index 52fba523ae1..4f930e17fdf 100644 --- a/src/RecordTask.cc +++ b/src/RecordTask.cc @@ -526,7 +526,11 @@ template void RecordTask::init_buffers_arch() { desched_fd_child = args.desched_counter_fd; // Prevent the child from closing this fd fds->add_monitor(this, desched_fd_child, new PreserveFileMonitor()); - desched_fd.init(remote.retrieve_fd(desched_fd_child)); + if (!desched_fd.init(remote.retrieve_fd(desched_fd_child))) { + LOG(warn) + << "ContextSwitchEvent initialization with strategy " + "STRATEGY_RECORD_SWITCH failed. tracee died unexpectedly or killed ??"; + } if (trace_writer().supports_file_data_cloning() && session().use_read_cloning()) {