Skip to content

Commit

Permalink
Test toleration of transient errors in "rr replay -a"
Browse files Browse the repository at this point in the history
  • Loading branch information
rocallahan committed Mar 5, 2024
1 parent 6160be7 commit 8489b9b
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 1 deletion.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1704,6 +1704,7 @@ set(TESTS_WITHOUT_PROGRAM
trace_version
term_trace_cpu
trace_events
transient_fault_replay_all
tty
unmap_vdso
unwind_on_signal
Expand Down
5 changes: 5 additions & 0 deletions src/ReplaySession.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2160,6 +2160,11 @@ void ReplaySession::reattach_tasks(ScopedFd new_tracee_socket, ScopedFd new_trac
}
}

// Stdio marking during replay needs two things: the base Session must report
// that marking is on, and the current frame time must have reached
// suppress_stdio_before_event_.
bool ReplaySession::mark_stdio() const {
  if (!Session::mark_stdio()) {
    return false;
  }
  const bool reached_threshold =
      current_frame_time() >= suppress_stdio_before_event_;
  return reached_threshold;
}

bool ReplaySession::echo_stdio() const {
return flags().redirect_stdio &&
current_frame_time() >= suppress_stdio_before_event_;
Expand Down
1 change: 1 addition & 0 deletions src/ReplaySession.h
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ class ReplaySession final : public Session {
// Records that a transient error has been detected by setting
// detected_transient_error_.
void notify_detected_transient_error() {
  detected_transient_error_ = true;
}

// Stores the frame time that mark_stdio() and echo_stdio() compare
// current_frame_time() against; both return false while the current frame
// time is below this value.
void set_suppress_stdio_before_event(FrameTime event) {
  suppress_stdio_before_event_ = event;
}
// True when Session::mark_stdio() is true and current_frame_time() has
// reached suppress_stdio_before_event_.
bool mark_stdio() const override;
// True when flags().redirect_stdio is set and current_frame_time() has
// reached suppress_stdio_before_event_.
bool echo_stdio() const;

private:
Expand Down
2 changes: 1 addition & 1 deletion src/Session.h
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ class Session {
or replay. */
bool intel_pt_enabled() const { return intel_pt_; }

bool mark_stdio() const;
virtual bool mark_stdio() const;

protected:
Session();
Expand Down
23 changes: 23 additions & 0 deletions src/Task.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2139,6 +2139,26 @@ void Task::set_aarch64_tls_register(uintptr_t val) {
we tried to set. */
}

// Returns the event number named by the RR_SIMULATE_ERROR_AT_EVENT
// environment variable, or INT64_MAX when the variable is unset or does not
// begin with a number.
static FrameTime simulate_error_at_event() {
  const char* s = getenv("RR_SIMULATE_ERROR_AT_EVENT");
  if (!s) {
    return INT64_MAX;
  }
  // Parse with strtoll rather than atoi: atoi only produces an int, has
  // undefined behavior when the value is out of range, and cannot tell "0"
  // apart from text that contains no number at all.
  char* end = nullptr;
  const long long value = strtoll(s, &end, 10);
  if (end == s) {
    // No digits were consumed (empty or non-numeric value); treat the
    // variable as unset instead of silently using event 0.
    return INT64_MAX;
  }
  return static_cast<FrameTime>(value);
}

// Decides whether a transient error should be simulated for `t` right now.
// Returns true at most once: the first time it is called for a task in a
// replaying session whose trace stream time has reached the event number
// obtained from simulate_error_at_event(). Every other call returns false.
static bool simulate_transient_error(Task* t) {
  static bool already_fired = false;
  // Evaluated once, on the first call.
  static FrameTime trigger_event = simulate_error_at_event();

  if (already_fired) {
    return false;
  }
  if (!t->session().is_replaying()) {
    return false;
  }
  // The session is replaying, so the task is treated as a ReplayTask to reach
  // the trace stream.
  ReplayTask* replay_task = static_cast<ReplayTask*>(t);
  if (replay_task->session().trace_stream()->time() < trigger_event) {
    return false;
  }
  already_fired = true;
  return true;
}

bool Task::did_waitpid(WaitStatus status) {
if (is_detached_proxy() &&
(status.stop_sig() == SIGSTOP || status.stop_sig() == SIGCONT)) {
Expand Down Expand Up @@ -2275,6 +2295,9 @@ bool Task::did_waitpid(WaitStatus status) {
in_unexpected_exit = true;
return false;
}
if (simulate_transient_error(this)) {
error_state = PerfCounters::Error::Transient;
}
if (detect_transient_error &&
*detect_transient_error == PerfCounters::Error::Transient) {
session().as_replay()->notify_detected_transient_error();
Expand Down
8 changes: 8 additions & 0 deletions src/test/transient_fault_replay_all.run
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Replays a recording twice while RR_SIMULATE_ERROR_AT_EVENT asks rr to
# simulate a transient error at a chosen event, checking the output each time.
# just_record, replay and check are helpers expected to come from util.sh.
source `dirname $0`/util.sh

# NOTE(review): -M presumably enables stdio marking for the recording --
# confirm against rr's command-line options.
RECORD_ARGS=-M
just_record seq "1 100"
# First replay: simulate the error at event 300.
RR_SIMULATE_ERROR_AT_EVENT=300 replay -M
# NOTE(review): presumably verifies that "100" (the last line printed by seq)
# appears in the replay output -- confirm against util.sh.
check 100 || exit 1
# Second replay: simulate the error one event later (301).
RR_SIMULATE_ERROR_AT_EVENT=301 replay -M
check 100

0 comments on commit 8489b9b

Please sign in to comment.