Skip to content

Commit

Permalink
Reset cpu_id(_start) rseq fields every time a new task is scheduled.
Browse files Browse the repository at this point in the history
Some software assumes the kernel does this, and depends on it by caching user data
in these fields, on the assumption that the kernel will clear out the cached data
when the task is newly scheduled onto a core.
  • Loading branch information
rocallahan committed Aug 2, 2024
1 parent 75b9ed3 commit d5ed041
Show file tree
Hide file tree
Showing 8 changed files with 104 additions and 1 deletion.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1283,6 +1283,7 @@ set(BASIC_TESTS
redzone_integrity
rename
rlimit
rseq_cpu_id_reset
rusage
samask
save_data_fd
Expand Down
3 changes: 3 additions & 0 deletions src/RecordSession.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2647,6 +2647,9 @@ RecordSession::RecordResult RecordSession::record_step() {
}
prev_task->pop_event(EV_SCHED);
}
if (t->tuid() != prev_task_tuid) {
t->will_schedule();
}

// Have to disable context-switching until we know it's safe
// to allow switching the context.
Expand Down
5 changes: 5 additions & 0 deletions src/ReplaySession.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1820,6 +1820,11 @@ ReplayTask* ReplaySession::setup_replay_one_trace_frame(ReplayTask* t) {
t = revive_task_for_exec();
}

if (t->tuid() != last_task_tuid) {
t->will_schedule();
last_task_tuid = t->tuid();
}

LOG(debug) << "[event " << trace_frame.time() << "] " << t->rec_tid
<< ": replaying " << Event(ev) << "; state "
<< (ev.is_syscall_event() ? state_name(ev.Syscall().state)
Expand Down
1 change: 1 addition & 0 deletions src/ReplaySession.h
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,7 @@ class ReplaySession final : public Session {
siginfo_t last_siginfo_;
Flags flags_;
FastForwardStatus fast_forward_status;
TaskUid last_task_tuid;
bool skip_next_execution_event;
bool replay_stops_at_first_execve_;
bool detected_transient_error_;
Expand Down
13 changes: 13 additions & 0 deletions src/Task.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3331,6 +3331,19 @@ void Task::write_zeroes(unique_ptr<AutoRemoteSyscalls>* remote, remote_ptr<void>
vm()->notify_written(initial_addr, initial_size, 0);
}

void Task::will_schedule() {
if (rseq_state) {
// Relying on rseq_t being the same across architectures.
int cpu = session().trace_stream()->bound_to_cpu();
uint32_t cpu_id = cpu >= 0 ? cpu : 0;
auto addr = REMOTE_PTR_FIELD(rseq_state->ptr.cast<typename NativeArch::rseq_t>(), cpu_id_start);
bool ok = true;
write_mem(addr, cpu_id, &ok);
addr = REMOTE_PTR_FIELD(rseq_state->ptr.cast<typename NativeArch::rseq_t>(), cpu_id);
write_mem(addr, cpu_id, &ok);
}
}

const TraceStream* Task::trace_stream() const {
if (session().as_record()) {
return &session().as_record()->trace_writer();
Expand Down
5 changes: 5 additions & 0 deletions src/Task.h
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,11 @@ class Task {
const void* buf, bool* ok = nullptr,
uint32_t flags = 0);

/**
 * This task has been selected to run next.
 *
 * The record and replay sessions call this when the task about to run
 * differs from the previously scheduled one. If the task has rseq state,
 * the implementation rewrites the `cpu_id_start` and `cpu_id` fields of its
 * rseq area with the CPU the trace is bound to (0 if bound_to_cpu() is
 * negative).
 */
void will_schedule();

SupportedArch detect_syscall_arch();

/**
Expand Down
3 changes: 2 additions & 1 deletion src/record_syscall.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4317,7 +4317,8 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t,
t->write_mem(addr, cpu_id);
t->record_local(addr, &cpu_id);
addr = REMOTE_PTR_FIELD(rseq, cpu_id_start);
uint32_t cpu_id_start = 0;
int cpu = t->session().trace_stream()->bound_to_cpu();
uint32_t cpu_id_start = cpu >= 0 ? cpu : 0;
t->write_mem(addr, cpu_id_start);
t->record_local(addr, &cpu_id_start);
t->rseq_state = nullptr;
Expand Down
74 changes: 74 additions & 0 deletions src/test/rseq_cpu_id_reset.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#include "util.h"

/* Signature value passed as the final argument of the rseq syscall below. */
static const uint32_t RSEQ_SIG = 0x12345678;

/* Pipe the child thread writes to and main() reads from. */
static int to_main_fds[2];
/* Pipe main() writes to and the child thread reads from. */
static int from_main_fds[2];

/* Number of write/read round trips performed by both the child and main(). */
static const int PING_PONG_ITERATIONS = 5;

/* Sentinel stored into rseq::cpu_id(_start) by the test; the test then asserts
   that both fields hold a value strictly below this one after registration
   and after every round trip. */
static const uint32_t CPU_INVALID = 10000000;

/*
 * Body of the cloned thread. Registers a freshly mapped rseq area, then for
 * PING_PONG_ITERATIONS rounds poisons cpu_id(_start) with CPU_INVALID,
 * exchanges one byte with main() over the pipes, and asserts that both fields
 * hold a value below CPU_INVALID again. Returns 0; if the rseq syscall fails
 * with ENOSYS it prints EXIT-SUCCESS and returns 0 without running the rounds.
 */
static int main_child(void) {
  const size_t rseq_len = sizeof(struct rseq);
  struct rseq* rs =
      (struct rseq*)mmap(NULL, rseq_len, PROT_READ | PROT_WRITE,
                         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  test_assert(rs != MAP_FAILED);

  /* Poison both fields so a successful registration is observable. */
  rs->cpu_id = CPU_INVALID;
  rs->cpu_id_start = CPU_INVALID;

  int rc = syscall(RR_rseq, rs, rseq_len, 0, RSEQ_SIG);
  if (rc == -1 && errno == ENOSYS) {
    atomic_puts("rseq not supported; ignoring test");
    atomic_puts("EXIT-SUCCESS");
    return 0;
  }
  test_assert(rc == 0);
  test_assert(rs->cpu_id_start < CPU_INVALID);
  test_assert(rs->cpu_id < CPU_INVALID);

  int rounds_left = PING_PONG_ITERATIONS;
  while (rounds_left-- > 0) {
    char reply;

    rs->cpu_id = CPU_INVALID;
    rs->cpu_id_start = CPU_INVALID;
    test_assert(1 == write(to_main_fds[1], "y", 1));
    // Under rr (or taskset to a single core) there must be a
    // context switch here. Some code exists that expects
    // rseq::cpu_id(_start) to be reset to the current core index
    // every time a task is scheduled onto a core.
    test_assert(1 == read(from_main_fds[0], &reply, 1));
    test_assert(rs->cpu_id_start < CPU_INVALID);
    test_assert(rs->cpu_id < CPU_INVALID);
  }

  return 0;
}

/*
 * Entry point handed to clone(): runs main_child() and passes its result to
 * exit(). The argument is unused.
 */
static int main_child_wrapper(__attribute__((unused)) void* arg) {
  const int status = main_child();
  exit(status);
}

/*
 * Test driver. Creates the two pipes, starts main_child_wrapper in a raw
 * clone()d thread, then answers each byte received from the child with one
 * byte of its own for PING_PONG_ITERATIONS rounds before printing
 * EXIT-SUCCESS.
 */
int main(void) {
  test_assert(0 == pipe(to_main_fds));
  test_assert(0 == pipe(from_main_fds));

  const size_t stack_size = 1 << 20;
  void* stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  test_assert(stack != MAP_FAILED);

  /* Do the real work in a thread that doesn't have glibc's rseq setup installed */
  /* The stack top is computed through char* because arithmetic on void* is a
     GNU extension. */
  int child_tid =
      clone(main_child_wrapper, (char*)stack + stack_size,
            CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_THREAD | CLONE_SIGHAND,
            NULL, NULL, NULL, NULL);
  /* Fail right away if the thread could not be created; otherwise the read()
     below would block forever waiting for a child that does not exist. */
  test_assert(child_tid > 0);

  for (int i = 0; i < PING_PONG_ITERATIONS; ++i) {
    char ch;
    test_assert(1 == read(to_main_fds[0], &ch, 1));
    test_assert(1 == write(from_main_fds[1], "x", 1));
  }

  atomic_puts("EXIT-SUCCESS");
  return 0;
}

0 comments on commit d5ed041

Please sign in to comment.