Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Properly track deferred syscall patching with the syscall event #3881

Merged
merged 3 commits into from
Dec 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,7 @@ set(BASIC_TESTS
x86/cross_arch
cwd_inaccessible
daemon
x86/deferred_patch
desched_blocking_poll
desched_sigkill
detach_huge_mmap
Expand Down
2 changes: 2 additions & 0 deletions src/Event.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@ struct SyscallEvent {
bool failed_during_preparation;
// Syscall is being emulated via PTRACE_SYSEMU.
bool in_sysemu;
// True if we should retry patching on exit from this syscall
bool should_retry_patch;
};

struct syscall_interruption_t {
Expand Down
76 changes: 41 additions & 35 deletions src/Monkeypatcher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -981,7 +981,9 @@ static uint64_t jump_patch_size(SupportedArch arch)
const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t,
remote_code_ptr ip,
bool entering_syscall,
size_t instruction_length) {
size_t instruction_length,
bool &should_retry,
bool &transient_failure) {
/* we need to inspect this many bytes before the start of the instruction,
to find every short jump that might land after it. Conservative. */
static const intptr_t LOOK_BACK = 0x80;
Expand Down Expand Up @@ -1035,9 +1037,9 @@ const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t,
// A patch that uses bytes before the syscall can't be done when
// entering the syscall, it must be done when exiting. So set a flag on
// the Task that tells us to come back later.
t->retry_syscall_patching = true;
should_retry = true;
LOG(debug) << "Deferring syscall patching at " << ip << " in " << t
<< " until syscall exit.";
<< " until syscall exit.";
return nullptr;
}
matches_hook = true;
Expand Down Expand Up @@ -1111,6 +1113,7 @@ const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t,
end_range = ip + instruction_length + hook.patch_region_length;
}
if (!safe_for_syscall_patching(start_range, end_range, t)) {
transient_failure = true;
LOG(debug)
<< "Temporarily declining to patch syscall at " << ip
<< " because a different task has its ip in the patched range";
Expand Down Expand Up @@ -1141,18 +1144,18 @@ const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t,
// The `entering_syscall` flag tells us whether or not we're at syscall entry.
// If we are, and we find a pattern that can only be patched at exit, we'll
// set a flag on the RecordTask telling it to try again after syscall exit.
bool Monkeypatcher::try_patch_syscall_x86ish(RecordTask* t, bool entering_syscall,
SupportedArch arch) {
Registers r = t->regs();
remote_code_ptr ip = r.ip();

bool Monkeypatcher::try_patch_syscall_x86ish(RecordTask* t, remote_code_ptr ip, bool entering_syscall,
SupportedArch arch, bool &should_retry) {
ASSERT(t, is_x86ish(arch)) << "Unsupported architecture";

size_t instruction_length = rr::syscall_instruction_length(arch);
bool transient_failure = false;
const syscall_patch_hook* hook_ptr = find_syscall_hook(t, ip - instruction_length,
entering_syscall, instruction_length);
entering_syscall, instruction_length, should_retry, transient_failure);
bool success = false;
intptr_t syscallno = r.original_syscallno();
// `syscallno` isn't necessarily correct here (in the extremely rare corner case that we
// deferred a patch and the signal handler changed it), but we only use it for logging.
intptr_t syscallno = t->regs().original_syscallno();
if (hook_ptr) {
// Get out of executing the current syscall before we patch it.
if (entering_syscall && !t->exit_syscall_and_prepare_restart()) {
Expand All @@ -1170,7 +1173,7 @@ bool Monkeypatcher::try_patch_syscall_x86ish(RecordTask* t, bool entering_syscal
}

if (!success) {
if (!t->retry_syscall_patching) {
if (!should_retry && !transient_failure) {
LOG(debug) << "Failed to patch syscall at " << ip << " syscall "
<< syscall_name(syscallno, t->arch()) << " tid " << t->tid;
tried_to_patch_syscall_addresses.insert(ip);
Expand All @@ -1181,15 +1184,12 @@ bool Monkeypatcher::try_patch_syscall_x86ish(RecordTask* t, bool entering_syscal
return true;
}

bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_syscall) {
Registers r = t->regs();
remote_code_ptr ip = r.ip() - 4;

bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, remote_code_ptr ip, bool entering_syscall) {
uint32_t inst[2] = {0, 0};
size_t bytes_count = t->read_bytes_fallible(ip.to_data_ptr<uint8_t>() - 4, 8, &inst);
size_t bytes_count = t->read_bytes_fallible(ip.to_data_ptr<uint8_t>() - 8, 8, &inst);
if (bytes_count < sizeof(inst) || inst[1] != 0xd4000001) {
LOG(debug) << "Declining to patch syscall at "
<< ip << " for unexpected instruction";
<< ip - 4 << " for unexpected instruction";
tried_to_patch_syscall_addresses.insert(ip);
return false;
}
Expand All @@ -1201,7 +1201,7 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca
// Our syscall hook cannot do that so this would have to be a raw syscall.
// We can handle this at runtime but if we know the call is definitely
// a clone we can avoid patching it here.
LOG(debug) << "Declining to patch clone syscall at " << ip;
LOG(debug) << "Declining to patch clone syscall at " << ip - 4;
tried_to_patch_syscall_addresses.insert(ip);
return false;
}
Expand All @@ -1210,9 +1210,9 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca
memcmp(syscall_hooks[0].patch_region_bytes, &inst[1], 4) == 0))
<< "Unknown syscall hook";

if (!safe_for_syscall_patching(ip, ip + 4, t)) {
if (!safe_for_syscall_patching(ip - 4, ip, t)) {
LOG(debug)
<< "Temporarily declining to patch syscall at " << ip
<< "Temporarily declining to patch syscall at " << ip - 4
<< " because a different task has its ip in the patched range";
return false;
}
Expand All @@ -1222,10 +1222,10 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca
return false;
}

LOG(debug) << "Patching syscall at " << ip << " syscall "
<< syscall_name(r.original_syscallno(), aarch64) << " tid " << t->tid;
LOG(debug) << "Patching syscall at " << ip - 4 << " syscall "
<< syscall_name(t->regs().original_syscallno(), aarch64) << " tid " << t->tid;

auto success = patch_syscall_with_hook(*this, t, syscall_hooks[0], ip, 4, 0);
auto success = patch_syscall_with_hook(*this, t, syscall_hooks[0], ip - 4, 4, 0);
if (!success && entering_syscall) {
// Need to reenter the syscall to undo exit_syscall_and_prepare_restart
if (!t->enter_syscall()) {
Expand All @@ -1234,16 +1234,23 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca
}

if (!success) {
LOG(debug) << "Failed to patch syscall at " << ip << " syscall "
<< syscall_name(r.original_syscallno(), aarch64) << " tid " << t->tid;
LOG(debug) << "Failed to patch syscall at " << ip - 4 << " syscall "
<< syscall_name(t->regs().original_syscallno(), aarch64) << " tid " << t->tid;
tried_to_patch_syscall_addresses.insert(ip);
return false;
}

return true;
}

bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) {

bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall, bool &should_retry) {
Registers r = t->regs();
remote_code_ptr ip = r.ip();
return try_patch_syscall(t, entering_syscall, should_retry, ip);
}

bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall, bool &should_retry, remote_code_ptr ip) {
if (syscall_hooks.empty()) {
// Syscall hooks not set up yet. Don't spew warnings, and don't
// fill tried_to_patch_syscall_addresses with addresses that we might be
Expand All @@ -1261,14 +1268,12 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) {
return false;
}

Registers r = t->regs();
remote_code_ptr ip = r.ip();
// We should not get here for untraced syscalls or anything else from the rr page.
// These should be normally prevented by our seccomp filter
// and in the case of syscalls interrupted by signals,
// the check for the syscall restart should prevent us from reaching here.
DEBUG_ASSERT(ip.to_data_ptr<void>() < AddressSpace::rr_page_start() ||
ip.to_data_ptr<void>() >= AddressSpace::rr_page_end());
ASSERT(t, ip.to_data_ptr<void>() < AddressSpace::rr_page_start() ||
ip.to_data_ptr<void>() >= AddressSpace::rr_page_end());
if (tried_to_patch_syscall_addresses.count(ip) || is_jump_stub_instruction(ip, true)) {
return false;
}
Expand Down Expand Up @@ -1303,13 +1308,13 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) {
}

if (arch == aarch64) {
return try_patch_syscall_aarch64(t, entering_syscall);
return try_patch_syscall_aarch64(t, ip, entering_syscall);
}
return try_patch_syscall_x86ish(t, entering_syscall, arch);
return try_patch_syscall_x86ish(t, ip, entering_syscall, arch, should_retry);
}

bool Monkeypatcher::try_patch_trapping_instruction(RecordTask* t, size_t instruction_length,
bool before_instruction) {
bool before_instruction, bool &should_retry) {
if (syscall_hooks.empty()) {
// Syscall hooks not set up yet. Don't spew warnings, and don't
// fill tried_to_patch_syscall_addresses with addresses that we might be
Expand All @@ -1332,8 +1337,9 @@ bool Monkeypatcher::try_patch_trapping_instruction(RecordTask* t, size_t instruc
// event, not a FLUSH_SYSCALLBUF event.
t->maybe_flush_syscallbuf();

bool transient_failure = false;
const syscall_patch_hook* hook_ptr =
find_syscall_hook(t, ip_of_instruction, before_instruction, instruction_length);
find_syscall_hook(t, ip_of_instruction, before_instruction, instruction_length, should_retry, transient_failure);
bool success = false;
if (hook_ptr) {
LOG(debug) << "Patching trapping instruction at " << ip_of_instruction << " tid " << t->tid;
Expand All @@ -1343,7 +1349,7 @@ bool Monkeypatcher::try_patch_trapping_instruction(RecordTask* t, size_t instruc
}

if (!success) {
if (!t->retry_syscall_patching) {
if (!should_retry && !transient_failure) {
LOG(debug) << "Failed to patch trapping instruction at " << ip_of_instruction << " tid " << t->tid;
tried_to_patch_syscall_addresses.insert(ip_of_instruction + instruction_length);
}
Expand Down
17 changes: 11 additions & 6 deletions src/Monkeypatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,12 @@ class Monkeypatcher {
* Zero or more mapping operations are also recorded to the trace and must
* be replayed.
*/
bool try_patch_syscall(RecordTask* t, bool entering_syscall = true);
bool try_patch_syscall_x86ish(RecordTask* t, bool entering_syscall,
SupportedArch arch);
bool try_patch_syscall_aarch64(RecordTask* t, bool entering_syscall);
bool try_patch_syscall(RecordTask* t, bool entering_syscall, bool &should_retry);
bool try_patch_syscall(RecordTask* t, bool entering_syscall, bool &should_retry, remote_code_ptr ip);

bool try_patch_syscall_x86ish(RecordTask* t, remote_code_ptr ip, bool entering_syscall,
SupportedArch arch, bool &should_retry);
bool try_patch_syscall_aarch64(RecordTask* t, remote_code_ptr ip, bool entering_syscall);

/**
* Try to patch the trapping instruction that |t| just trapped on. If this
Expand All @@ -78,7 +80,8 @@ class Monkeypatcher {
* be replayed.
*/
bool try_patch_trapping_instruction(RecordTask* t, size_t instruction_length,
bool before_instruction = true);
bool before_instruction,
bool &should_retry);

/**
* Replace all extended jumps by syscalls again. Note that we do not try to
Expand Down Expand Up @@ -161,7 +164,9 @@ class Monkeypatcher {
const syscall_patch_hook* find_syscall_hook(RecordTask* t,
remote_code_ptr ip,
bool entering_syscall,
size_t instruction_length);
size_t instruction_length,
bool &should_retry,
bool &transient_failure);

/**
* The list of supported syscall patches obtained from the preload
Expand Down
58 changes: 37 additions & 21 deletions src/RecordSession.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1025,13 +1025,32 @@ void RecordSession::desched_state_changed(RecordTask* t) {
(uint8_t)1);
}

static void retry_syscall_patch(RecordTask* t, remote_code_ptr orig_syscall_ip)
{
bool should_retry = false;
if (t->vm()->monkeypatcher().try_patch_syscall(t, false, should_retry, orig_syscall_ip)) {
// Syscall was patched. Emit event and continue execution.
LOG(debug) << "Retried patch applied successfully";
auto ev = Event::patch_syscall();
ev.PatchSyscall().patch_after_syscall = true;
t->record_event(ev);
}
ASSERT(t, !should_retry);
}

static void syscall_not_restarted(RecordTask* t) {
LOG(debug) << " " << t->tid << ": popping abandoned interrupted " << t->ev()
<< "; pending events:";
if (IS_LOGGING(debug)) {
t->log_pending_events();
}
bool should_retry_patch = t->ev().Syscall().should_retry_patch;
remote_code_ptr orig_syscall_ip = t->ev().Syscall().regs.ip();
t->pop_syscall_interruption();
if (should_retry_patch) {
LOG(debug) << "Retrying deferred syscall patching non-restarted syscall at " << orig_syscall_ip << " (current ip " << t->regs().ip() << ")";
retry_syscall_patch(t, orig_syscall_ip);
}
}

/**
Expand Down Expand Up @@ -1080,15 +1099,12 @@ static void maybe_discard_syscall_interruption(RecordTask* t, intptr_t ret) {
if (0 > ret) {
syscall_not_restarted(t);
} else if (t->arch() == x86 || t->arch() == x86_64) {
// On x86, we would have expected this to get restored to the syscallno.
// Since the syscallno is in a different register on other platforms, this
// assert does not apply.
// We have seen cases where `ret` is `restart_syscall`.
ASSERT(t, syscallno == ret ||
is_restart_syscall_syscall(ret, t->ev().Syscall().arch()))
<< "Interrupted call was " << t->ev().Syscall().syscall_name()
<< " and sigreturn claims to be restarting "
<< syscall_name(ret, t->ev().Syscall().arch());
SupportedArch arch;
if (get_syscall_instruction_arch(t, t->regs().ip(), &arch) && arch == t->arch() &&
(syscallno == ret || is_restart_syscall_syscall(ret, t->ev().Syscall().arch()))) {
return;
}
syscall_not_restarted(t);
}
}

Expand Down Expand Up @@ -1345,7 +1361,13 @@ void RecordSession::syscall_state_changed(RecordTask* t,
* done with it. But if we are, "freeze" it on the
* event stack until the execution point where it
* might be restarted. */
bool need_patch_retry = false;
remote_code_ptr orig_syscall_ip;
if (!may_restart) {
need_patch_retry = t->ev().Syscall().should_retry_patch;
if (need_patch_retry) {
orig_syscall_ip = t->ev().Syscall().regs.ip();
}
t->pop_syscall();
if (EV_DESCHED == t->ev().type()) {
LOG(debug) << " exiting desched critical section";
Expand All @@ -1358,17 +1380,9 @@ void RecordSession::syscall_state_changed(RecordTask* t,

t->canonicalize_regs(syscall_arch);

if (!may_restart) {
if (t->retry_syscall_patching) {
LOG(debug) << "Retrying deferred syscall patching";
t->retry_syscall_patching = false;
if (t->vm()->monkeypatcher().try_patch_syscall(t, false)) {
// Syscall was patched. Emit event and continue execution.
auto ev = Event::patch_syscall();
ev.PatchSyscall().patch_after_syscall = true;
t->record_event(ev);
}
}
if (need_patch_retry) {
LOG(debug) << "Retrying deferred syscall patching at " << orig_syscall_ip << " (current ip " << t->regs().ip() << ")";
retry_syscall_patch(t, orig_syscall_ip);
}
}

Expand Down Expand Up @@ -2061,8 +2075,9 @@ bool RecordSession::process_syscall_entry(RecordTask* t, StepState* step_state,
}

// Don't ever patch a sigreturn syscall. These can't go through the syscallbuf.
bool should_retry = false;
if (!is_sigreturn(t->regs().original_syscallno(), t->arch())) {
if (t->vm()->monkeypatcher().try_patch_syscall(t)) {
if (t->vm()->monkeypatcher().try_patch_syscall(t, true, should_retry)) {
// Syscall was patched. Emit event and continue execution.
t->record_event(Event::patch_syscall());
return true;
Expand All @@ -2076,6 +2091,7 @@ bool RecordSession::process_syscall_entry(RecordTask* t, StepState* step_state,
}

t->push_event(SyscallEvent(t->regs().original_syscallno(), syscall_arch));
t->ev().Syscall().should_retry_patch = should_retry;
}

check_initial_task_syscalls(t, step_result);
Expand Down
1 change: 0 additions & 1 deletion src/RecordTask.cc
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,6 @@ RecordTask::RecordTask(RecordSession& session, pid_t _tid, uint32_t serial,
did_record_robust_futex_changes(false),
waiting_for_reap(false),
waiting_for_ptrace_exit(false),
retry_syscall_patching(false),
sent_shutdown_kill(false),
did_execveat(false),
tick_request_override((TicksRequest)0),
Expand Down
3 changes: 0 additions & 3 deletions src/RecordTask.h
Original file line number Diff line number Diff line change
Expand Up @@ -808,9 +808,6 @@ class RecordTask final : public Task {
// be manually run.
bool waiting_for_ptrace_exit;

// When exiting a syscall, we should call MonkeyPatcher::try_patch_syscall again.
bool retry_syscall_patching;

// We've sent a SIGKILL during shutdown for this task.
bool sent_shutdown_kill;

Expand Down
9 changes: 9 additions & 0 deletions src/preload/rrcalls.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,15 @@
* presence of absence of rr.
*/
#define SYS_rrcall_check_presence (RR_CALL_BASE + 8)
/*
* If `arg1` is `RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED`,
* SYS_rrcall_check_presence returns 0 only if
* 1) The syscallbuf was used to service the syscall
* 2) The syscallbuf is disabled
*
* Otherwise returns ENOTSUP.
*/
#define RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED 1
/**
* Requests that rr detach from this process and re-create outside of its
* process tree, such that it may run without seccomp.
Expand Down
Loading
Loading