From 384fcbc938ba86a04feedd54382ebe03a6eb6583 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Fri, 15 Nov 2024 01:02:31 +0000 Subject: [PATCH 1/3] Properly track deferred syscall patching with the syscall event This fixes #3880 by moving the deferred syscall patching flag from Task to the corresponding syscall event. Both to avoid the assertion in that issue and for performance (since unpatched syscalls are traced), it is important that we (attempt to) patch the correct syscall ip. Additionally, we add the ability for tests to make sure that a particular syscall was actually patched. In particular, our logic for bailing out when another task is in the patch region was accidentally preventing all future attempts at patching the same region, even if doing so would have succeeded. With that fixed up, the new test will make sure this doesn't regress. --- CMakeLists.txt | 1 + src/Event.h | 2 + src/Monkeypatcher.cc | 76 ++++++++-------- src/Monkeypatcher.h | 17 ++-- src/RecordSession.cc | 58 +++++++----- src/RecordTask.cc | 1 - src/RecordTask.h | 3 - src/preload/rrcalls.h | 9 ++ src/preload/syscallbuf.c | 7 ++ src/record_signal.cc | 10 ++- src/record_syscall.cc | 16 +++- src/test/x86/deferred_patch.c | 160 ++++++++++++++++++++++++++++++++++ 12 files changed, 288 insertions(+), 72 deletions(-) create mode 100644 src/test/x86/deferred_patch.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c3a4780745..5b40b75b83f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1025,6 +1025,7 @@ set(BASIC_TESTS x86/cross_arch cwd_inaccessible daemon + x86/deferred_patch desched_blocking_poll desched_sigkill detach_huge_mmap diff --git a/src/Event.h b/src/Event.h index c1422ae7898..1aacd76bef0 100644 --- a/src/Event.h +++ b/src/Event.h @@ -279,6 +279,8 @@ struct SyscallEvent { bool failed_during_preparation; // Syscall is being emulated via PTRACE_SYSEMU. bool in_sysemu; + // True if we should retry patching on exit from this syscall + bool should_retry_patch; }; struct syscall_interruption_t { diff --git a/src/Monkeypatcher.cc b/src/Monkeypatcher.cc index 1055b434df7..6cdb6b762b9 100644 --- a/src/Monkeypatcher.cc +++ b/src/Monkeypatcher.cc @@ -981,7 +981,9 @@ static uint64_t jump_patch_size(SupportedArch arch) const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t, remote_code_ptr ip, bool entering_syscall, - size_t instruction_length) { + size_t instruction_length, + bool &should_retry, + bool &transient_failure) { /* we need to inspect this many bytes before the start of the instruction, to find every short jump that might land after it. Conservative. */ static const intptr_t LOOK_BACK = 0x80; @@ -1035,9 +1037,9 @@ const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t, // A patch that uses bytes before the syscall can't be done when // entering the syscall, it must be done when exiting. So set a flag on // the Task that tells us to come back later. - t->retry_syscall_patching = true; + should_retry = true; LOG(debug) << "Deferring syscall patching at " << ip << " in " << t - << " until syscall exit."; + << " until syscall exit."; return nullptr; } matches_hook = true; @@ -1111,6 +1113,7 @@ const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t, end_range = ip + instruction_length + hook.patch_region_length; } if (!safe_for_syscall_patching(start_range, end_range, t)) { + transient_failure = true; LOG(debug) << "Temporarily declining to patch syscall at " << ip << " because a different task has its ip in the patched range"; @@ -1141,18 +1144,18 @@ const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t, // The `entering_syscall` flag tells us whether or not we're at syscall entry. // If we are, and we find a pattern that can only be patched at exit, we'll // set a flag on the RecordTask telling it to try again after syscall exit. -bool Monkeypatcher::try_patch_syscall_x86ish(RecordTask* t, bool entering_syscall, - SupportedArch arch) { - Registers r = t->regs(); - remote_code_ptr ip = r.ip(); - +bool Monkeypatcher::try_patch_syscall_x86ish(RecordTask* t, remote_code_ptr ip, bool entering_syscall, + SupportedArch arch, bool &should_retry) { ASSERT(t, is_x86ish(arch)) << "Unsupported architecture"; size_t instruction_length = rr::syscall_instruction_length(arch); + bool transient_failure = false; const syscall_patch_hook* hook_ptr = find_syscall_hook(t, ip - instruction_length, - entering_syscall, instruction_length); + entering_syscall, instruction_length, should_retry, transient_failure); bool success = false; - intptr_t syscallno = r.original_syscallno(); + // `syscallno` isn't necessarily correct here (in the extremely rare corner case that we + // deferred a patch and the signal handler changed it), but we only use it for logging. + intptr_t syscallno = t->regs().original_syscallno(); if (hook_ptr) { // Get out of executing the current syscall before we patch it. if (entering_syscall && !t->exit_syscall_and_prepare_restart()) { @@ -1170,7 +1173,7 @@ bool Monkeypatcher::try_patch_syscall_x86ish(RecordTask* t, bool entering_syscal } if (!success) { - if (!t->retry_syscall_patching) { + if (!should_retry && !transient_failure) { LOG(debug) << "Failed to patch syscall at " << ip << " syscall " << syscall_name(syscallno, t->arch()) << " tid " << t->tid; tried_to_patch_syscall_addresses.insert(ip); @@ -1181,15 +1184,12 @@ bool Monkeypatcher::try_patch_syscall_x86ish(RecordTask* t, bool entering_syscal return true; } -bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_syscall) { - Registers r = t->regs(); - remote_code_ptr ip = r.ip() - 4; - +bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, remote_code_ptr ip, bool entering_syscall) { uint32_t inst[2] = {0, 0}; - size_t bytes_count = t->read_bytes_fallible(ip.to_data_ptr() - 4, 8, &inst); + size_t bytes_count = t->read_bytes_fallible(ip.to_data_ptr() - 8, 8, &inst); if (bytes_count < sizeof(inst) || inst[1] != 0xd4000001) { LOG(debug) << "Declining to patch syscall at " - << ip << " for unexpected instruction"; + << ip - 4 << " for unexpected instruction"; tried_to_patch_syscall_addresses.insert(ip); return false; } @@ -1201,7 +1201,7 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca // Our syscall hook cannot do that so this would have to be a raw syscall. // We can handle this at runtime but if we know the call is definitely // a clone we can avoid patching it here. - LOG(debug) << "Declining to patch clone syscall at " << ip; + LOG(debug) << "Declining to patch clone syscall at " << ip - 4; tried_to_patch_syscall_addresses.insert(ip); return false; } @@ -1210,9 +1210,9 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca memcmp(syscall_hooks[0].patch_region_bytes, &inst[1], 4) == 0)) << "Unknown syscall hook"; - if (!safe_for_syscall_patching(ip, ip + 4, t)) { + if (!safe_for_syscall_patching(ip - 4, ip, t)) { LOG(debug) - << "Temporarily declining to patch syscall at " << ip + << "Temporarily declining to patch syscall at " << ip - 4 << " because a different task has its ip in the patched range"; return false; } @@ -1222,10 +1222,10 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca return false; } - LOG(debug) << "Patching syscall at " << ip << " syscall " - << syscall_name(r.original_syscallno(), aarch64) << " tid " << t->tid; + LOG(debug) << "Patching syscall at " << ip - 4 << " syscall " + << syscall_name(t->regs().original_syscallno(), aarch64) << " tid " << t->tid; - auto success = patch_syscall_with_hook(*this, t, syscall_hooks[0], ip, 4, 0); + auto success = patch_syscall_with_hook(*this, t, syscall_hooks[0], ip - 4, 4, 0); if (!success && entering_syscall) { // Need to reenter the syscall to undo exit_syscall_and_prepare_restart if (!t->enter_syscall()) { @@ -1234,8 +1234,8 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca } if (!success) { - LOG(debug) << "Failed to patch syscall at " << ip << " syscall " - << syscall_name(r.original_syscallno(), aarch64) << " tid " << t->tid; + LOG(debug) << "Failed to patch syscall at " << ip - 4 << " syscall " + << syscall_name(t->regs().original_syscallno(), aarch64) << " tid " << t->tid; tried_to_patch_syscall_addresses.insert(ip); return false; } @@ -1243,7 +1243,14 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca return true; } -bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) { + +bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall, bool &should_retry) { + Registers r = t->regs(); + remote_code_ptr ip = r.ip(); + return try_patch_syscall(t, entering_syscall, should_retry, ip); +} + +bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall, bool &should_retry, remote_code_ptr ip) { if (syscall_hooks.empty()) { // Syscall hooks not set up yet. Don't spew warnings, and don't // fill tried_to_patch_syscall_addresses with addresses that we might be @@ -1261,14 +1268,12 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) { return false; } - Registers r = t->regs(); - remote_code_ptr ip = r.ip(); // We should not get here for untraced syscalls or anything else from the rr page. // These should be normally prevented by our seccomp filter // and in the case of syscalls interrupted by signals, // the check for the syscall restart should prevent us from reaching here. - DEBUG_ASSERT(ip.to_data_ptr() < AddressSpace::rr_page_start() || - ip.to_data_ptr() >= AddressSpace::rr_page_end()); + ASSERT(t, ip.to_data_ptr() < AddressSpace::rr_page_start() || + ip.to_data_ptr() >= AddressSpace::rr_page_end()); if (tried_to_patch_syscall_addresses.count(ip) || is_jump_stub_instruction(ip, true)) { return false; } @@ -1303,13 +1308,13 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) { } if (arch == aarch64) { - return try_patch_syscall_aarch64(t, entering_syscall); + return try_patch_syscall_aarch64(t, ip, entering_syscall); } - return try_patch_syscall_x86ish(t, entering_syscall, arch); + return try_patch_syscall_x86ish(t, ip, entering_syscall, arch, should_retry); } bool Monkeypatcher::try_patch_trapping_instruction(RecordTask* t, size_t instruction_length, - bool before_instruction) { + bool before_instruction, bool &should_retry) { if (syscall_hooks.empty()) { // Syscall hooks not set up yet. Don't spew warnings, and don't // fill tried_to_patch_syscall_addresses with addresses that we might be @@ -1332,8 +1337,9 @@ bool Monkeypatcher::try_patch_trapping_instruction(RecordTask* t, size_t instruc // event, not a FLUSH_SYSCALLBUF event. t->maybe_flush_syscallbuf(); + bool transient_failure = false; const syscall_patch_hook* hook_ptr = - find_syscall_hook(t, ip_of_instruction, before_instruction, instruction_length); + find_syscall_hook(t, ip_of_instruction, before_instruction, instruction_length, should_retry, transient_failure); bool success = false; if (hook_ptr) { LOG(debug) << "Patching trapping instruction at " << ip_of_instruction << " tid " << t->tid; @@ -1343,7 +1349,7 @@ bool Monkeypatcher::try_patch_trapping_instruction(RecordTask* t, size_t instruc } if (!success) { - if (!t->retry_syscall_patching) { + if (!should_retry && !transient_failure) { LOG(debug) << "Failed to patch trapping instruction at " << ip_of_instruction << " tid " << t->tid; tried_to_patch_syscall_addresses.insert(ip_of_instruction + instruction_length); } diff --git a/src/Monkeypatcher.h b/src/Monkeypatcher.h index d683d9c3925..e43adfea86c 100644 --- a/src/Monkeypatcher.h +++ b/src/Monkeypatcher.h @@ -63,10 +63,12 @@ class Monkeypatcher { * Zero or more mapping operations are also recorded to the trace and must * be replayed. */ - bool try_patch_syscall(RecordTask* t, bool entering_syscall = true); - bool try_patch_syscall_x86ish(RecordTask* t, bool entering_syscall, - SupportedArch arch); - bool try_patch_syscall_aarch64(RecordTask* t, bool entering_syscall); + bool try_patch_syscall(RecordTask* t, bool entering_syscall, bool &should_retry); + bool try_patch_syscall(RecordTask* t, bool entering_syscall, bool &should_retry, remote_code_ptr ip); + + bool try_patch_syscall_x86ish(RecordTask* t, remote_code_ptr ip, bool entering_syscall, + SupportedArch arch, bool &should_retry); + bool try_patch_syscall_aarch64(RecordTask* t, remote_code_ptr ip, bool entering_syscall); /** * Try to patch the trapping instruction that |t| just trapped on. If this @@ -78,7 +80,8 @@ class Monkeypatcher { * be replayed. */ bool try_patch_trapping_instruction(RecordTask* t, size_t instruction_length, - bool before_instruction = true); + bool before_instruction, + bool &should_retry); /** * Replace all extended jumps by syscalls again. Note that we do not try to @@ -161,7 +164,9 @@ class Monkeypatcher { const syscall_patch_hook* find_syscall_hook(RecordTask* t, remote_code_ptr ip, bool entering_syscall, - size_t instruction_length); + size_t instruction_length, + bool &should_retry, + bool &transient_failure); /** * The list of supported syscall patches obtained from the preload diff --git a/src/RecordSession.cc b/src/RecordSession.cc index 4990247913d..36d8b6d9b57 100644 --- a/src/RecordSession.cc +++ b/src/RecordSession.cc @@ -1025,13 +1025,32 @@ void RecordSession::desched_state_changed(RecordTask* t) { (uint8_t)1); } +static void retry_syscall_patch(RecordTask* t, remote_code_ptr orig_syscall_ip) +{ + bool should_retry = false; + if (t->vm()->monkeypatcher().try_patch_syscall(t, false, should_retry, orig_syscall_ip)) { + // Syscall was patched. Emit event and continue execution. + LOG(debug) << "Retried patch applied successfully"; + auto ev = Event::patch_syscall(); + ev.PatchSyscall().patch_after_syscall = true; + t->record_event(ev); + } + ASSERT(t, !should_retry); +} + static void syscall_not_restarted(RecordTask* t) { LOG(debug) << " " << t->tid << ": popping abandoned interrupted " << t->ev() << "; pending events:"; if (IS_LOGGING(debug)) { t->log_pending_events(); } + bool should_retry_patch = t->ev().Syscall().should_retry_patch; + remote_code_ptr orig_syscall_ip = t->ev().Syscall().regs.ip(); t->pop_syscall_interruption(); + if (should_retry_patch) { + LOG(debug) << "Retrying deferred syscall patching non-restarted syscall at " << orig_syscall_ip << " (current ip " << t->regs().ip() << ")"; + retry_syscall_patch(t, orig_syscall_ip); + } } /** @@ -1080,15 +1099,12 @@ static void maybe_discard_syscall_interruption(RecordTask* t, intptr_t ret) { if (0 > ret) { syscall_not_restarted(t); } else if (t->arch() == x86 || t->arch() == x86_64) { - // On x86, we would have expected this to get restored to the syscallno. - // Since the syscallno is in a different register on other platforms, this - // assert does not apply. - // We have seen cases where `ret` is `restart_syscall`. - ASSERT(t, syscallno == ret || - is_restart_syscall_syscall(ret, t->ev().Syscall().arch())) - << "Interrupted call was " << t->ev().Syscall().syscall_name() - << " and sigreturn claims to be restarting " - << syscall_name(ret, t->ev().Syscall().arch()); + SupportedArch arch; + if (get_syscall_instruction_arch(t, t->regs().ip(), &arch) && arch == t->arch() && + (syscallno == ret || is_restart_syscall_syscall(ret, t->ev().Syscall().arch()))) { + return; + } + syscall_not_restarted(t); } } @@ -1345,7 +1361,13 @@ void RecordSession::syscall_state_changed(RecordTask* t, * done with it. But if we are, "freeze" it on the * event stack until the execution point where it * might be restarted. */ + bool need_patch_retry = false; + remote_code_ptr orig_syscall_ip; if (!may_restart) { + need_patch_retry = t->ev().Syscall().should_retry_patch; + if (need_patch_retry) { + orig_syscall_ip = t->ev().Syscall().regs.ip(); + } t->pop_syscall(); if (EV_DESCHED == t->ev().type()) { LOG(debug) << " exiting desched critical section"; @@ -1358,17 +1380,9 @@ void RecordSession::syscall_state_changed(RecordTask* t, t->canonicalize_regs(syscall_arch); - if (!may_restart) { - if (t->retry_syscall_patching) { - LOG(debug) << "Retrying deferred syscall patching"; - t->retry_syscall_patching = false; - if (t->vm()->monkeypatcher().try_patch_syscall(t, false)) { - // Syscall was patched. Emit event and continue execution. - auto ev = Event::patch_syscall(); - ev.PatchSyscall().patch_after_syscall = true; - t->record_event(ev); - } - } + if (need_patch_retry) { + LOG(debug) << "Retrying deferred syscall patching at " << orig_syscall_ip << " (current ip " << t->regs().ip() << ")"; + retry_syscall_patch(t, orig_syscall_ip); } } @@ -2061,8 +2075,9 @@ bool RecordSession::process_syscall_entry(RecordTask* t, StepState* step_state, } // Don't ever patch a sigreturn syscall. These can't go through the syscallbuf. + bool should_retry = false; if (!is_sigreturn(t->regs().original_syscallno(), t->arch())) { - if (t->vm()->monkeypatcher().try_patch_syscall(t)) { + if (t->vm()->monkeypatcher().try_patch_syscall(t, true, should_retry)) { // Syscall was patched. Emit event and continue execution. t->record_event(Event::patch_syscall()); return true; @@ -2076,6 +2091,7 @@ bool RecordSession::process_syscall_entry(RecordTask* t, StepState* step_state, } t->push_event(SyscallEvent(t->regs().original_syscallno(), syscall_arch)); + t->ev().Syscall().should_retry_patch = should_retry; } check_initial_task_syscalls(t, step_result); diff --git a/src/RecordTask.cc b/src/RecordTask.cc index 4e2136927d0..a369b82971d 100644 --- a/src/RecordTask.cc +++ b/src/RecordTask.cc @@ -200,7 +200,6 @@ RecordTask::RecordTask(RecordSession& session, pid_t _tid, uint32_t serial, did_record_robust_futex_changes(false), waiting_for_reap(false), waiting_for_ptrace_exit(false), - retry_syscall_patching(false), sent_shutdown_kill(false), did_execveat(false), tick_request_override((TicksRequest)0), diff --git a/src/RecordTask.h b/src/RecordTask.h index 18db093643d..59968db2b09 100644 --- a/src/RecordTask.h +++ b/src/RecordTask.h @@ -808,9 +808,6 @@ class RecordTask final : public Task { // be manually run. bool waiting_for_ptrace_exit; - // When exiting a syscall, we should call MonkeyPatcher::try_patch_syscall again. - bool retry_syscall_patching; - // We've sent a SIGKILL during shutdown for this task. bool sent_shutdown_kill; diff --git a/src/preload/rrcalls.h b/src/preload/rrcalls.h index b448495b3ff..8d358667aa8 100644 --- a/src/preload/rrcalls.h +++ b/src/preload/rrcalls.h @@ -73,6 +73,15 @@ * presence of absence of rr. */ #define SYS_rrcall_check_presence (RR_CALL_BASE + 8) + /* + * If `arg1` is `RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED`, + * SYS_rrcall_check_presence returns 0 only if + * 1) The syscallbuf was used to service the syscall + * 2) The syscallbuf is disabled + * + * Otherwise returns ENOTSUP. + */ + #define RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED 1 /** * Requests that rr detach from this process and re-create outside of its * process tree, such that it may run without seccomp. diff --git a/src/preload/syscallbuf.c b/src/preload/syscallbuf.c index baac71df8d3..f596fb8b978 100644 --- a/src/preload/syscallbuf.c +++ b/src/preload/syscallbuf.c @@ -4341,6 +4341,13 @@ case SYS_epoll_pwait: #undef CASE #undef CASE_GENERIC_NONBLOCKING #undef CASE_GENERIC_NONBLOCKING_FD + case SYS_rrcall_check_presence: + if (call->args[0] == RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED && + call->args[1] == 0 && call->args[2] == 0 && call->args[3] == 0 && + call->args[4] == 0 && call->args[5] == 0) { + return 0; + } + // fallthrough default: return traced_raw_syscall(call); } diff --git a/src/record_signal.cc b/src/record_signal.cc index 81f34950c50..738953c8566 100644 --- a/src/record_signal.cc +++ b/src/record_signal.cc @@ -102,6 +102,7 @@ static bool try_handle_trapped_instruction(RecordTask* t, siginfo_t* si) { ASSERT(t, len > 0); Registers r = t->regs(); + bool should_retry_patch = false; if (special_instruction.opcode == SpecialInstOpcode::ARM_MRS_CNTVCT_EL0 || special_instruction.opcode == SpecialInstOpcode::ARM_MRS_CNTVCTSS_EL0) { if (special_instruction.regno != 31) { @@ -114,7 +115,7 @@ static bool try_handle_trapped_instruction(RecordTask* t, siginfo_t* si) { } else if (special_instruction.opcode == SpecialInstOpcode::X86_RDTSC || special_instruction.opcode == SpecialInstOpcode::X86_RDTSCP) { if (special_instruction.opcode == SpecialInstOpcode::X86_RDTSC && - t->vm()->monkeypatcher().try_patch_trapping_instruction(t, len, true)) { + t->vm()->monkeypatcher().try_patch_trapping_instruction(t, len, true, should_retry_patch)) { Event ev = Event::patch_syscall(); ev.PatchSyscall().patch_trapping_instruction = true; t->record_event(ev); @@ -141,15 +142,16 @@ static bool try_handle_trapped_instruction(RecordTask* t, siginfo_t* si) { t->set_regs(r); t->record_event(Event::instruction_trap()); - if (t->retry_syscall_patching) { + if (should_retry_patch) { LOG(debug) << "Retrying deferred syscall patching"; - t->retry_syscall_patching = false; - if (t->vm()->monkeypatcher().try_patch_trapping_instruction(t, len, false)) { + should_retry_patch = false; + if (t->vm()->monkeypatcher().try_patch_trapping_instruction(t, len, false, should_retry_patch)) { // Instruction was patched. Emit event. auto ev = Event::patch_syscall(); ev.PatchSyscall().patch_after_syscall = true; t->record_event(ev); } + ASSERT(t, !should_retry_patch); } t->push_event(Event::noop()); diff --git a/src/record_syscall.cc b/src/record_syscall.cc index c164b1caa0b..eed3bd271a8 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -5250,10 +5250,22 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, // are zero. bool arguments_are_zero = true; Registers r = t->regs(); - for (int i = 1; i <= 6; ++i) { + int first_zero_reg = 1; + uintptr_t ret = 0; + if (r.arg(1) == RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED) { + if (!!getenv(SYSCALLBUF_ENABLED_ENV_VAR)) { + // Syscallbuf is enabled, but we reached here anyway. Return -ENOTSUP. + ret = (uintptr_t)-ENOTSUP; + } + first_zero_reg = 2; + } + for (int i = first_zero_reg; i <= 6; ++i) { arguments_are_zero &= r.arg(i) == 0; } - syscall_state.emulate_result(arguments_are_zero ? 0 : (uintptr_t)-EINVAL); + if (!arguments_are_zero) + syscall_state.emulate_result((uintptr_t)-EINVAL); + else + syscall_state.emulate_result(ret); syscall_state.expect_errno = ENOSYS; return PREVENT_SWITCH; } diff --git a/src/test/x86/deferred_patch.c b/src/test/x86/deferred_patch.c new file mode 100644 index 00000000000..eb166f43cdd --- /dev/null +++ b/src/test/x86/deferred_patch.c @@ -0,0 +1,160 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" +#include "rrcalls.h" + +/* + * When we see a syscall patch region in which the syscall is the last instruction, + * we defer patching until we exit the corresponding syscall + * (since we can't assume that we can jump to the top of the patch and re-execute + * the preceeding instructions). This test tests a few corner cases: + * + * 1. When a syscall that had such a deferred patch is interrupted by a signal and the + * signal handler itself makes a syscall, we need to be sure that we don't accidentally + * try to patch that syscall. + * + * 2. If another thread is in the middle of the same syscall we need to make sure that we + * don't accidentally patch it (but do patch it on the exit of the second syscall). + * + * 3. If the signal handler rewrites the signal context to resume execution elsewhere, we still + * want to try patching the original syscall. + */ + +#ifdef __x86_64__ +static __attribute__((noinline)) uintptr_t deferred_patch_syscall( + uintptr_t syscall, uintptr_t arg1, uintptr_t arg2, + uintptr_t arg3, uintptr_t arg4) { + uintptr_t ret; + register uintptr_t r10 __asm__("r10") = arg4; + register long r8 __asm__("r8") = 0; + register long r9 __asm__("r9") = 0; + __asm__ volatile( + /* Use a syscall sequence for which we have a PATCH_SYSCALL_INSTRUCTION_IS_LAST */ + ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t" + "syscall\n\t" + /* Make sure we don't accidentally match any hook pattern with the instructions after */ + "cmp $0x77,%%rax\n\t" + : "=a"(ret) + : "a"(syscall), "D"(arg1), "S"(arg2), "d"(arg3), "r"(r10), "r"(r8), "r"(r9) + : "flags"); + return ret; +} + +static __attribute__((noinline)) uintptr_t deferred_patch_syscall2( + uintptr_t syscall, uintptr_t arg1, uintptr_t arg2, + uintptr_t arg3, uintptr_t arg4) { + uintptr_t ret; + register uintptr_t r10 __asm__("r10") = arg4; + register long r8 __asm__("r8") = 0; + register long r9 __asm__("r9") = 0; + __asm__ volatile( + ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t" + "syscall\n\t" + "cmp $0x77,%%rax\n\t" + : "=a"(ret) + : "a"(syscall), "D"(arg1), "S"(arg2), "d"(arg3), "r"(r10), "r"(r8), "r"(r9) + : "flags"); + return ret; +} + +int pipefds[2]; +int pipefds2[2]; +uint8_t byte = 0x1; +volatile uint32_t futex = 0; + +static void handle_alrm(__attribute__((unused)) int sig, + __attribute__((unused)) siginfo_t* si, void* user) { + ucontext_t* ctx = (ucontext_t*)user; + // Skip the syscall and cmp after and have it return 0 + ctx->uc_mcontext.gregs[REG_RIP] += 4; + ctx->uc_mcontext.gregs[REG_RAX] = 0; + futex = 1; + return; +} + +static void handle_usr1(__attribute__((unused)) int sig) { + // Wake up child + test_assert(1 == write(pipefds2[1], &byte, 1)); + // We need this read to desched, so that rr sees a syscall inside the rr page. + // We want to make sure that rr doesn't acccidentally try patching that syscall + // rather than the futex one that we interrupted. + test_assert(1 == read(pipefds[0], &byte, 1)); + futex = 2; + return; +} + +static void handle_usr2(__attribute__((unused)) int sig) { + futex = 3; + return; +} + +static void futex_wait(uintptr_t val) +{ + test_assert((uintptr_t)-EAGAIN == deferred_patch_syscall(SYS_futex, (uintptr_t)&futex, FUTEX_WAIT, val, (uintptr_t)NULL)); +} + +pid_t parent; + +static void *do_thread(void*) { + test_assert(1 == read(pipefds2[0], &byte, 1)); + sched_yield(); + syscall(SYS_tkill, parent, SIGALRM); + test_assert(1 == read(pipefds2[0], &byte, 1)); + sched_yield(); + syscall(SYS_tkill, parent, SIGUSR1); + test_assert(1 == read(pipefds2[0], &byte, 1)); + test_assert(1 == write(pipefds[1], &byte, 1)); + futex_wait(2); + return NULL; +} + +int main(void) { + test_assert(0 == pipe(pipefds)); + test_assert(0 == pipe(pipefds2)); + + parent = sys_gettid(); + + pthread_t t; + + // Setup signal handlers + struct sigaction sa; + sa.sa_sigaction = handle_alrm; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + test_assert(0 == sigaction(SIGALRM, &sa, NULL)); + test_assert(0 == signal(SIGUSR1, handle_usr1)); + test_assert(0 == signal(SIGUSR2, handle_usr2)); + + // Kick off driver thread + pthread_create(&t, NULL, do_thread, NULL); + + // Wait for SIGALARM + test_assert(1 == write(pipefds2[1], &byte, 1)); + test_assert(0 == deferred_patch_syscall2(SYS_futex, (uintptr_t)&futex, FUTEX_WAIT, 0, (uintptr_t)NULL)); + test_assert(futex == 1); + + // Wait for SIGUSR1 + test_assert(1 == write(pipefds2[1], &byte, 1)); + futex_wait(1); + pthread_kill(t, SIGUSR2); + pthread_join(t, NULL); + + test_assert(futex == 3); + + // Make sure that the patching actually happened (if the syscallbuf is enabled) + uintptr_t ret = deferred_patch_syscall(SYS_rrcall_check_presence, RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED, 0, 0, 0); + test_assert(ret == (uintptr_t)-ENOSYS || ret == 0); + + ret = deferred_patch_syscall2(SYS_rrcall_check_presence, RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED, 0, 0, 0); + test_assert(ret == (uintptr_t)-ENOSYS || ret == 0); + + atomic_puts("EXIT-SUCCESS"); + return 0; +} +#else +int main(void) { + atomic_puts("This test can only be run on x86_64. Skipping..."); + atomic_puts("EXIT-SUCCESS"); + return 77; +} +#endif \ No newline at end of file From 1d56770377627bb89d664abf6c724f5393115a9e Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Fri, 15 Nov 2024 01:34:19 +0000 Subject: [PATCH 2/3] Give the thread a chance to actually enter the futex --- src/test/x86/deferred_patch.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/test/x86/deferred_patch.c b/src/test/x86/deferred_patch.c index eb166f43cdd..f5ac39193ca 100644 --- a/src/test/x86/deferred_patch.c +++ b/src/test/x86/deferred_patch.c @@ -105,6 +105,7 @@ static void *do_thread(void*) { test_assert(1 == read(pipefds2[0], &byte, 1)); test_assert(1 == write(pipefds[1], &byte, 1)); futex_wait(2); + test_assert(futex == 3); return NULL; } @@ -136,11 +137,12 @@ int main(void) { // Wait for SIGUSR1 test_assert(1 == write(pipefds2[1], &byte, 1)); futex_wait(1); + + // Issue SIGUSR2 in thread to let it out of the futex (and let it patch the syscall) + sched_yield(); sched_yield(); pthread_kill(t, SIGUSR2); pthread_join(t, NULL); - test_assert(futex == 3); - // Make sure that the patching actually happened (if the syscallbuf is enabled) uintptr_t ret = deferred_patch_syscall(SYS_rrcall_check_presence, RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED, 0, 0, 0); test_assert(ret == (uintptr_t)-ENOSYS || ret == 0); From 822b80c8a39151177d0c775f7d51fab5a9c1c5b8 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 18 Nov 2024 05:20:08 +0000 Subject: [PATCH 3/3] Add missing synchronization edge --- src/test/x86/deferred_patch.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/test/x86/deferred_patch.c b/src/test/x86/deferred_patch.c index f5ac39193ca..5db525c682e 100644 --- a/src/test/x86/deferred_patch.c +++ b/src/test/x86/deferred_patch.c @@ -80,6 +80,7 @@ static void handle_usr1(__attribute__((unused)) int sig) { // rather than the futex one that we interrupted. test_assert(1 == read(pipefds[0], &byte, 1)); futex = 2; + test_assert(1 == write(pipefds2[1], &byte, 1)); return; } @@ -104,6 +105,7 @@ static void *do_thread(void*) { syscall(SYS_tkill, parent, SIGUSR1); test_assert(1 == read(pipefds2[0], &byte, 1)); test_assert(1 == write(pipefds[1], &byte, 1)); + test_assert(1 == read(pipefds2[0], &byte, 1)); futex_wait(2); test_assert(futex == 3); return NULL; @@ -159,4 +161,4 @@ int main(void) { atomic_puts("EXIT-SUCCESS"); return 77; } -#endif \ No newline at end of file +#endif