diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1c3a4780745..5b40b75b83f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1025,6 +1025,7 @@ set(BASIC_TESTS
   x86/cross_arch
   cwd_inaccessible
   daemon
+  x86/deferred_patch
   desched_blocking_poll
   desched_sigkill
   detach_huge_mmap
diff --git a/src/Event.h b/src/Event.h
index c1422ae7898..1aacd76bef0 100644
--- a/src/Event.h
+++ b/src/Event.h
@@ -279,6 +279,8 @@ struct SyscallEvent {
   bool failed_during_preparation;
   // Syscall is being emulated via PTRACE_SYSEMU.
   bool in_sysemu;
+  // True if we should retry patching on exit from this syscall
+  bool should_retry_patch;
 };

 struct syscall_interruption_t {
diff --git a/src/Monkeypatcher.cc b/src/Monkeypatcher.cc
index 1055b434df7..6cdb6b762b9 100644
--- a/src/Monkeypatcher.cc
+++ b/src/Monkeypatcher.cc
@@ -981,7 +981,9 @@ static uint64_t jump_patch_size(SupportedArch arch)
 const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t,
                                                            remote_code_ptr ip,
                                                            bool entering_syscall,
-                                                           size_t instruction_length) {
+                                                           size_t instruction_length,
+                                                           bool &should_retry,
+                                                           bool &transient_failure) {
   /* we need to inspect this many bytes before the start of the instruction,
      to find every short jump that might land after it. Conservative. */
   static const intptr_t LOOK_BACK = 0x80;
@@ -1035,9 +1037,9 @@ const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t,
         // A patch that uses bytes before the syscall can't be done when
         // entering the syscall, it must be done when exiting. So set a flag on
         // the Task that tells us to come back later.
-        t->retry_syscall_patching = true;
+        should_retry = true;
         LOG(debug) << "Deferring syscall patching at " << ip << " in " << t
-          << " until syscall exit.";
+                   << " until syscall exit.";
         return nullptr;
       }
       matches_hook = true;
@@ -1111,6 +1113,7 @@ const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t,
       end_range = ip + instruction_length + hook.patch_region_length;
     }
     if (!safe_for_syscall_patching(start_range, end_range, t)) {
+      transient_failure = true;
      LOG(debug)
          << "Temporarily declining to patch syscall at " << ip
          << " because a different task has its ip in the patched range";
@@ -1141,18 +1144,18 @@ const syscall_patch_hook* Monkeypatcher::find_syscall_hook(RecordTask* t,
 // The `entering_syscall` flag tells us whether or not we're at syscall entry.
 // If we are, and we find a pattern that can only be patched at exit, we'll
 // set a flag on the RecordTask telling it to try again after syscall exit.
-bool Monkeypatcher::try_patch_syscall_x86ish(RecordTask* t, bool entering_syscall,
-                                             SupportedArch arch) {
-  Registers r = t->regs();
-  remote_code_ptr ip = r.ip();
-
+bool Monkeypatcher::try_patch_syscall_x86ish(RecordTask* t, remote_code_ptr ip, bool entering_syscall,
+                                             SupportedArch arch, bool &should_retry) {
   ASSERT(t, is_x86ish(arch)) << "Unsupported architecture";
   size_t instruction_length = rr::syscall_instruction_length(arch);

+  bool transient_failure = false;
   const syscall_patch_hook* hook_ptr = find_syscall_hook(t, ip - instruction_length,
-      entering_syscall, instruction_length);
+      entering_syscall, instruction_length, should_retry, transient_failure);
   bool success = false;
-  intptr_t syscallno = r.original_syscallno();
+  // `syscallno` isn't necessarily correct here (in the extremely rare corner case that we
+  // deferred a patch and the signal handler changed it), but we only use it for logging.
+  intptr_t syscallno = t->regs().original_syscallno();
   if (hook_ptr) {
     // Get out of executing the current syscall before we patch it.
     if (entering_syscall && !t->exit_syscall_and_prepare_restart()) {
@@ -1170,7 +1173,7 @@ bool Monkeypatcher::try_patch_syscall_x86ish(RecordTask* t, bool entering_syscal
   }

   if (!success) {
-    if (!t->retry_syscall_patching) {
+    if (!should_retry && !transient_failure) {
       LOG(debug) << "Failed to patch syscall at " << ip << " syscall "
                  << syscall_name(syscallno, t->arch()) << " tid " << t->tid;
       tried_to_patch_syscall_addresses.insert(ip);
@@ -1181,15 +1184,12 @@ bool Monkeypatcher::try_patch_syscall_x86ish(RecordTask* t, bool entering_syscal
   return true;
 }

-bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_syscall) {
-  Registers r = t->regs();
-  remote_code_ptr ip = r.ip() - 4;
-
+bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, remote_code_ptr ip, bool entering_syscall) {
   uint32_t inst[2] = {0, 0};
-  size_t bytes_count = t->read_bytes_fallible(ip.to_data_ptr() - 4, 8, &inst);
+  size_t bytes_count = t->read_bytes_fallible(ip.to_data_ptr() - 8, 8, &inst);
   if (bytes_count < sizeof(inst) || inst[1] != 0xd4000001) {
     LOG(debug) << "Declining to patch syscall at "
-               << ip << " for unexpected instruction";
+               << ip - 4 << " for unexpected instruction";
     tried_to_patch_syscall_addresses.insert(ip);
     return false;
   }
@@ -1201,7 +1201,7 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca
     // Our syscall hook cannot do that so this would have to be a raw syscall.
     // We can handle this at runtime but if we know the call is definitely
     // a clone we can avoid patching it here.
-    LOG(debug) << "Declining to patch clone syscall at " << ip;
+    LOG(debug) << "Declining to patch clone syscall at " << ip - 4;
     tried_to_patch_syscall_addresses.insert(ip);
     return false;
   }
@@ -1210,9 +1210,9 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca
              memcmp(syscall_hooks[0].patch_region_bytes, &inst[1], 4) == 0))
     << "Unknown syscall hook";

-  if (!safe_for_syscall_patching(ip, ip + 4, t)) {
+  if (!safe_for_syscall_patching(ip - 4, ip, t)) {
     LOG(debug)
-        << "Temporarily declining to patch syscall at " << ip
+        << "Temporarily declining to patch syscall at " << ip - 4
         << " because a different task has its ip in the patched range";
     return false;
   }
@@ -1222,10 +1222,10 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca
     return false;
   }

-  LOG(debug) << "Patching syscall at " << ip << " syscall "
-             << syscall_name(r.original_syscallno(), aarch64) << " tid " << t->tid;
+  LOG(debug) << "Patching syscall at " << ip - 4 << " syscall "
+             << syscall_name(t->regs().original_syscallno(), aarch64) << " tid " << t->tid;

-  auto success = patch_syscall_with_hook(*this, t, syscall_hooks[0], ip, 4, 0);
+  auto success = patch_syscall_with_hook(*this, t, syscall_hooks[0], ip - 4, 4, 0);
   if (!success && entering_syscall) {
     // Need to reenter the syscall to undo exit_syscall_and_prepare_restart
     if (!t->enter_syscall()) {
@@ -1234,8 +1234,8 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca
   }

   if (!success) {
-    LOG(debug) << "Failed to patch syscall at " << ip << " syscall "
-               << syscall_name(r.original_syscallno(), aarch64) << " tid " << t->tid;
+    LOG(debug) << "Failed to patch syscall at " << ip - 4 << " syscall "
+               << syscall_name(t->regs().original_syscallno(), aarch64) << " tid " << t->tid;
     tried_to_patch_syscall_addresses.insert(ip);
     return false;
   }
@@ -1243,7 +1243,14 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca
   return true;
 }

-bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) {
+
+bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall, bool &should_retry) {
+  Registers r = t->regs();
+  remote_code_ptr ip = r.ip();
+  return try_patch_syscall(t, entering_syscall, should_retry, ip);
+}
+
+bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall, bool &should_retry, remote_code_ptr ip) {
   if (syscall_hooks.empty()) {
     // Syscall hooks not set up yet. Don't spew warnings, and don't
     // fill tried_to_patch_syscall_addresses with addresses that we might be
@@ -1261,14 +1268,12 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) {
     return false;
   }

-  Registers r = t->regs();
-  remote_code_ptr ip = r.ip();
   // We should not get here for untraced syscalls or anything else from the rr page.
   // These should be normally prevented by our seccomp filter
   // and in the case of syscalls interrupted by signals,
   // the check for the syscall restart should prevent us from reaching here.
-  DEBUG_ASSERT(ip.to_data_ptr() < AddressSpace::rr_page_start() ||
-               ip.to_data_ptr() >= AddressSpace::rr_page_end());
+  ASSERT(t, ip.to_data_ptr() < AddressSpace::rr_page_start() ||
+            ip.to_data_ptr() >= AddressSpace::rr_page_end());
   if (tried_to_patch_syscall_addresses.count(ip) || is_jump_stub_instruction(ip, true)) {
     return false;
   }
@@ -1303,13 +1308,13 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) {
   }

   if (arch == aarch64) {
-    return try_patch_syscall_aarch64(t, entering_syscall);
+    return try_patch_syscall_aarch64(t, ip, entering_syscall);
   }
-  return try_patch_syscall_x86ish(t, entering_syscall, arch);
+  return try_patch_syscall_x86ish(t, ip, entering_syscall, arch, should_retry);
 }

 bool Monkeypatcher::try_patch_trapping_instruction(RecordTask* t, size_t instruction_length,
-                                                   bool before_instruction) {
+                                                   bool before_instruction, bool &should_retry) {
   if (syscall_hooks.empty()) {
     // Syscall hooks not set up yet. Don't spew warnings, and don't
     // fill tried_to_patch_syscall_addresses with addresses that we might be
@@ -1332,8 +1337,9 @@ bool Monkeypatcher::try_patch_trapping_instruction(RecordTask* t, size_t instruc
   // event, not a FLUSH_SYSCALLBUF event.
   t->maybe_flush_syscallbuf();

+  bool transient_failure = false;
   const syscall_patch_hook* hook_ptr =
-      find_syscall_hook(t, ip_of_instruction, before_instruction, instruction_length);
+      find_syscall_hook(t, ip_of_instruction, before_instruction, instruction_length, should_retry, transient_failure);
   bool success = false;
   if (hook_ptr) {
     LOG(debug) << "Patching trapping instruction at " << ip_of_instruction << " tid " << t->tid;
@@ -1343,7 +1349,7 @@ bool Monkeypatcher::try_patch_trapping_instruction(RecordTask* t, size_t instruc
   }
   if (!success) {
-    if (!t->retry_syscall_patching) {
+    if (!should_retry && !transient_failure) {
       LOG(debug) << "Failed to patch trapping instruction at " << ip_of_instruction << " tid " << t->tid;
       tried_to_patch_syscall_addresses.insert(ip_of_instruction + instruction_length);
     }
   }
diff --git a/src/Monkeypatcher.h b/src/Monkeypatcher.h
index d683d9c3925..e43adfea86c 100644
--- a/src/Monkeypatcher.h
+++ b/src/Monkeypatcher.h
@@ -63,10 +63,12 @@ class Monkeypatcher {
    * Zero or more mapping operations are also recorded to the trace and must
    * be replayed.
    */
-  bool try_patch_syscall(RecordTask* t, bool entering_syscall = true);
-  bool try_patch_syscall_x86ish(RecordTask* t, bool entering_syscall,
-                                SupportedArch arch);
-  bool try_patch_syscall_aarch64(RecordTask* t, bool entering_syscall);
+  bool try_patch_syscall(RecordTask* t, bool entering_syscall, bool &should_retry);
+  bool try_patch_syscall(RecordTask* t, bool entering_syscall, bool &should_retry, remote_code_ptr ip);
+
+  bool try_patch_syscall_x86ish(RecordTask* t, remote_code_ptr ip, bool entering_syscall,
+                                SupportedArch arch, bool &should_retry);
+  bool try_patch_syscall_aarch64(RecordTask* t, remote_code_ptr ip, bool entering_syscall);

   /**
    * Try to patch the trapping instruction that |t| just trapped on. If this
@@ -78,7 +80,8 @@ class Monkeypatcher {
    * be replayed.
    */
   bool try_patch_trapping_instruction(RecordTask* t, size_t instruction_length,
-                                      bool before_instruction = true);
+                                      bool before_instruction,
+                                      bool &should_retry);

   /**
    * Replace all extended jumps by syscalls again. Note that we do not try to
@@ -161,7 +164,9 @@ class Monkeypatcher {
   const syscall_patch_hook* find_syscall_hook(RecordTask* t,
                                               remote_code_ptr ip,
                                               bool entering_syscall,
-                                              size_t instruction_length);
+                                              size_t instruction_length,
+                                              bool &should_retry,
+                                              bool &transient_failure);

   /**
    * The list of supported syscall patches obtained from the preload
diff --git a/src/RecordSession.cc b/src/RecordSession.cc
index 4990247913d..36d8b6d9b57 100644
--- a/src/RecordSession.cc
+++ b/src/RecordSession.cc
@@ -1025,13 +1025,32 @@ void RecordSession::desched_state_changed(RecordTask* t) {
                                 (uint8_t)1);
 }

+static void retry_syscall_patch(RecordTask* t, remote_code_ptr orig_syscall_ip)
+{
+  bool should_retry = false;
+  if (t->vm()->monkeypatcher().try_patch_syscall(t, false, should_retry, orig_syscall_ip)) {
+    // Syscall was patched. Emit event and continue execution.
+    LOG(debug) << "Retried patch applied successfully";
+    auto ev = Event::patch_syscall();
+    ev.PatchSyscall().patch_after_syscall = true;
+    t->record_event(ev);
+  }
+  ASSERT(t, !should_retry);
+}
+
 static void syscall_not_restarted(RecordTask* t) {
   LOG(debug) << "  " << t->tid << ": popping abandoned interrupted " << t->ev()
              << "; pending events:";
   if (IS_LOGGING(debug)) {
     t->log_pending_events();
   }
+  bool should_retry_patch = t->ev().Syscall().should_retry_patch;
+  remote_code_ptr orig_syscall_ip = t->ev().Syscall().regs.ip();
   t->pop_syscall_interruption();
+  if (should_retry_patch) {
+    LOG(debug) << "Retrying deferred syscall patching non-restarted syscall at " << orig_syscall_ip << " (current ip " << t->regs().ip() << ")";
+    retry_syscall_patch(t, orig_syscall_ip);
+  }
 }

 /**
@@ -1080,15 +1099,12 @@ static void maybe_discard_syscall_interruption(RecordTask* t, intptr_t ret) {
   if (0 > ret) {
     syscall_not_restarted(t);
   } else if (t->arch() == x86 || t->arch() == x86_64) {
-    // On x86, we would have expected this to get restored to the syscallno.
-    // Since the syscallno is in a different register on other platforms, this
-    // assert does not apply.
-    // We have seen cases where `ret` is `restart_syscall`.
-    ASSERT(t, syscallno == ret ||
-                  is_restart_syscall_syscall(ret, t->ev().Syscall().arch()))
-        << "Interrupted call was " << t->ev().Syscall().syscall_name()
-        << " and sigreturn claims to be restarting "
-        << syscall_name(ret, t->ev().Syscall().arch());
+    SupportedArch arch;
+    if (get_syscall_instruction_arch(t, t->regs().ip(), &arch) && arch == t->arch() &&
+        (syscallno == ret || is_restart_syscall_syscall(ret, t->ev().Syscall().arch()))) {
+      return;
+    }
+    syscall_not_restarted(t);
   }
 }

@@ -1345,7 +1361,13 @@ void RecordSession::syscall_state_changed(RecordTask* t,
        * done with it. But if we are, "freeze" it on the
        * event stack until the execution point where it
        * might be restarted. */
+      bool need_patch_retry = false;
+      remote_code_ptr orig_syscall_ip;
       if (!may_restart) {
+        need_patch_retry = t->ev().Syscall().should_retry_patch;
+        if (need_patch_retry) {
+          orig_syscall_ip = t->ev().Syscall().regs.ip();
+        }
         t->pop_syscall();
         if (EV_DESCHED == t->ev().type()) {
           LOG(debug) << "  exiting desched critical section";
@@ -1358,17 +1380,9 @@ void RecordSession::syscall_state_changed(RecordTask* t,

       t->canonicalize_regs(syscall_arch);

-      if (!may_restart) {
-        if (t->retry_syscall_patching) {
-          LOG(debug) << "Retrying deferred syscall patching";
-          t->retry_syscall_patching = false;
-          if (t->vm()->monkeypatcher().try_patch_syscall(t, false)) {
-            // Syscall was patched. Emit event and continue execution.
-            auto ev = Event::patch_syscall();
-            ev.PatchSyscall().patch_after_syscall = true;
-            t->record_event(ev);
-          }
-        }
+      if (need_patch_retry) {
+        LOG(debug) << "Retrying deferred syscall patching at " << orig_syscall_ip << " (current ip " << t->regs().ip() << ")";
+        retry_syscall_patch(t, orig_syscall_ip);
       }
     }

@@ -2061,8 +2075,9 @@ bool RecordSession::process_syscall_entry(RecordTask* t, StepState* step_state,
   }

   // Don't ever patch a sigreturn syscall. These can't go through the syscallbuf.
+  bool should_retry = false;
   if (!is_sigreturn(t->regs().original_syscallno(), t->arch())) {
-    if (t->vm()->monkeypatcher().try_patch_syscall(t)) {
+    if (t->vm()->monkeypatcher().try_patch_syscall(t, true, should_retry)) {
       // Syscall was patched. Emit event and continue execution.
       t->record_event(Event::patch_syscall());
       return true;
@@ -2076,6 +2091,7 @@ bool RecordSession::process_syscall_entry(RecordTask* t, StepState* step_state,
     }

     t->push_event(SyscallEvent(t->regs().original_syscallno(), syscall_arch));
+    t->ev().Syscall().should_retry_patch = should_retry;
   }

   check_initial_task_syscalls(t, step_result);
diff --git a/src/RecordTask.cc b/src/RecordTask.cc
index 4e2136927d0..a369b82971d 100644
--- a/src/RecordTask.cc
+++ b/src/RecordTask.cc
@@ -200,7 +200,6 @@ RecordTask::RecordTask(RecordSession& session, pid_t _tid, uint32_t serial,
       did_record_robust_futex_changes(false),
       waiting_for_reap(false),
       waiting_for_ptrace_exit(false),
-      retry_syscall_patching(false),
       sent_shutdown_kill(false),
       did_execveat(false),
       tick_request_override((TicksRequest)0),
diff --git a/src/RecordTask.h b/src/RecordTask.h
index 18db093643d..59968db2b09 100644
--- a/src/RecordTask.h
+++ b/src/RecordTask.h
@@ -808,9 +808,6 @@ class RecordTask final : public Task {
   // be manually run.
   bool waiting_for_ptrace_exit;

-  // When exiting a syscall, we should call MonkeyPatcher::try_patch_syscall again.
-  bool retry_syscall_patching;
-
   // We've sent a SIGKILL during shutdown for this task.
   bool sent_shutdown_kill;

diff --git a/src/preload/rrcalls.h b/src/preload/rrcalls.h
index b448495b3ff..8d358667aa8 100644
--- a/src/preload/rrcalls.h
+++ b/src/preload/rrcalls.h
@@ -73,6 +73,15 @@
  * presence of absence of rr.
  */
 #define SYS_rrcall_check_presence (RR_CALL_BASE + 8)
+ /*
+  * If `arg1` is `RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED`,
+  * SYS_rrcall_check_presence returns 0 only if
+  * 1) The syscallbuf was used to service the syscall, or
+  * 2) The syscallbuf is disabled
+  *
+  * Otherwise returns ENOTSUP.
+  */
+ #define RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED 1
 /**
  * Requests that rr detach from this process and re-create outside of its
  * process tree, such that it may run without seccomp.
diff --git a/src/preload/syscallbuf.c b/src/preload/syscallbuf.c
index baac71df8d3..f596fb8b978 100644
--- a/src/preload/syscallbuf.c
+++ b/src/preload/syscallbuf.c
@@ -4341,6 +4341,13 @@ case SYS_epoll_pwait:
 #undef CASE
 #undef CASE_GENERIC_NONBLOCKING
 #undef CASE_GENERIC_NONBLOCKING_FD
+    case SYS_rrcall_check_presence:
+      if (call->args[0] == RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED &&
+          call->args[1] == 0 && call->args[2] == 0 && call->args[3] == 0 &&
+          call->args[4] == 0 && call->args[5] == 0) {
+        return 0;
+      }
+      // fallthrough
     default:
       return traced_raw_syscall(call);
   }
diff --git a/src/record_signal.cc b/src/record_signal.cc
index 81f34950c50..738953c8566 100644
--- a/src/record_signal.cc
+++ b/src/record_signal.cc
@@ -102,6 +102,7 @@ static bool try_handle_trapped_instruction(RecordTask* t, siginfo_t* si) {
   ASSERT(t, len > 0);

   Registers r = t->regs();
+  bool should_retry_patch = false;
   if (special_instruction.opcode == SpecialInstOpcode::ARM_MRS_CNTVCT_EL0 ||
       special_instruction.opcode == SpecialInstOpcode::ARM_MRS_CNTVCTSS_EL0) {
     if (special_instruction.regno != 31) {
@@ -114,7 +115,7 @@ static bool try_handle_trapped_instruction(RecordTask* t, siginfo_t* si) {
   } else if (special_instruction.opcode == SpecialInstOpcode::X86_RDTSC ||
              special_instruction.opcode == SpecialInstOpcode::X86_RDTSCP) {
     if (special_instruction.opcode == SpecialInstOpcode::X86_RDTSC &&
-        t->vm()->monkeypatcher().try_patch_trapping_instruction(t, len, true)) {
+        t->vm()->monkeypatcher().try_patch_trapping_instruction(t, len, true, should_retry_patch)) {
       Event ev = Event::patch_syscall();
       ev.PatchSyscall().patch_trapping_instruction = true;
       t->record_event(ev);
@@ -141,15 +142,16 @@ static bool try_handle_trapped_instruction(RecordTask* t, siginfo_t* si) {
   t->set_regs(r);
   t->record_event(Event::instruction_trap());

-  if (t->retry_syscall_patching) {
+  if (should_retry_patch) {
     LOG(debug) << "Retrying deferred syscall patching";
-    t->retry_syscall_patching = false;
-    if (t->vm()->monkeypatcher().try_patch_trapping_instruction(t, len, false)) {
+    should_retry_patch = false;
+    if (t->vm()->monkeypatcher().try_patch_trapping_instruction(t, len, false, should_retry_patch)) {
       // Instruction was patched. Emit event.
       auto ev = Event::patch_syscall();
       ev.PatchSyscall().patch_after_syscall = true;
       t->record_event(ev);
     }
+    ASSERT(t, !should_retry_patch);
   }

   t->push_event(Event::noop());
diff --git a/src/record_syscall.cc b/src/record_syscall.cc
index c164b1caa0b..eed3bd271a8 100644
--- a/src/record_syscall.cc
+++ b/src/record_syscall.cc
@@ -5250,10 +5250,22 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t,
       // are zero.
       bool arguments_are_zero = true;
       Registers r = t->regs();
-      for (int i = 1; i <= 6; ++i) {
+      int first_zero_reg = 1;
+      uintptr_t ret = 0;
+      if (r.arg(1) == RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED) {
+        if (!!getenv(SYSCALLBUF_ENABLED_ENV_VAR)) {
+          // Syscallbuf is enabled, but we reached here anyway. Return -ENOTSUP.
+          ret = (uintptr_t)-ENOTSUP;
+        }
+        first_zero_reg = 2;
+      }
+      for (int i = first_zero_reg; i <= 6; ++i) {
         arguments_are_zero &= r.arg(i) == 0;
       }
-      syscall_state.emulate_result(arguments_are_zero ? 0 : (uintptr_t)-EINVAL);
+      if (!arguments_are_zero)
+        syscall_state.emulate_result((uintptr_t)-EINVAL);
+      else
+        syscall_state.emulate_result(ret);
       syscall_state.expect_errno = ENOSYS;
       return PREVENT_SWITCH;
     }
diff --git a/src/test/x86/deferred_patch.c b/src/test/x86/deferred_patch.c
new file mode 100644
index 00000000000..5db525c682e
--- /dev/null
+++ b/src/test/x86/deferred_patch.c
@@ -0,0 +1,164 @@
+/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */
+
+#include "util.h"
+#include "rrcalls.h"
+
+/*
+ * When we see a syscall patch region in which the syscall is the last instruction,
+ * we defer patching until we exit the corresponding syscall
+ * (since we can't assume that we can jump to the top of the patch and re-execute
+ * the preceding instructions). This test tests a few corner cases:
+ *
+ * 1. When a syscall that had such a deferred patch is interrupted by a signal and the
+ *    signal handler itself makes a syscall, we need to be sure that we don't accidentally
+ *    try to patch that syscall.
+ *
+ * 2. If another thread is in the middle of the same syscall we need to make sure that we
+ *    don't accidentally patch it (but do patch it on the exit of the second syscall).
+ *
+ * 3. If the signal handler rewrites the signal context to resume execution elsewhere, we still
+ *    want to try patching the original syscall.
+ */
+#ifdef __x86_64__
+static __attribute__((noinline)) uintptr_t deferred_patch_syscall(
+    uintptr_t syscall, uintptr_t arg1, uintptr_t arg2,
+    uintptr_t arg3, uintptr_t arg4) {
+  uintptr_t ret;
+  register uintptr_t r10 __asm__("r10") = arg4;
+  register long r8 __asm__("r8") = 0;
+  register long r9 __asm__("r9") = 0;
+  __asm__ volatile(
+      /* Use a syscall sequence for which we have a PATCH_SYSCALL_INSTRUCTION_IS_LAST */
+      ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t"
+      "syscall\n\t"
+      /* Make sure we don't accidentally match any hook pattern with the instructions after */
+      "cmp $0x77,%%rax\n\t"
+      : "=a"(ret)
+      : "a"(syscall), "D"(arg1), "S"(arg2), "d"(arg3), "r"(r10), "r"(r8), "r"(r9)
+      : "flags");
+  return ret;
+}
+
+static __attribute__((noinline)) uintptr_t deferred_patch_syscall2(
+    uintptr_t syscall, uintptr_t arg1, uintptr_t arg2,
+    uintptr_t arg3, uintptr_t arg4) {
+  uintptr_t ret;
+  register uintptr_t r10 __asm__("r10") = arg4;
+  register long r8 __asm__("r8") = 0;
+  register long r9 __asm__("r9") = 0;
+  __asm__ volatile(
+      ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t"
+      "syscall\n\t"
+      "cmp $0x77,%%rax\n\t"
+      : "=a"(ret)
+      : "a"(syscall), "D"(arg1), "S"(arg2), "d"(arg3), "r"(r10), "r"(r8), "r"(r9)
+      : "flags");
+  return ret;
+}
+
+int pipefds[2];
+int pipefds2[2];
+uint8_t byte = 0x1;
+volatile uint32_t futex = 0;
+
+static void handle_alrm(__attribute__((unused)) int sig,
+                        __attribute__((unused)) siginfo_t* si, void* user) {
+  ucontext_t* ctx = (ucontext_t*)user;
+  // Skip the syscall and cmp after and have it return 0
+  ctx->uc_mcontext.gregs[REG_RIP] += 4;
+  ctx->uc_mcontext.gregs[REG_RAX] = 0;
+  futex = 1;
+  return;
+}
+
+static void handle_usr1(__attribute__((unused)) int sig) {
+  // Wake up child
+  test_assert(1 == write(pipefds2[1], &byte, 1));
+  // We need this read to desched, so that rr sees a syscall inside the rr page.
+  // We want to make sure that rr doesn't accidentally try patching that syscall
+  // rather than the futex one that we interrupted.
+  test_assert(1 == read(pipefds[0], &byte, 1));
+  futex = 2;
+  test_assert(1 == write(pipefds2[1], &byte, 1));
+  return;
+}
+
+static void handle_usr2(__attribute__((unused)) int sig) {
+  futex = 3;
+  return;
+}
+
+static void futex_wait(uintptr_t val)
+{
+  test_assert((uintptr_t)-EAGAIN == deferred_patch_syscall(SYS_futex, (uintptr_t)&futex, FUTEX_WAIT, val, (uintptr_t)NULL));
+}
+
+pid_t parent;
+
+static void *do_thread(__attribute__((unused)) void* arg) {
+  test_assert(1 == read(pipefds2[0], &byte, 1));
+  sched_yield();
+  syscall(SYS_tkill, parent, SIGALRM);
+  test_assert(1 == read(pipefds2[0], &byte, 1));
+  sched_yield();
+  syscall(SYS_tkill, parent, SIGUSR1);
+  test_assert(1 == read(pipefds2[0], &byte, 1));
+  test_assert(1 == write(pipefds[1], &byte, 1));
+  test_assert(1 == read(pipefds2[0], &byte, 1));
+  futex_wait(2);
+  test_assert(futex == 3);
+  return NULL;
+}
+
+int main(void) {
+  test_assert(0 == pipe(pipefds));
+  test_assert(0 == pipe(pipefds2));
+
+  parent = sys_gettid();
+
+  pthread_t t;
+
+  // Set up signal handlers
+  struct sigaction sa;
+  sa.sa_sigaction = handle_alrm;
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = SA_SIGINFO;
+  test_assert(0 == sigaction(SIGALRM, &sa, NULL));
+  test_assert(0 == signal(SIGUSR1, handle_usr1));
+  test_assert(0 == signal(SIGUSR2, handle_usr2));
+
+  // Kick off driver thread
+  pthread_create(&t, NULL, do_thread, NULL);
+
+  // Wait for SIGALRM
+  test_assert(1 == write(pipefds2[1], &byte, 1));
+  test_assert(0 == deferred_patch_syscall2(SYS_futex, (uintptr_t)&futex, FUTEX_WAIT, 0, (uintptr_t)NULL));
+  test_assert(futex == 1);
+
+  // Wait for SIGUSR1
+  test_assert(1 == write(pipefds2[1], &byte, 1));
+  futex_wait(1);
+
+  // Issue SIGUSR2 in thread to let it out of the futex (and let it patch the syscall)
+  sched_yield(); sched_yield();
+  pthread_kill(t, SIGUSR2);
+  pthread_join(t, NULL);
+
+  // Make sure that the patching actually happened (if the syscallbuf is enabled)
+  uintptr_t ret = deferred_patch_syscall(SYS_rrcall_check_presence, RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED, 0, 0, 0);
+  test_assert(ret == (uintptr_t)-ENOSYS || ret == 0);
+
+  ret = deferred_patch_syscall2(SYS_rrcall_check_presence, RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED, 0, 0, 0);
+  test_assert(ret == (uintptr_t)-ENOSYS || ret == 0);
+
+  atomic_puts("EXIT-SUCCESS");
+  return 0;
+}
+#else
+int main(void) {
+  atomic_puts("This test can only be run on x86_64. Skipping...");
+  atomic_puts("EXIT-SUCCESS");
+  return 77;
+}
+#endif
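
Usage note (editorial, not part of the patch): a minimal sketch of how a tracee outside rr's test harness might invoke the new RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED check through the ordinary libc syscall() wrapper. The SYS_rrcall_check_presence and RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED constants mirror src/preload/rrcalls.h above; the RR_CALL_BASE value below is an assumption, and a real program would include rr's rrcalls.h rather than redefining it.

/* Hypothetical standalone example; behavior when run without rr relies on the
   kernel rejecting the unknown syscall number with ENOSYS. */
#include <errno.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#define RR_CALL_BASE 1000 /* assumption; take the real value from rr's rrcalls.h */
#define SYS_rrcall_check_presence (RR_CALL_BASE + 8)
#define RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED 1

int main(void) {
  long ret = syscall(SYS_rrcall_check_presence,
                     RRCALL_CHECK_SYSCALLBUF_USED_OR_DISABLED, 0, 0, 0, 0, 0);
  if (ret == 0) {
    /* Under rr: the syscallbuf serviced this call, or the syscallbuf is disabled. */
    puts("syscallbuf used or disabled");
  } else if (errno == ENOSYS) {
    /* Not running under rr: the kernel does not know this syscall number. */
    puts("not running under rr");
  } else {
    /* Under rr with the syscallbuf enabled, but this call was not buffered (ENOTSUP). */
    puts("under rr, but this syscall was not buffered");
  }
  return 0;
}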