Skip to content

Commit

Permalink
When Task::did_waitpid can't get registers for a stop because we pr…
Browse files Browse the repository at this point in the history
…ematurely exited the stop, just ignore the stop.

This makes our state tracking more accurate. Before, when a task is booted out of a ptrace-stop via SIGKILL or equivalent while we're reading its state in `Task::did_waitpid`, we would treat the task as as if it were still stopped, even though now it is either running in the kernel towards PTRACE_EVENT_EXIT or reap, or else stopped in a new PTRACE_EVENT_EXIT stop which we will see in the next wait(). We would cover up the discrepancy by changing the stop to a fake PTRACE_EVENT_EXIT. Instead we now "tell the truth" that the task is not known to be stopped and instead we should wait for its next stop.

The downside is that this requires us to propagate the fact that the task is not actually stopped to various callers, including the callers of `Task::resume_execution`. This is annoying, but it's also helpful to consider how these callers should behave when a task is booted out of a ptrace-stop by SIGKILL and is no longer stopped. It's complex and hard to test, but the underlying kernel behavior is the problem.
  • Loading branch information
rocallahan committed Sep 8, 2023
1 parent 1b4371d commit 5970a88
Show file tree
Hide file tree
Showing 15 changed files with 365 additions and 191 deletions.
67 changes: 44 additions & 23 deletions src/AutoRemoteSyscalls.cc
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,11 @@ void AutoRemoteSyscalls::restore_state_to(Task* t) {
regs.set_arg1(regs.orig_arg1());
t->set_regs(regs);
if (t->enter_syscall(true)) {
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
// Tracee died unexpectedly, there is nothing more we can do.
// Do not restore the status, we want callers to see that the task died.
return;
}
}
regs.set_ip(initial_ip);
regs.set_syscallno(regs.original_syscallno());
Expand All @@ -286,7 +290,11 @@ void AutoRemoteSyscalls::restore_state_to(Task* t) {
t->set_regs(regs);
RecordTask* rt = static_cast<RecordTask*>(t);
while (true) {
rt->resume_execution(RESUME_CONT, RESUME_WAIT, RESUME_NO_TICKS);
if (!rt->resume_execution(RESUME_CONT, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
// Tracee died unexpectedly, there is nothing more we can do.
// Do not restore the status, we want callers to see that the task died.
return;
}
if (rt->ptrace_event())
break;
rt->stash_sig();
Expand Down Expand Up @@ -365,17 +373,18 @@ long AutoRemoteSyscalls::syscall_base(int syscallno, Registers& callregs) {
bool from_seccomp = initial_at_seccomp && t->ptrace_event() == PTRACE_EVENT_SECCOMP;
if (use_singlestep_path && !from_seccomp) {
while (true) {
t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT, RESUME_NO_TICKS);
if (!t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
// Tracee was killed, there is nothing more we can do.
// Ensure callers see the task death status.
restore_wait_status = t->status();
return -ESRCH;
}
LOG(debug) << "Used singlestep path; status=" << t->status();
// When a PTRACE_EVENT_EXIT is returned we don't update registers
if (t->ip() != callregs.ip()) {
// We entered the syscall, so stop now
break;
}
if (t->ptrace_event() == PTRACE_EVENT_EXIT) {
// We died, just let it be
break;
}
if (t->stop_sig() == SIGTRAP && t->get_siginfo().si_code == TRAP_TRACE) {
// On aarch64, if we were previously in a syscall-exit stop, continuing
// with PTRACE_SINGLESTEP will result in incurring a trap upon execution
Expand All @@ -389,26 +398,26 @@ long AutoRemoteSyscalls::syscall_base(int syscallno, Registers& callregs) {
ASSERT(t, false) << "Unexpected status " << t->status();
}
} else {
bool exited = false;
if (from_seccomp) {
LOG(debug) << "Skipping enter_syscall - already at seccomp stop";
} else {
exited = !t->enter_syscall(true);
if (!t->enter_syscall(true)) {
// Tracee was killed, there is nothing more we can do.
// Ensure callers see the task death status.
restore_wait_status = t->status();
return -ESRCH;
}
LOG(debug) << "Used enter_syscall; status=" << t->status();
}
// proceed to syscall exit
if (!exited) {
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
LOG(debug) << "syscall exit status=" << t->status();
}
}
while (true) {
// If the syscall caused the task to exit, just stop now with that status.
if (t->seen_ptrace_exit_event() || t->status().reaped()) {
if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
// Tracee was killed, there is nothing more we can do.
// Ensure callers see the task death status.
restore_wait_status = t->status();
LOG(debug) << "Task is dying, no status result";
return -ESRCH;
}
LOG(debug) << "syscall exit status=" << t->status();
}
while (true) {
if (t->status().is_syscall() ||
(t->stop_sig() == SIGTRAP &&
is_kernel_trap(t->get_siginfo().si_code))) {
Expand All @@ -418,19 +427,31 @@ long AutoRemoteSyscalls::syscall_base(int syscallno, Registers& callregs) {
}
if (is_clone_syscall(syscallno, t->arch()) &&
t->clone_syscall_is_complete(&new_tid_, t->arch())) {
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
// Tracee was killed, there is nothing more we can do.
// Ensure callers see the task death status.
restore_wait_status = t->status();
return -ESRCH;
}
LOG(debug) << "got clone event; new status=" << t->status();
continue;
}
if (ignore_signal(t)) {
if (t->regs().syscall_may_restart()) {
if (!t->enter_syscall(true)) {
// We died, just let it be
break;
// Tracee was killed, there is nothing more we can do.
// Ensure callers see the task death status.
restore_wait_status = t->status();
return -ESRCH;
}
LOG(debug) << "signal ignored; restarting syscall, status="
<< t->status();
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
// Tracee was killed, there is nothing more we can do.
// Ensure callers see the task death status.
restore_wait_status = t->status();
return -ESRCH;
}
LOG(debug) << "syscall exit status=" << t->status();
continue;
}
Expand Down
16 changes: 10 additions & 6 deletions src/DiversionSession.cc
Original file line number Diff line number Diff line change
Expand Up @@ -198,16 +198,20 @@ DiversionSession::DiversionResult DiversionSession::diversion_step(

while (true) {
switch (command) {
case RUN_CONTINUE:
case RUN_CONTINUE: {
LOG(debug) << "Continuing to next syscall";
t->resume_execution(RESUME_SYSEMU, RESUME_WAIT, RESUME_UNLIMITED_TICKS,
signal_to_deliver);
bool ok = t->resume_execution(RESUME_SYSEMU, RESUME_WAIT,
RESUME_UNLIMITED_TICKS, signal_to_deliver);
ASSERT(t, ok) << "Tracee was killed unexpectedly";
break;
case RUN_SINGLESTEP:
}
case RUN_SINGLESTEP: {
LOG(debug) << "Stepping to next insn/syscall";
t->resume_execution(RESUME_SYSEMU_SINGLESTEP, RESUME_WAIT,
RESUME_UNLIMITED_TICKS, signal_to_deliver);
bool ok = t->resume_execution(RESUME_SYSEMU_SINGLESTEP, RESUME_WAIT,
RESUME_UNLIMITED_TICKS, signal_to_deliver);
ASSERT(t, ok) << "Tracee was killed unexpectedly";
break;
}
default:
FATAL() << "Illegal run command " << command;
}
Expand Down
9 changes: 8 additions & 1 deletion src/Monkeypatcher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1218,7 +1218,9 @@ bool Monkeypatcher::try_patch_syscall_aarch64(RecordTask* t, bool entering_sysca
auto success = patch_syscall_with_hook(*this, t, syscall_hooks[0], ip, 4, 0);
if (!success && entering_syscall) {
// Need to reenter the syscall to undo exit_syscall_and_prepare_restart
t->enter_syscall();
if (!t->enter_syscall()) {
return false;
}
}

if (!success) {
Expand Down Expand Up @@ -1284,6 +1286,11 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) {
// We want our mmap records to be associated with the next (PATCH_SYSCALL)
// event, not a FLUSH_SYSCALLBUF event.
t->maybe_flush_syscallbuf();
if (!t->is_stopped()) {
// Tracee was unexpectedly kicked out of a ptrace-stop by SIGKILL or
// equivalent. Abort trying to patch.
return false;
}

if (arch == aarch64) {
return try_patch_syscall_aarch64(t, entering_syscall);
Expand Down
Loading

0 comments on commit 5970a88

Please sign in to comment.