Skip to content

Commit

Permalink
When Task::did_waitpid can't get registers for a stop because we pr…
Browse files Browse the repository at this point in the history
…ematurely exited the stop, just ignore the stop

This makes our state tracking more accurate. Before, when a task is booted out of a ptrace-stop via SIGKILL or equivalent while we're reading its state in `Task::did_waitpid`, we would treat the task as as if it were still stopped, even though now it is either running in the kernel towards PTRACE_EVENT_EXIT or reap, or else stopped in a new PTRACE_EVENT_EXIT stop which we will see in the next wait(). We would cover up the discrepancy by changing the stop to a fake PTRACE_EVENT_EXIT. Instead we now "tell the truth" that the task is not known to be stopped and instead we should wait for its next stop.

The downside is that this requires us to propagate the fact that the task is not actually stopped to various callers, including the callers of `Task::resume_execution`. This is annoying, but it's also helpful to consider how these callers should behave when a task is booted out of a ptrace-stop by SIGKILL and is no longer stopped. It's complex and hard to test, but the underlying kernel behavior is the problem.
  • Loading branch information
rocallahan committed Sep 4, 2023
1 parent 368c7a2 commit 4392bfb
Show file tree
Hide file tree
Showing 13 changed files with 321 additions and 166 deletions.
67 changes: 44 additions & 23 deletions src/AutoRemoteSyscalls.cc
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,11 @@ void AutoRemoteSyscalls::restore_state_to(Task* t) {
regs.set_arg1(regs.orig_arg1());
t->set_regs(regs);
if (t->enter_syscall(true)) {
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
// Tracee died unexpectedly, there is nothing more we can do.
// Do not restore the status, we want callers to see that the task died.
return;
}
}
regs.set_ip(initial_ip);
regs.set_syscallno(regs.original_syscallno());
Expand All @@ -286,7 +290,11 @@ void AutoRemoteSyscalls::restore_state_to(Task* t) {
t->set_regs(regs);
RecordTask* rt = static_cast<RecordTask*>(t);
while (true) {
rt->resume_execution(RESUME_CONT, RESUME_WAIT, RESUME_NO_TICKS);
if (!rt->resume_execution(RESUME_CONT, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
// Tracee died unexpectedly, there is nothing more we can do.
// Do not restore the status, we want callers to see that the task died.
return;
}
if (rt->ptrace_event())
break;
rt->stash_sig();
Expand Down Expand Up @@ -365,17 +373,18 @@ long AutoRemoteSyscalls::syscall_base(int syscallno, Registers& callregs) {
bool from_seccomp = initial_at_seccomp && t->ptrace_event() == PTRACE_EVENT_SECCOMP;
if (use_singlestep_path && !from_seccomp) {
while (true) {
t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT, RESUME_NO_TICKS);
if (!t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
// Tracee was killed, there is nothing more we can do.
// Ensure callers see the task death status.
restore_wait_status = t->status();
return -ESRCH;
}
LOG(debug) << "Used singlestep path; status=" << t->status();
// When a PTRACE_EVENT_EXIT is returned we don't update registers
if (t->ip() != callregs.ip()) {
// We entered the syscall, so stop now
break;
}
if (t->ptrace_event() == PTRACE_EVENT_EXIT) {
// We died, just let it be
break;
}
if (t->stop_sig() == SIGTRAP && t->get_siginfo().si_code == TRAP_TRACE) {
// On aarch64, if we were previously in a syscall-exit stop, continuing
// with PTRACE_SINGLESTEP will result in incurring a trap upon execution
Expand All @@ -389,26 +398,26 @@ long AutoRemoteSyscalls::syscall_base(int syscallno, Registers& callregs) {
ASSERT(t, false) << "Unexpected status " << t->status();
}
} else {
bool exited = false;
if (from_seccomp) {
LOG(debug) << "Skipping enter_syscall - already at seccomp stop";
} else {
exited = !t->enter_syscall(true);
if (!t->enter_syscall(true)) {
// Tracee was killed, there is nothing more we can do.
// Ensure callers see the task death status.
restore_wait_status = t->status();
return -ESRCH;
}
LOG(debug) << "Used enter_syscall; status=" << t->status();
}
// proceed to syscall exit
if (!exited) {
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
LOG(debug) << "syscall exit status=" << t->status();
}
}
while (true) {
// If the syscall caused the task to exit, just stop now with that status.
if (t->seen_ptrace_exit_event() || t->status().reaped()) {
if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
// Tracee was killed, there is nothing more we can do.
// Ensure callers see the task death status.
restore_wait_status = t->status();
LOG(debug) << "Task is dying, no status result";
return -ESRCH;
}
LOG(debug) << "syscall exit status=" << t->status();
}
while (true) {
if (t->status().is_syscall() ||
(t->stop_sig() == SIGTRAP &&
is_kernel_trap(t->get_siginfo().si_code))) {
Expand All @@ -418,19 +427,31 @@ long AutoRemoteSyscalls::syscall_base(int syscallno, Registers& callregs) {
}
if (is_clone_syscall(syscallno, t->arch()) &&
t->clone_syscall_is_complete(&new_tid_, t->arch())) {
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
// Tracee was killed, there is nothing more we can do.
// Ensure callers see the task death status.
restore_wait_status = t->status();
return -ESRCH;
}
LOG(debug) << "got clone event; new status=" << t->status();
continue;
}
if (ignore_signal(t)) {
if (t->regs().syscall_may_restart()) {
if (!t->enter_syscall(true)) {
// We died, just let it be
break;
// Tracee was killed, there is nothing more we can do.
// Ensure callers see the task death status.
restore_wait_status = t->status();
return -ESRCH;
}
LOG(debug) << "signal ignored; restarting syscall, status="
<< t->status();
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
// Tracee was killed, there is nothing more we can do.
// Ensure callers see the task death status.
restore_wait_status = t->status();
return -ESRCH;
}
LOG(debug) << "syscall exit status=" << t->status();
continue;
}
Expand Down
16 changes: 10 additions & 6 deletions src/DiversionSession.cc
Original file line number Diff line number Diff line change
Expand Up @@ -198,16 +198,20 @@ DiversionSession::DiversionResult DiversionSession::diversion_step(

while (true) {
switch (command) {
case RUN_CONTINUE:
case RUN_CONTINUE: {
LOG(debug) << "Continuing to next syscall";
t->resume_execution(RESUME_SYSEMU, RESUME_WAIT, RESUME_UNLIMITED_TICKS,
signal_to_deliver);
bool ok = t->resume_execution(RESUME_SYSEMU, RESUME_WAIT,
RESUME_UNLIMITED_TICKS, signal_to_deliver);
ASSERT(t, ok) << "Tracee was killed unexpectedly";
break;
case RUN_SINGLESTEP:
}
case RUN_SINGLESTEP: {
LOG(debug) << "Stepping to next insn/syscall";
t->resume_execution(RESUME_SYSEMU_SINGLESTEP, RESUME_WAIT,
RESUME_UNLIMITED_TICKS, signal_to_deliver);
bool ok = t->resume_execution(RESUME_SYSEMU_SINGLESTEP, RESUME_WAIT,
RESUME_UNLIMITED_TICKS, signal_to_deliver);
ASSERT(t, ok) << "Tracee was killed unexpectedly";
break;
}
default:
FATAL() << "Illegal run command " << command;
}
Expand Down
5 changes: 5 additions & 0 deletions src/Monkeypatcher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1284,6 +1284,11 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) {
// We want our mmap records to be associated with the next (PATCH_SYSCALL)
// event, not a FLUSH_SYSCALLBUF event.
t->maybe_flush_syscallbuf();
if (!t->is_stopped()) {
// Tracee was unexpectedly kicked out of a ptrace-stop by SIGKILL or
// equivalent. Abort trying to patch.
return false;
}

if (arch == aarch64) {
return try_patch_syscall_aarch64(t, entering_syscall);
Expand Down
113 changes: 67 additions & 46 deletions src/RecordSession.cc
Original file line number Diff line number Diff line change
Expand Up @@ -381,8 +381,8 @@ void RecordSession::handle_seccomp_traced_syscall(RecordTask* t,
t->set_regs(regs);
t->vm()->add_breakpoint(ret_addr, BKPT_INTERNAL);
while (true) {
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
if (t->ptrace_event() == PTRACE_EVENT_EXIT) {
if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
// Tracee exited unexpectedly
return;
}
ASSERT(t, !t->ptrace_event());
Expand Down Expand Up @@ -439,7 +439,12 @@ void RecordSession::handle_seccomp_traced_syscall(RecordTask* t,
Registers r = orig_regs;
r.set_original_syscallno(syscall_number_for_gettid(t->arch()));
t->set_regs(r);
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
// Tracee died unexpectedly. We did not enter a syscall.
// We shouldn't try to resume it now.
step_state->continue_type = RecordSession::DONT_CONTINUE;
return;
}
t->set_regs(orig_regs);
}

Expand Down Expand Up @@ -602,7 +607,8 @@ static void handle_seccomp_trap(RecordTask* t,
// The tracee is currently in the seccomp ptrace-stop. Advance it to the
// syscall-exit stop so that when we try to deliver the SIGSYS via
// PTRACE_SINGLESTEP, that doesn't trigger a SIGTRAP stop.
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
// If this fails, that's fine, we're not going to deliver the SIGSYS.
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS);
}

// Don't continue yet. At the next iteration of record_step, if we
Expand Down Expand Up @@ -719,6 +725,7 @@ bool RecordSession::handle_ptrace_event(RecordTask** t_ptr,
// to the PTRACE_EVENT_EXIT. This avoids the race where our
// PTRACE_CONT might kick us out of the PTRACE_EVENT_EXIT before
// we can process it.
// If this fails because of *another* SIGKILL that's fine.
t->wait();
break;
default:
Expand Down Expand Up @@ -761,7 +768,13 @@ bool RecordSession::handle_ptrace_event(RecordTask** t_ptr,
*t_ptr = t;
// Tell t that it is actually stopped, because the stop we got is really
// for this task, not the old dead task.
t->did_waitpid(status);
if (!t->did_waitpid(status)) {
// This is totally untested and almost certainly broken, but if the
// task was SIGKILLed out of the EXEC stop then we should probably
// just pretend the exec never happened.
step_state->continue_type = CONTINUE_SYSCALL;
break;
}
}
t->post_exec();
t->session().scheduler().did_exit_execve(t);
Expand Down Expand Up @@ -929,8 +942,7 @@ static void advance_to_disarm_desched_syscall(RecordTask* t) {
/* TODO: send this through main loop. */
/* TODO: mask off signals and avoid this loop. */
do {
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_UNLIMITED_TICKS);
if (t->seen_ptrace_exit_event()) {
if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_UNLIMITED_TICKS)) {
return;
}
if (t->status().is_syscall()) {
Expand Down Expand Up @@ -968,8 +980,8 @@ static void advance_to_disarm_desched_syscall(RecordTask* t) {
}
} while (!t->is_disarm_desched_event_syscall());

// Exit the syscall.
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
// Exit the syscall. If this fails, that's fine, we can ignore it.
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS);
}

/**
Expand Down Expand Up @@ -1127,10 +1139,13 @@ void RecordSession::syscall_state_changed(RecordTask* t,
Registers orig_regs = r;
r.set_original_syscallno(-1);
t->set_regs(r);
t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
ASSERT(t, t->ip() == r.ip());
t->set_regs(orig_regs);
maybe_trigger_emulated_ptrace_syscall_exit_stop(t);
// If this fails because of premature exit, don't mess with the
// task anymore.
if (t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
ASSERT(t, t->ip() == r.ip());
t->set_regs(orig_regs);
maybe_trigger_emulated_ptrace_syscall_exit_stop(t);
}
return;
}
last_task_switchable = PREVENT_SWITCH;
Expand Down Expand Up @@ -1465,9 +1480,7 @@ static bool preinject_signal(RecordTask* t) {
*/
LOG(debug) << " maybe not in signal-stop (status " << t->status()
<< "); doing tgkill(SYSCALLBUF_DESCHED_SIGNAL)";
t->move_to_signal_stop();

if (t->status().ptrace_event() == PTRACE_EVENT_EXIT) {
if (!t->move_to_signal_stop()) {
/* We raced with an exit (e.g. due to a pending SIGKILL). */
return false;
}
Expand All @@ -1488,7 +1501,7 @@ static bool preinject_signal(RecordTask* t) {
/**
* Returns true if the signal should be delivered.
* Returns false if this signal should not be delivered because another signal
* occurred during delivery.
* occurred during delivery or there was a premature exit.
* Must call t->stashed_signal_processed() once we're ready to unmask signals.
*/
static bool inject_handled_signal(RecordTask* t) {
Expand All @@ -1506,7 +1519,9 @@ static bool inject_handled_signal(RecordTask* t) {
// XXX we assume the kernel won't respond by notifying us of a different
// signal. We don't want to do this with signals blocked because that will
// save a bogus signal mask in the signal frame.
t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT, RESUME_NO_TICKS, sig);
if (!t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS, sig)) {
return false;
}
// Signal injection can change the sigmask due to sa_mask effects, lack of
// SA_NODEFER, and signal frame construction triggering a synchronous
// SIGSEGV.
Expand Down Expand Up @@ -2549,6 +2564,8 @@ RecordSession::RecordResult RecordSession::record_step() {

StepState step_state(CONTINUE);

ASSERT(t, t->is_stopped()) << "Somehow we're not stopped here; status="
<< t->status();
bool did_enter_syscall;
if (rescheduled.by_waitpid &&
handle_ptrace_event(&t, &step_state, &result, &did_enter_syscall)) {
Expand All @@ -2561,37 +2578,41 @@ RecordSession::RecordResult RecordSession::record_step() {
if (did_enter_syscall && t->ev().type() == EV_SYSCALL) {
syscall_state_changed(t, &step_state);
}
} else if (rescheduled.by_waitpid && handle_signal_event(t, &step_state)) {
// Tracee may have exited while processing descheds; handle that.
if (handle_ptrace_exit_event(t)) {
// t may have been deleted.
last_task_switchable = ALLOW_SWITCH;
return result;
}
} else {
runnable_state_changed(t, &step_state, &result, rescheduled.by_waitpid);
ASSERT(t, t->is_stopped()) << "handle_ptrace_event left us in a not-stopped state";
if (rescheduled.by_waitpid && handle_signal_event(t, &step_state)) {
// Tracee may have exited while processing descheds; handle that.
if (handle_ptrace_exit_event(t)) {
// t may have been deleted.
last_task_switchable = ALLOW_SWITCH;
return result;
}
} else {
ASSERT(t, t->is_stopped()) << "handle_signal_event left us in a not-stopped state";
runnable_state_changed(t, &step_state, &result, rescheduled.by_waitpid);

if (result.status != STEP_CONTINUE ||
step_state.continue_type == DONT_CONTINUE) {
return result;
}
if (result.status != STEP_CONTINUE ||
step_state.continue_type == DONT_CONTINUE) {
return result;
}

switch (t->ev().type()) {
case EV_DESCHED:
desched_state_changed(t);
break;
case EV_SYSCALL:
syscall_state_changed(t, &step_state);
break;
case EV_SIGNAL:
case EV_SIGNAL_DELIVERY:
if (signal_state_changed(t, &step_state)) {
// t may have been deleted
return result;
}
break;
default:
break;
switch (t->ev().type()) {
case EV_DESCHED:
desched_state_changed(t);
break;
case EV_SYSCALL:
syscall_state_changed(t, &step_state);
break;
case EV_SIGNAL:
case EV_SIGNAL_DELIVERY:
if (signal_state_changed(t, &step_state)) {
// t may have been deleted
return result;
}
break;
default:
break;
}
}
}

Expand Down
Loading

0 comments on commit 4392bfb

Please sign in to comment.