diff --git a/src/AutoRemoteSyscalls.cc b/src/AutoRemoteSyscalls.cc
index 7c5ad33b7f4..0fb3c927350 100644
--- a/src/AutoRemoteSyscalls.cc
+++ b/src/AutoRemoteSyscalls.cc
@@ -270,7 +270,11 @@ void AutoRemoteSyscalls::restore_state_to(Task* t) {
     regs.set_arg1(regs.orig_arg1());
     t->set_regs(regs);
     if (t->enter_syscall(true)) {
-      t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
+      if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
+        // Tracee died unexpectedly, there is nothing more we can do.
+        // Do not restore the status, we want callers to see that the task died.
+        return;
+      }
     }
     regs.set_ip(initial_ip);
     regs.set_syscallno(regs.original_syscallno());
@@ -286,7 +290,11 @@ void AutoRemoteSyscalls::restore_state_to(Task* t) {
     t->set_regs(regs);
     RecordTask* rt = static_cast<RecordTask*>(t);
     while (true) {
-      rt->resume_execution(RESUME_CONT, RESUME_WAIT, RESUME_NO_TICKS);
+      if (!rt->resume_execution(RESUME_CONT, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
+        // Tracee died unexpectedly, there is nothing more we can do.
+        // Do not restore the status, we want callers to see that the task died.
+        return;
+      }
       if (rt->ptrace_event())
         break;
       rt->stash_sig();
@@ -365,17 +373,18 @@ long AutoRemoteSyscalls::syscall_base(int syscallno, Registers& callregs) {
   bool from_seccomp = initial_at_seccomp && t->ptrace_event() == PTRACE_EVENT_SECCOMP;
   if (use_singlestep_path && !from_seccomp) {
     while (true) {
-      t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT, RESUME_NO_TICKS);
+      if (!t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
+        // Tracee was killed, there is nothing more we can do.
+        // Ensure callers see the task death status.
+        restore_wait_status = t->status();
+        return -ESRCH;
+      }
       LOG(debug) << "Used singlestep path; status=" << t->status();
       // When a PTRACE_EVENT_EXIT is returned we don't update registers
       if (t->ip() != callregs.ip()) {
         // We entered the syscall, so stop now
         break;
       }
-      if (t->ptrace_event() == PTRACE_EVENT_EXIT) {
-        // We died, just let it be
-        break;
-      }
       if (t->stop_sig() == SIGTRAP && t->get_siginfo().si_code == TRAP_TRACE) {
         // On aarch64, if we were previously in a syscall-exit stop, continuing
         // with PTRACE_SINGLESTEP will result in incurring a trap upon execution
@@ -389,26 +398,26 @@ long AutoRemoteSyscalls::syscall_base(int syscallno, Registers& callregs) {
       ASSERT(t, false) << "Unexpected status " << t->status();
     }
   } else {
-    bool exited = false;
     if (from_seccomp) {
       LOG(debug) << "Skipping enter_syscall - already at seccomp stop";
     } else {
-      exited = !t->enter_syscall(true);
+      if (!t->enter_syscall(true)) {
+        // Tracee was killed, there is nothing more we can do.
+        // Ensure callers see the task death status.
+        restore_wait_status = t->status();
+        return -ESRCH;
+      }
       LOG(debug) << "Used enter_syscall; status=" << t->status();
     }
-    // proceed to syscall exit
-    if (!exited) {
-      t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
-      LOG(debug) << "syscall exit status=" << t->status();
-    }
-  }
-  while (true) {
-    // If the syscall caused the task to exit, just stop now with that status.
-    if (t->seen_ptrace_exit_event() || t->status().reaped()) {
+    if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
+      // Tracee was killed, there is nothing more we can do.
+      // Ensure callers see the task death status.
       restore_wait_status = t->status();
-      LOG(debug) << "Task is dying, no status result";
       return -ESRCH;
     }
+    LOG(debug) << "syscall exit status=" << t->status();
+  }
+  while (true) {
     if (t->status().is_syscall() ||
         (t->stop_sig() == SIGTRAP &&
          is_kernel_trap(t->get_siginfo().si_code))) {
@@ -418,19 +427,31 @@ long AutoRemoteSyscalls::syscall_base(int syscallno, Registers& callregs) {
     }
     if (is_clone_syscall(syscallno, t->arch()) &&
         t->clone_syscall_is_complete(&new_tid_, t->arch())) {
-      t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
+      if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
+        // Tracee was killed, there is nothing more we can do.
+        // Ensure callers see the task death status.
+        restore_wait_status = t->status();
+        return -ESRCH;
+      }
       LOG(debug) << "got clone event; new status=" << t->status();
       continue;
     }
     if (ignore_signal(t)) {
       if (t->regs().syscall_may_restart()) {
         if (!t->enter_syscall(true)) {
-          // We died, just let it be
-          break;
+          // Tracee was killed, there is nothing more we can do.
+          // Ensure callers see the task death status.
+          restore_wait_status = t->status();
+          return -ESRCH;
         }
         LOG(debug) << "signal ignored; restarting syscall, status="
                    << t->status();
-        t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
+        if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
+          // Tracee was killed, there is nothing more we can do.
+          // Ensure callers see the task death status.
+          restore_wait_status = t->status();
+          return -ESRCH;
+        }
         LOG(debug) << "syscall exit status=" << t->status();
         continue;
       }
diff --git a/src/DiversionSession.cc b/src/DiversionSession.cc
index 53eb5d295a0..93cad2d5e24 100644
--- a/src/DiversionSession.cc
+++ b/src/DiversionSession.cc
@@ -198,16 +198,20 @@ DiversionSession::DiversionResult DiversionSession::diversion_step(
 
   while (true) {
     switch (command) {
-      case RUN_CONTINUE:
+      case RUN_CONTINUE: {
         LOG(debug) << "Continuing to next syscall";
-        t->resume_execution(RESUME_SYSEMU, RESUME_WAIT, RESUME_UNLIMITED_TICKS,
-                            signal_to_deliver);
+        bool ok = t->resume_execution(RESUME_SYSEMU, RESUME_WAIT,
+                                      RESUME_UNLIMITED_TICKS, signal_to_deliver);
+        ASSERT(t, ok) << "Tracee was killed unexpectedly";
         break;
-      case RUN_SINGLESTEP:
+      }
+      case RUN_SINGLESTEP: {
         LOG(debug) << "Stepping to next insn/syscall";
-        t->resume_execution(RESUME_SYSEMU_SINGLESTEP, RESUME_WAIT,
-                           RESUME_UNLIMITED_TICKS, signal_to_deliver);
+        bool ok = t->resume_execution(RESUME_SYSEMU_SINGLESTEP, RESUME_WAIT,
+                                      RESUME_UNLIMITED_TICKS, signal_to_deliver);
+        ASSERT(t, ok) << "Tracee was killed unexpectedly";
         break;
+      }
       default:
         FATAL() << "Illegal run command " << command;
     }
diff --git a/src/Monkeypatcher.cc b/src/Monkeypatcher.cc
index 7f7f29ae717..c34293b1327 100644
--- a/src/Monkeypatcher.cc
+++ b/src/Monkeypatcher.cc
@@ -1284,6 +1284,11 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) {
   // We want our mmap records to be associated with the next (PATCH_SYSCALL)
   // event, not a FLUSH_SYSCALLBUF event.
   t->maybe_flush_syscallbuf();
+  if (!t->is_stopped()) {
+    // Tracee was unexpectedly kicked out of a ptrace-stop by SIGKILL or
+    // equivalent. Abort trying to patch.
+    return false;
+  }
 
   if (arch == aarch64) {
     return try_patch_syscall_aarch64(t, entering_syscall);
diff --git a/src/RecordSession.cc b/src/RecordSession.cc
index 5c823925ddd..e24756899c0 100644
--- a/src/RecordSession.cc
+++ b/src/RecordSession.cc
@@ -381,8 +381,8 @@ void RecordSession::handle_seccomp_traced_syscall(RecordTask* t,
     t->set_regs(regs);
     t->vm()->add_breakpoint(ret_addr, BKPT_INTERNAL);
     while (true) {
-      t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
-      if (t->ptrace_event() == PTRACE_EVENT_EXIT) {
+      if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
+        // Tracee exited unexpectedly
         return;
       }
       ASSERT(t, !t->ptrace_event());
@@ -439,7 +439,12 @@ void RecordSession::handle_seccomp_traced_syscall(RecordTask* t,
       Registers r = orig_regs;
       r.set_original_syscallno(syscall_number_for_gettid(t->arch()));
       t->set_regs(r);
-      t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
+      if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
+        // Tracee died unexpectedly. We did not enter a syscall.
+        // We shouldn't try to resume it now.
+        step_state->continue_type = RecordSession::DONT_CONTINUE;
+        return;
+      }
       t->set_regs(orig_regs);
     }
 
@@ -602,7 +607,8 @@ static void handle_seccomp_trap(RecordTask* t,
     // The tracee is currently in the seccomp ptrace-stop. Advance it to the
     // syscall-exit stop so that when we try to deliver the SIGSYS via
     // PTRACE_SINGLESTEP, that doesn't trigger a SIGTRAP stop.
-    t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
+    // If this fails, that's fine, we're not going to deliver the SIGSYS.
+    t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS);
   }
 
   // Don't continue yet. At the next iteration of record_step, if we
@@ -719,6 +725,7 @@ bool RecordSession::handle_ptrace_event(RecordTask** t_ptr,
             // to the PTRACE_EVENT_EXIT. This avoids the race where our
             // PTRACE_CONT might kick us out of the PTRACE_EVENT_EXIT before
             // we can process it.
+            // If this fails because of *another* SIGKILL that's fine.
             t->wait();
             break;
           default:
@@ -761,7 +768,13 @@ bool RecordSession::handle_ptrace_event(RecordTask** t_ptr,
         *t_ptr = t;
         // Tell t that it is actually stopped, because the stop we got is really
         // for this task, not the old dead task.
-        t->did_waitpid(status);
+        if (!t->did_waitpid(status)) {
+          // This is totally untested and almost certainly broken, but if the
+          // task was SIGKILLed out of the EXEC stop then we should probably
+          // just pretend the exec never happened.
+          step_state->continue_type = CONTINUE_SYSCALL;
+          break;
+        }
       }
       t->post_exec();
       t->session().scheduler().did_exit_execve(t);
@@ -929,8 +942,7 @@ static void advance_to_disarm_desched_syscall(RecordTask* t) {
   /* TODO: send this through main loop. */
   /* TODO: mask off signals and avoid this loop. */
   do {
-    t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_UNLIMITED_TICKS);
-    if (t->seen_ptrace_exit_event()) {
+    if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_UNLIMITED_TICKS)) {
       return;
     }
     if (t->status().is_syscall()) {
@@ -968,8 +980,8 @@ static void advance_to_disarm_desched_syscall(RecordTask* t) {
     }
   } while (!t->is_disarm_desched_event_syscall());
 
-  // Exit the syscall.
-  t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
+  // Exit the syscall. If this fails, that's fine, we can ignore it.
+  t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS);
 }
 
 /**
@@ -1127,10 +1139,13 @@ void RecordSession::syscall_state_changed(RecordTask* t,
         Registers orig_regs = r;
         r.set_original_syscallno(-1);
         t->set_regs(r);
-        t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
-        ASSERT(t, t->ip() == r.ip());
-        t->set_regs(orig_regs);
-        maybe_trigger_emulated_ptrace_syscall_exit_stop(t);
+        // If this fails because of premature exit, don't mess with the
+        // task anymore.
+        if (t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
+          ASSERT(t, t->ip() == r.ip());
+          t->set_regs(orig_regs);
+          maybe_trigger_emulated_ptrace_syscall_exit_stop(t);
+        }
         return;
       }
       last_task_switchable = PREVENT_SWITCH;
@@ -1465,9 +1480,7 @@ static bool preinject_signal(RecordTask* t) {
      */
     LOG(debug) << "    maybe not in signal-stop (status " << t->status()
                << "); doing tgkill(SYSCALLBUF_DESCHED_SIGNAL)";
-    t->move_to_signal_stop();
-
-    if (t->status().ptrace_event() == PTRACE_EVENT_EXIT) {
+    if (!t->move_to_signal_stop()) {
       /* We raced with an exit (e.g. due to a pending SIGKILL). */
       return false;
     }
@@ -1488,7 +1501,7 @@ static bool preinject_signal(RecordTask* t) {
 /**
  * Returns true if the signal should be delivered.
  * Returns false if this signal should not be delivered because another signal
- * occurred during delivery.
+ * occurred during delivery or there was a premature exit.
  * Must call t->stashed_signal_processed() once we're ready to unmask signals.
  */
 static bool inject_handled_signal(RecordTask* t) {
@@ -1506,7 +1519,9 @@ static bool inject_handled_signal(RecordTask* t) {
     // XXX we assume the kernel won't respond by notifying us of a different
     // signal. We don't want to do this with signals blocked because that will
     // save a bogus signal mask in the signal frame.
-    t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT, RESUME_NO_TICKS, sig);
+    if (!t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS, sig)) {
+      return false;
+    }
     // Signal injection can change the sigmask due to sa_mask effects, lack of
     // SA_NODEFER, and signal frame construction triggering a synchronous
     // SIGSEGV.
@@ -2549,6 +2564,8 @@ RecordSession::RecordResult RecordSession::record_step() {
 
   StepState step_state(CONTINUE);
 
+  ASSERT(t, t->is_stopped()) << "Somehow we're not stopped here; status="
+    << t->status();
   bool did_enter_syscall;
   if (rescheduled.by_waitpid &&
       handle_ptrace_event(&t, &step_state, &result, &did_enter_syscall)) {
@@ -2561,37 +2578,41 @@ RecordSession::RecordResult RecordSession::record_step() {
     if (did_enter_syscall && t->ev().type() == EV_SYSCALL) {
       syscall_state_changed(t, &step_state);
     }
-  } else if (rescheduled.by_waitpid && handle_signal_event(t, &step_state)) {
-    // Tracee may have exited while processing descheds; handle that.
-    if (handle_ptrace_exit_event(t)) {
-      // t may have been deleted.
-      last_task_switchable = ALLOW_SWITCH;
-      return result;
-    }
   } else {
-    runnable_state_changed(t, &step_state, &result, rescheduled.by_waitpid);
+    ASSERT(t, t->is_stopped()) << "handle_ptrace_event left us in a not-stopped state";
+    if (rescheduled.by_waitpid && handle_signal_event(t, &step_state)) {
+      // Tracee may have exited while processing descheds; handle that.
+      if (handle_ptrace_exit_event(t)) {
+        // t may have been deleted.
+        last_task_switchable = ALLOW_SWITCH;
+        return result;
+      }
+    } else {
+      ASSERT(t, t->is_stopped()) << "handle_signal_event left us in a not-stopped state";
+      runnable_state_changed(t, &step_state, &result, rescheduled.by_waitpid);
 
-    if (result.status != STEP_CONTINUE ||
-        step_state.continue_type == DONT_CONTINUE) {
-      return result;
-    }
+      if (result.status != STEP_CONTINUE ||
+          step_state.continue_type == DONT_CONTINUE) {
+        return result;
+      }
 
-    switch (t->ev().type()) {
-      case EV_DESCHED:
-        desched_state_changed(t);
-        break;
-      case EV_SYSCALL:
-        syscall_state_changed(t, &step_state);
-        break;
-      case EV_SIGNAL:
-      case EV_SIGNAL_DELIVERY:
-        if (signal_state_changed(t, &step_state)) {
-          // t may have been deleted
-          return result;
-        }
-        break;
-      default:
-        break;
+      switch (t->ev().type()) {
+        case EV_DESCHED:
+          desched_state_changed(t);
+          break;
+        case EV_SYSCALL:
+          syscall_state_changed(t, &step_state);
+          break;
+        case EV_SIGNAL:
+        case EV_SIGNAL_DELIVERY:
+          if (signal_state_changed(t, &step_state)) {
+            // t may have been deleted
+            return result;
+          }
+          break;
+        default:
+          break;
+      }
     }
   }
 
diff --git a/src/ReplaySession.cc b/src/ReplaySession.cc
index d82616721f4..209d2151de1 100644
--- a/src/ReplaySession.cc
+++ b/src/ReplaySession.cc
@@ -492,7 +492,8 @@ Completion ReplaySession::cont_syscall_boundary(
   } else {
     ResumeRequest resume_how =
         constraints.is_singlestep() ? RESUME_SYSEMU_SINGLESTEP : RESUME_SYSEMU;
-    t->resume_execution(resume_how, RESUME_WAIT, ticks_request);
+    bool ok = t->resume_execution(resume_how, RESUME_WAIT_NO_EXIT, ticks_request);
+    ASSERT(t, ok) << "Tracee died unexpectedly";
   }
 
   switch (t->stop_sig()) {
@@ -525,7 +526,8 @@ Completion ReplaySession::cont_syscall_boundary(
       syscall_seccomp_ordering_ = PTRACE_SYSCALL_BEFORE_SECCOMP;
     }
     // Eat the following event, either a seccomp or syscall notification
-    t->resume_execution(RESUME_SYSEMU, RESUME_WAIT, ticks_request);
+    bool ok = t->resume_execution(RESUME_SYSEMU, RESUME_WAIT_NO_EXIT, ticks_request);
+    ASSERT(t, ok) << "Tracee died unexpectedly";
   }
 
   t->apply_syscall_entry_regs();
@@ -726,14 +728,16 @@ Completion ReplaySession::continue_or_step(ReplayTask* t,
                                            TicksRequest tick_request,
                                            ResumeRequest resume_how) {
   if (constraints.command == RUN_SINGLESTEP) {
-    t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT, tick_request);
+    bool ok = t->resume_execution(RESUME_SINGLESTEP, RESUME_WAIT_NO_EXIT, tick_request);
+    ASSERT(t, ok) << "Tracee died unexpectedly";
     handle_unrecorded_cpuid_fault(t, constraints);
   } else if (constraints.command == RUN_SINGLESTEP_FAST_FORWARD) {
     fast_forward_status |= fast_forward_through_instruction(
         t, RESUME_SINGLESTEP, constraints.stop_before_states);
     handle_unrecorded_cpuid_fault(t, constraints);
   } else {
-    t->resume_execution(resume_how, RESUME_WAIT, tick_request);
+    bool ok = t->resume_execution(resume_how, RESUME_WAIT_NO_EXIT, tick_request);
+    ASSERT(t, ok) << "Tracee died unexpectedly";
     if (t->stop_sig() == 0) {
       auto type = AddressSpace::rr_page_syscall_from_exit_point(t->arch(), t->ip());
       if (type && type->traced == AddressSpace::UNTRACED) {
@@ -1670,7 +1674,8 @@ static void end_task(ReplayTask* t) {
   r.set_syscallno(syscall_number_for_exit(t->arch()));
   t->set_regs(r);
   // Enter the syscall.
-  t->resume_execution(RESUME_CONT, RESUME_WAIT, RESUME_NO_TICKS);
+  bool ok = t->resume_execution(RESUME_CONT, RESUME_WAIT, RESUME_NO_TICKS);
+  ASSERT(t, ok) << "Tracee died unexpectedly";
   if (t->session().done_initial_exec()) {
     ASSERT(t, t->ptrace_event() == PTRACE_EVENT_EXIT);
     t->did_handle_ptrace_exit_event();
@@ -2116,7 +2121,9 @@ void ReplaySession::reattach_tasks(ScopedFd new_tracee_socket, ScopedFd new_trac
   // Get stop events for all tasks
   for (auto& entry : task_map) {
     Task* t = entry.second;
-    t->wait();
+    if (!t->wait()) {
+      FATAL() << "Task " << t->tid << " killed unexpectedly";
+    }
     if (SIGSTOP != t->status().group_stop()) {
       WaitStatus failed_status = t->status();
       FATAL() << "Unexpected stop " << failed_status << " for " << t->tid;
diff --git a/src/Scheduler.cc b/src/Scheduler.cc
index 8c6754b1b0c..42c9f42ceaa 100644
--- a/src/Scheduler.cc
+++ b/src/Scheduler.cc
@@ -229,8 +229,10 @@ bool WaitAggregator::try_wait(RecordTask* t) {
     return false;
   }
   LOGM(debug) << "wait on " << t->tid << " returns " << result.status;
-  t->did_waitpid(result.status);
-  return true;
+  // If did_waitpid fails then the task left the stop prematurely
+  // due to SIGKILL or equivalent, and we should report that we did not get
+  // a stop.
+  return t->did_waitpid(result.status);
 }
 
 bool WaitAggregator::try_wait_exit(RecordTask* t) {
@@ -245,9 +247,11 @@ bool WaitAggregator::try_wait_exit(RecordTask* t) {
   options.consume = false;
   WaitResult result = WaitManager::wait_exit(options);
   switch (result.code) {
-    case WAIT_OK:
-      t->did_waitpid(result.status);
+    case WAIT_OK: {
+      bool ok = t->did_waitpid(result.status);
+      ASSERT(t, ok) << "did_waitpid shouldn't fail for exit statuses";
       return true;
+    }
     case WAIT_NO_STATUS:
       // This can happen when the task is in zap_pid_ns_processes waiting for all tasks
       // in the pid-namespace to exit. It's not in a signal stop, but it's also not
@@ -286,7 +290,7 @@ bool Scheduler::is_task_runnable(RecordTask* t, WaitAggregator& wait_aggregator,
   }
 
   LOGM(debug) << "Task event is " << t->ev();
-  if (!t->may_be_blocked()) {
+  if (!t->may_be_blocked() && (t->is_stopped() || t->was_reaped())) {
     LOGM(debug) << "  " << t->tid << " isn't blocked";
     if (t->schedule_frozen) {
       LOGM(debug) << "  " << t->tid << "  but is frozen";
@@ -303,12 +307,17 @@ bool Scheduler::is_task_runnable(RecordTask* t, WaitAggregator& wait_aggregator,
       t->emulate_SIGCONT();
       // We shouldn't run any user code since there is at least one signal
       // pending.
-      t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
-      *by_waitpid = true;
-      must_run_task = t;
-      LOGM(debug) << "  Got " << t->tid
-                 << " out of emulated stop due to pending SIGCONT";
-      return true;
+      if (t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
+        *by_waitpid = true;
+        must_run_task = t;
+        LOGM(debug) << "  Got " << t->tid
+                   << " out of emulated stop due to pending SIGCONT";
+        return true;
+      }
+      // Tracee exited unexpectedly. Reexamine it now in case it has a new
+      // status we can use. Note that we cleared `t->emulated_stop_type`
+      // so we won't end up here again.
+      return is_task_runnable(t, wait_aggregator, by_waitpid);
     } else {
       LOGM(debug) << "  " << t->tid << " is stopped by ptrace or signal";
       // We have no way to detect a SIGCONT coming from outside the tracees.
@@ -346,7 +355,11 @@ bool Scheduler::is_task_runnable(RecordTask* t, WaitAggregator& wait_aggregator,
     // These syscalls never really block but the kernel may report that
     // the task is not stopped yet if we pass WNOHANG. To make them
     // behave predictably, do a blocking wait.
-    t->wait();
+    if (!t->wait()) {
+      // Task got SIGKILL or equivalent while trying to process the stop.
+      // Ignore this event and we'll process the new status later.
+      return false;
+    }
     *by_waitpid = true;
     must_run_task = t;
     LOGM(debug) << "  " << syscall_name(t->ev().Syscall().number, t->arch())
@@ -602,7 +615,11 @@ static RecordTask* find_waited_task(RecordSession& session, pid_t tid, WaitStatu
   }
 
   if (waited->detached_proxy) {
-    waited->did_waitpid(status);
+    if (!waited->did_waitpid(status)) {
+      // Proxy died unexpectedly during the waitpid, just ignore
+      // the stop.
+      return nullptr;
+    }
     pid_t parent_rec_tid = waited->get_parent_pid();
     LOGM(debug) << "    ... but it's a detached process.";
     RecordTask *parent = session.find_task(parent_rec_tid);
@@ -696,7 +713,13 @@ Scheduler::Rescheduled Scheduler::reschedule(Switchable switchable) {
           if (!waited) {
             continue;
           }
-          waited->did_waitpid(status);
+          if (!waited->did_waitpid(status)) {
+            // Tracee exited stop prematurely due to SIGKILL or equivalent.
+            // Pretend the stop didn't happen.
+            continue;
+          }
+          result.by_waitpid = true;
+          LOGM(debug) << "  new status is " << current_->status();
           // Another task just became runnable, we're no longer in unlimited
           // ticks mode
           unlimited_ticks_mode = false;
@@ -714,7 +737,15 @@ Scheduler::Rescheduled Scheduler::reschedule(Switchable switchable) {
           timeout = elapsed > 0.05 ? 0.0 : 0.05 - elapsed;
           LOGM(debug) << "  But that's not our current task...";
         } else {
-          current_->wait(timeout);
+          if (current_->wait(timeout)) {
+            result.by_waitpid = true;
+            LOGM(debug) << "  new status is " << current_->status();
+          } else {
+            // A SIGKILL or equivalent kicked the task out of the stop.
+            // We are now running towards PTRACE_EVENT_EXIT or zombie status.
+            // Even though we're PREVENT_SWITCH, we still have to switch.
+            // The task won't be stopped so this is handled below.
+          }
           break;
         }
       }
@@ -725,11 +756,11 @@ Scheduler::Rescheduled Scheduler::reschedule(Switchable switchable) {
                  strevent(current_->event), 1000.0 * wait_duration);
       }
 #endif
-      result.by_waitpid = true;
-      LOGM(debug) << "  new status is " << current_->status();
     }
-    validate_scheduled_task();
-    return result;
+    if (current_->is_stopped() || current_->was_reaped()) {
+      validate_scheduled_task();
+      return result;
+    }
   }
 
   unlimited_ticks_mode = false;
@@ -891,8 +922,9 @@ Scheduler::Rescheduled Scheduler::reschedule(Switchable switchable) {
                    status.ptrace_event() == PTRACE_EVENT_EXIT ||
                    status.reaped())
             << "Scheduled task should have been blocked";
-        next->did_waitpid(status);
-        if (in_exec_tgid && next->tgid() != in_exec_tgid) {
+        if (!next->did_waitpid(status)) {
+          next = nullptr;
+        } else if (in_exec_tgid && next->tgid() != in_exec_tgid) {
           // Some threadgroup is doing execve and this task isn't in
           // that threadgroup. Don't schedule this task until the execve
           // is complete.
diff --git a/src/Scheduler.h b/src/Scheduler.h
index 7534f8540c2..94e8b16e0b7 100644
--- a/src/Scheduler.h
+++ b/src/Scheduler.h
@@ -106,6 +106,10 @@ class Scheduler {
    * The new current() task is guaranteed to either have already been
    * runnable, or have been made runnable by a waitpid status change (in
    * which case, result.by_waitpid will be true.
+   * 
+   * After this, if Rescheduled::interrupted_by_signal is false,
+   * and there is a new current task, its is_stopped() must
+   * be true.
    */
   struct Rescheduled {
     bool interrupted_by_signal;
diff --git a/src/Task.cc b/src/Task.cc
index 8746d940f38..bb962e84098 100644
--- a/src/Task.cc
+++ b/src/Task.cc
@@ -277,7 +277,8 @@ void Task::finish_emulated_syscall() {
   // Passing RESUME_NO_TICKS here is not only a small performance optimization,
   // but also avoids counting an event if the instruction immediately following
   // a syscall instruction is a conditional branch.
-  resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
+  bool ok = resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS);
+  ASSERT(this, ok) << "Tracee exited unexpectedly";
 
   set_regs(r);
   wait_status = WaitStatus();
@@ -888,8 +889,10 @@ bool Task::enter_syscall(bool allow_exit) {
                                        Session::SECCOMP_BEFORE_PTRACE_SYSCALL;
   bool need_seccomp_event = seccomp_bpf_enabled;
   while (need_ptrace_syscall_event || need_seccomp_event) {
-    resume_execution(need_ptrace_syscall_event ? RESUME_SYSCALL : RESUME_CONT,
-                     RESUME_WAIT, RESUME_NO_TICKS);
+    if (!resume_execution(need_ptrace_syscall_event ? RESUME_SYSCALL : RESUME_CONT,
+                          RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
+      return false;
+    }
     if (is_ptrace_seccomp_event()) {
       ASSERT(this, need_seccomp_event);
       need_seccomp_event = false;
@@ -934,7 +937,9 @@ bool Task::exit_syscall() {
                            Session::PTRACE_SYSCALL_BEFORE_SECCOMP) &&
                           !is_ptrace_seccomp_event();
   while (true) {
-    resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
+    if (!resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
+      return false;
+    }
     if (will_see_seccomp && is_ptrace_seccomp_event()) {
       will_see_seccomp = false;
       continue;
@@ -1437,7 +1442,7 @@ void Task::work_around_KNL_string_singlestep_bug() {
   }
 }
 
-void Task::resume_execution(ResumeRequest how, WaitRequest wait_how,
+bool Task::resume_execution(ResumeRequest how, WaitRequest wait_how,
                             TicksRequest tick_period, int sig) {
   // Ensure our HW debug registers are up to date before we execute any code.
   // If this fails because the task died, the code below will detect it.
@@ -1497,7 +1502,6 @@ void Task::resume_execution(ResumeRequest how, WaitRequest wait_how,
     flush_regs();
   }
 
-  bool detected_exit = false;
   if (session().is_recording() && !seen_ptrace_exit_event()) {
     /* There's a nasty race where a stopped task gets woken up by a SIGKILL
      * and advances to the PTRACE_EXIT_EVENT ptrace-stop just before we
@@ -1524,24 +1528,42 @@ void Task::resume_execution(ResumeRequest how, WaitRequest wait_how,
              result.status.ptrace_event() == PTRACE_EVENT_EXIT ||
                  result.status.reaped())
           << "got " << result.status;
-      did_waitpid(result.status);
-      detected_exit = true;
+      LOG(debug) << "Task " << tid << " exited unexpectedly with status "
+          << result.status;
+      if (did_waitpid(result.status)) {
+        // We reached a new stop (or actually reaped the task).
+        // Consider this "resume execution" to be done.
+        return wait_how != RESUME_WAIT_NO_EXIT;
+      }
+      ASSERT(this, result.status.ptrace_event() == PTRACE_EVENT_EXIT)
+        << "did_waitpid should always succeed for reaped() statuses";
+      // The tracee must have been kicked out of PTRACE_EVENT_EXIT
+      // by a SIGKILL (only possible on older kernels).
+      // If we were supposed to wait, we've failed.
+      // We can't wait now because on old kernels tasks can block
+      // indefinitely even after PTRACE_EVENT_EXIT (e.g. due to coredumping).
+      // We don't know what state it's in exactly, but registers haven't changed
+      // since nothing has really happened since the last stop.
+      set_stopped(false);
+      return RESUME_NONBLOCKING == wait_how;
     }
   }
-  if (detected_exit) {
-    LOG(debug) << "Task " << tid << " exited unexpectedly";
-  } else {
-    ASSERT(this, setup_succeeded);
-    ptrace_if_stopped(how, nullptr, (void*)(uintptr_t)sig);
-    // If ptrace_if_stopped failed, it means we're running along the
-    // exit path due to a SIGKILL or equivalent, so just like if it
-    // succeeded, we are stopped and will receive a wait notification.
-    set_stopped(false);
-    extra_registers_known = false;
-    if (RESUME_WAIT == wait_how) {
-      wait();
+  ASSERT(this, setup_succeeded);
+  ptrace_if_stopped(how, nullptr, (void*)(uintptr_t)sig);
+  // If ptrace_if_stopped failed, it means we're running along the
+  // exit path due to a SIGKILL or equivalent, so just like if it
+  // succeeded, we will eventually receive a wait notification.
+  set_stopped(false);
+  extra_registers_known = false;
+  if (RESUME_NONBLOCKING != wait_how) {
+    if (!wait()) {
+      return false;
+    }
+    if (wait_how == RESUME_WAIT_NO_EXIT) {
+      return ptrace_event() != PTRACE_EVENT_EXIT && !was_reaped();
     }
   }
+  return true;
 }
 
 void Task::set_regs(const Registers& regs) {
@@ -1962,7 +1984,7 @@ bool Task::account_for_potential_ptrace_interrupt_stop(WaitStatus status) {
   return false;
 }
 
-void Task::wait(double interrupt_after_elapsed) {
+bool Task::wait(double interrupt_after_elapsed) {
   LOG(debug) << "going into blocking wait for " << tid << " ...";
   ASSERT(this, session().is_recording() || interrupt_after_elapsed == -1);
 
@@ -1994,12 +2016,8 @@ void Task::wait(double interrupt_after_elapsed) {
       /* The process died without us getting a PTRACE_EXIT_EVENT notification.
        * This is possible if the process receives a SIGKILL while in the exit
        * event stop, but before we were able to read the event notification.
-       * We handle this situation by synthesizing an exit event, but otherwise
-       * going about our regular business.
        */
-      LOG(debug) << "Synthesizing PTRACE_EVENT_EXIT for zombie process " << tid;
-      result.status = WaitStatus::for_ptrace_event(PTRACE_EVENT_EXIT);
-      break;
+      return false;
     }
     ASSERT(this, result.code == WAIT_NO_STATUS);
   }
@@ -2010,7 +2028,7 @@ void Task::wait(double interrupt_after_elapsed) {
       LOG(warn) << "  PTRACE_INTERRUPT raced with another event " << result.status;
     }
   }
-  did_waitpid(result.status);
+  return did_waitpid(result.status);
 }
 
 void Task::canonicalize_regs(SupportedArch syscall_arch) {
@@ -2087,11 +2105,11 @@ void Task::set_aarch64_tls_register(uintptr_t val) {
      we tried to set. */
 }
 
-void Task::did_waitpid(WaitStatus status) {
+bool Task::did_waitpid(WaitStatus status) {
   if (is_detached_proxy() &&
       (status.stop_sig() == SIGSTOP || status.stop_sig() == SIGCONT)) {
     LOG(debug) << "Task " << tid << " is a detached proxy, ignoring status " << status;
-    return;
+    return true;
   }
 
   LOG(debug) << "  Task " << tid << " changed status to " << status;
@@ -2110,7 +2128,7 @@ void Task::did_waitpid(WaitStatus status) {
       // here is get out quickly and the higher-level function should go ahead
       // and delete us.
       wait_status = status;
-      return;
+      return true;
     }
     LOG(debug) << "Unexpected process reap for " << tid;
     /* Mark buffers as having been destroyed. We missed our chance
@@ -2146,11 +2164,13 @@ void Task::did_waitpid(WaitStatus status) {
     } else if (status.stop_sig()) {
       if (!ptrace_if_stopped(PTRACE_GETSIGINFO, nullptr, &pending_siginfo)) {
         LOG(debug) << "Unexpected process death getting siginfo for " << tid;
-        status = WaitStatus::for_ptrace_event(PTRACE_EVENT_EXIT);
+        // Let's pretend this stop never happened.
+        set_stopped(false);
+        return false;
       }
     }
 
-    // An unstable exit can cause a task to exit without us having run it, in
+    // A SIGKILL or equivalent can cause a task to exit without us having run it, in
     // which case we might have pending register changes for it that are now
     // irrelevant. In that case we just throw away our register changes and use
     // whatever the kernel now has.
@@ -2201,7 +2221,12 @@ void Task::did_waitpid(WaitStatus status) {
 #endif
       else {
         LOG(debug) << "Unexpected process death for " << tid;
-        status = WaitStatus::for_ptrace_event(PTRACE_EVENT_EXIT);
+        // Let's pretend this stop never happened.
+        // Note that pending_siginfo may have been overwritten above,
+        // but in that case we're going to ignore this signal-stop
+        // so it doesn't matter.
+        set_stopped(false);
+        return false;
       }
     }
   }
@@ -2297,6 +2322,7 @@ void Task::did_waitpid(WaitStatus status) {
   }
 
   did_wait();
+  return true;
 }
 
 template <typename Arch>
@@ -2398,7 +2424,8 @@ Task* Task::clone(CloneReason reason, int flags, remote_ptr<void> stack,
 
   // wait() before trying to do anything that might need to
   // use ptrace to access memory
-  t->wait();
+  bool ok = t->wait();
+  ASSERT(t, ok) << "Task " << t->tid << " killed unexpectedly; not sure how to handle this";
 
   t->post_wait_clone(this, flags);
   if (CLONE_SHARE_THREAD_GROUP & flags) {
@@ -3615,7 +3642,9 @@ long Task::ptrace_seize(pid_t tid, Session& session) {
   sa.sa_flags = 0; // No SA_RESTART, so waitpid() will be interrupted
   sigaction(SIGALRM, &sa, nullptr);
 
-  t->wait();
+  if (!t->wait()) {
+    FATAL() << "Tracee was killed via SIGKILL before reaching SIGSTOP";
+  }
   if (t->ptrace_event() == PTRACE_EVENT_EXIT) {
     t->proceed_to_exit();
     FATAL() << "Tracee died before reaching SIGSTOP\n"
@@ -4017,8 +4046,10 @@ void Task::dup_from(Task *other) {
 
 /**
  * Proceeds until the next system call, which is being executed.
+ * Returns false if did_waitpid failed because the task got SIGKILL
+ * or equivalent.
  */
-static void __ptrace_cont(Task* t, ResumeRequest resume_how,
+static bool __ptrace_cont(Task* t, ResumeRequest resume_how,
                           SupportedArch syscall_arch, int expect_syscallno,
                           int expect_syscallno2 = -1, pid_t new_tid = -1) {
   t->resume_execution(resume_how, RESUME_NONBLOCKING, RESUME_NO_TICKS);
@@ -4041,7 +4072,9 @@ static void __ptrace_cont(Task* t, ResumeRequest resume_how,
       t->hpc.set_tid(new_tid);
       t->tid = new_tid;
     }
-    t->did_waitpid(result.status);
+    if (!t->did_waitpid(result.status)) {
+      return false;
+    }
 
     if (ReplaySession::is_ignored_signal(t->status().stop_sig())) {
       t->resume_execution(resume_how, RESUME_NONBLOCKING, RESUME_NO_TICKS);
@@ -4060,6 +4093,7 @@ static void __ptrace_cont(Task* t, ResumeRequest resume_how,
              current_syscall == expect_syscallno2)
       << "Should be at " << syscall_name(expect_syscallno, syscall_arch)
       << ", but instead at " << syscall_name(current_syscall, syscall_arch);
+  return true;
 }
 
 void Task::did_handle_ptrace_exit_event() {
@@ -4123,9 +4157,10 @@ void Task::os_exec(SupportedArch exec_arch, std::string filename)
    * tid, no matter what tid it was before.
    */
   pid_t tgid = real_tgid();
-  __ptrace_cont(this, RESUME_SYSCALL, arch(), expect_syscallno,
-                syscall_number_for_execve(exec_arch),
-                tgid == tid ? -1 : tgid);
+  bool ok = __ptrace_cont(this, RESUME_SYSCALL, arch(), expect_syscallno,
+                          syscall_number_for_execve(exec_arch),
+                          tgid == tid ? -1 : tgid);
+  ASSERT(this, ok) << "Task " << tid << " got killed while trying to exec";
   LOG(debug) << this->status() << " " << this->regs();
   if (this->regs().syscall_result()) {
     errno = -this->regs().syscall_result();
@@ -4166,7 +4201,7 @@ void Task::tgkill(int sig) {
   ASSERT(this, 0 == syscall(SYS_tgkill, real_tgid(), tid, sig));
 }
 
-void Task::move_to_signal_stop()
+bool Task::move_to_signal_stop()
 {
   LOG(debug) << "    maybe not in signal-stop (status " << status()
              << "); doing tgkill(SYSCALLBUF_DESCHED_SIGNAL)";
@@ -4194,7 +4229,9 @@ void Task::move_to_signal_stop()
     old_ip = old_ip.decrement_by_syscall_insn_length(arch());
   }
   do {
-    resume_execution(RESUME_SINGLESTEP, RESUME_WAIT, RESUME_NO_TICKS);
+    if (!resume_execution(RESUME_SINGLESTEP, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
+      return false;
+    }
     ASSERT(this, old_ip == ip())
         << "Singlestep actually advanced when we "
         << "just expected a signal; was at " << old_ip << " now at "
@@ -4202,6 +4239,7 @@ void Task::move_to_signal_stop()
     // Ignore any pending TIME_SLICE_SIGNALs and continue until we get our
     // SYSCALLBUF_DESCHED_SIGNAL.
   } while (stop_sig() == PerfCounters::TIME_SLICE_SIGNAL);
+  return true;
 }
 
 bool Task::should_apply_rseq_abort(EventType event_type, remote_code_ptr* new_ip,
diff --git a/src/Task.h b/src/Task.h
index 8a20bfb071b..62f3a53758d 100644
--- a/src/Task.h
+++ b/src/Task.h
@@ -78,11 +78,14 @@ enum ResumeRequest {
   RESUME_SYSEMU_SINGLESTEP = NativeArch::PTRACE_SYSEMU_SINGLESTEP,
 };
 enum WaitRequest {
+  // Don't wait after resuming.
+  RESUME_NONBLOCKING,
   // After resuming, blocking-waitpid() until tracee status
   // changes.
   RESUME_WAIT,
-  // Don't wait after resuming.
-  RESUME_NONBLOCKING
+  // Like RESUME_WAIT, but we're not expecting a PTRACE_EVENT_EXIT
+  // or reap, so return false also in that case.
+  RESUME_WAIT_NO_EXIT
 };
 enum TicksRequest {
   // We don't expect to see any ticks (though we seem to on the odd buggy
@@ -254,11 +257,17 @@ class Task {
   pid_t pid_of_pidfd(int fd);
 
   /**
-   * Force the wait status of this to |status|, as if
-   * |wait()/try_wait()| had returned it. Call this whenever a waitpid
-   * returned activity for this past.
+   * Records the wait status of this task as |status|, e.g. if
+   * |wait()/try_wait()| has returned it. Call this whenever a waitpid
+   * returned activity for this task.
+   * If this returns false, then the task was kicked out of a ptrace-stop
+   * by SIGKILL or equivalent before we could read registers etc.
+   * We will treat this stop as if it never happened; the caller must
+   * act as if there was no stop.
+   * If `status.reaped()` (i.e. fatal signal or normal exit), this always
+   * returns true.
    */
-  void did_waitpid(WaitStatus status);
+  bool did_waitpid(WaitStatus status);
 
   /**
    * Syscalls have side effects on registers (e.g. setting the flags register).
@@ -575,8 +584,12 @@ class Task {
    * after that many seconds have elapsed.
    *
    * All tracee execution goes through here.
+   *
+   * If `wait_how` == RESUME_WAIT and we don't complete a
+   * did_waitpid() (e.g. because the tracee was SIGKILLed or
+   * equivalent), this returns false.
    */
-  void resume_execution(ResumeRequest how, WaitRequest wait_how,
+  bool resume_execution(ResumeRequest how, WaitRequest wait_how,
                         TicksRequest tick_period, int sig = 0);
 
   /** Return the session this is part of. */
@@ -712,8 +725,12 @@ class Task {
    * with the process in a stopped() state. If interrupt_after_elapsed >= 0,
    * interrupt the task after that many seconds have elapsed. If
    * interrupt_after_elapsed == 0.0, the interrupt will happen immediately.
+   * Returns false if the wait failed because we reached a stop but we got
+   * SIGKILLed (or equivalent) out of it, in which case it is not safe to wait
+   * because that might block indefinitely waiting for us to acknowledge the
+   * PTRACE_EVENT_EXIT of other tasks.
    */
-  void wait(double interrupt_after_elapsed = -1);
+  bool wait(double interrupt_after_elapsed = -1);
 
   /**
    * Currently we don't allow recording across uid changes, so we can
@@ -1079,8 +1096,9 @@ class Task {
   /**
    * Try to move this task to a signal stop by signaling it with the
    * syscallbuf desched signal (which is guaranteed not to be blocked).
+   * Returns false if the task exited unexpectedly.
    */
-  void move_to_signal_stop();
+  bool move_to_signal_stop();
 
   // A map from original table to (potentially detached) clone, to preserve
   // FdTable sharing relationships during a session fork.
diff --git a/src/fast_forward.cc b/src/fast_forward.cc
index 9e77ccdf8f9..1eeeb86dc1e 100644
--- a/src/fast_forward.cc
+++ b/src/fast_forward.cc
@@ -168,7 +168,8 @@ FastForwardStatus fast_forward_through_instruction(Task* t, ResumeRequest how,
 
   remote_code_ptr ip = t->ip();
 
-  t->resume_execution(how, RESUME_WAIT, RESUME_UNLIMITED_TICKS);
+  bool ok = t->resume_execution(how, RESUME_WAIT_NO_EXIT, RESUME_UNLIMITED_TICKS);
+  ASSERT(t, ok) << "Tracee was killed";
   if (t->stop_sig() != SIGTRAP) {
     // we might have stepped into a system call...
     return result;
@@ -310,7 +311,8 @@ FastForwardStatus fast_forward_through_instruction(Task* t, ResumeRequest how,
     // So, disable watchpoints temporarily.
     t->vm()->save_watchpoints();
     t->vm()->remove_all_watchpoints();
-    t->resume_execution(RESUME_CONT, RESUME_WAIT, RESUME_UNLIMITED_TICKS);
+    ok = t->resume_execution(RESUME_CONT, RESUME_WAIT_NO_EXIT, RESUME_UNLIMITED_TICKS);
+    ASSERT(t, ok) << "Tracee was killed";
     t->vm()->restore_watchpoints();
     t->vm()->remove_breakpoint(limit_ip, BKPT_INTERNAL);
     result.did_fast_forward = true;
diff --git a/src/record_signal.cc b/src/record_signal.cc
index 75d9197b5cf..4178c064811 100644
--- a/src/record_signal.cc
+++ b/src/record_signal.cc
@@ -518,7 +518,11 @@ static void handle_desched_event(RecordTask* t) {
     // syscall may have re-armed the event.
     disarm_desched_event(t);
 
-    t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_UNLIMITED_TICKS);
+    if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_UNLIMITED_TICKS)) {
+      LOG(debug) << "  (got exit, bailing out)";
+      t->push_event(Event::noop());
+      return;
+    }
 
     if (t->status().is_syscall()) {
       t->apply_syscall_entry_regs();
@@ -537,12 +541,6 @@ static void handle_desched_event(RecordTask* t) {
       // seccomp filter.
       break;
     }
-    if (t->ptrace_event() == PTRACE_EVENT_EXIT) {
-      LOG(debug)
-          << "  (got exit, bailing out)";
-      t->push_event(Event::noop());
-      return;
-    }
 
     // Completely ignore spurious desched signals and
     // signals that aren't going to be delivered to the
diff --git a/src/record_syscall.cc b/src/record_syscall.cc
index 11e02d6778d..442ed9fb7f2 100644
--- a/src/record_syscall.cc
+++ b/src/record_syscall.cc
@@ -3355,7 +3355,10 @@ static Switchable prepare_clone(RecordTask* t, TaskSyscallState& syscall_state)
   }
 
   while (true) {
-    t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS);
+    if (!t->resume_execution(RESUME_SYSCALL, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS)) {
+      // Tracee died unexpectedly during clone. 
+      return ALLOW_SWITCH;
+    }
     // XXX handle stray signals?
     if (t->ptrace_event()) {
       break;
@@ -3366,7 +3369,7 @@ static Switchable prepare_clone(RecordTask* t, TaskSyscallState& syscall_state)
       LOG(debug) << "clone failed, returning "
                  << errno_name(-t->regs().syscall_result_signed());
       syscall_state.emulate_result(t->regs().syscall_result());
-      // clone failed and we're existing the syscall with an error. Reenter
+      // clone failed and we're exiting the syscall with an error. Reenter
       // the syscall so that we're in the same state as the normal execution
       // path.
       t->ev().Syscall().failed_during_preparation = true;
diff --git a/src/replay_syscall.cc b/src/replay_syscall.cc
index 473e5d71331..fdf4e534ecd 100644
--- a/src/replay_syscall.cc
+++ b/src/replay_syscall.cc
@@ -218,7 +218,8 @@ template <typename Arch> static void prepare_clone(ReplayTask* t) {
   Registers entry_regs = r;
 
   // Run; we will be interrupted by PTRACE_EVENT_CLONE/FORK/VFORK.
-  t->resume_execution(RESUME_CONT, RESUME_WAIT, RESUME_NO_TICKS);
+  bool ok = t->resume_execution(RESUME_CONT, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS);
+  ASSERT(t, ok) << "Tracee was killed";
 
   pid_t new_tid;
   while (!t->clone_syscall_is_complete(&new_tid, Arch::arch())) {
@@ -227,7 +228,8 @@ template <typename Arch> static void prepare_clone(ReplayTask* t) {
     // state to try the syscall again.
     ASSERT(t, t->regs().syscall_result_signed() == -EAGAIN);
     t->set_regs(entry_regs);
-    t->resume_execution(RESUME_CONT, RESUME_WAIT, RESUME_NO_TICKS);
+    bool ok = t->resume_execution(RESUME_CONT, RESUME_WAIT_NO_EXIT, RESUME_NO_TICKS);
+    ASSERT(t, ok) << "Tracee was killed";
   }
 
   // Get out of the syscall