Skip to content

Commit

Permalink
[nextest-runner] use grace period on receiving Windows Ctrl-C
Browse files Browse the repository at this point in the history
Previously on Windows, we would not do anything at all on receiving Ctrl-C,
letting tests exit on their own.

With this change, in case of Ctrl-C, nextest will apply the same grace period
that it does on Unix. By default, nextest will now wait 10 seconds before
calling `TerminateJobObject` on the test. Like on Unix, a double Ctrl-C will
kill the test immediately.

The behavior in case of timeouts is unchanged -- call `TerminateJobObject`
immediately.
  • Loading branch information
sunshowers committed Dec 14, 2024
1 parent 208d6e5 commit 365f630
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 15 deletions.
13 changes: 12 additions & 1 deletion nextest-runner/src/reporter/displayer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1661,7 +1661,6 @@ impl<'a> TestReporterImpl<'a> {
state: &UnitTerminatingState,
writer: &mut dyn Write,
) -> io::Result<()> {
#[cfg_attr(not(any(unix, test)), expect(unused_variables))]
let UnitTerminatingState {
pid,
time_taken,
Expand Down Expand Up @@ -1707,6 +1706,18 @@ impl<'a> TestReporterImpl<'a> {
"note".style(self.styles.count),
)?;
}
#[cfg(windows)]
UnitTerminateMethod::Wait => {
writeln!(
writer,
"{}: waiting for {} to exit on its own; spent {:.3?}s, will terminate \
job object after another {:.3?}s",
"note".style(self.styles.count),
kind,
waiting_duration.as_secs_f64(),
remaining.as_secs_f64(),
)?;
}
#[cfg(test)]
UnitTerminateMethod::Fake => {
// This is only used in tests.
Expand Down
10 changes: 10 additions & 0 deletions nextest-runner/src/reporter/events.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1063,6 +1063,16 @@ pub enum UnitTerminateMethod {
#[cfg(windows)]
JobObject,

/// The unit is being waited on to exit. A termination signal will be sent
/// if it doesn't exit within the grace period.
///
/// On Windows, this occurs when nextest receives Ctrl-C. In that case, it
/// is assumed that tests will also receive Ctrl-C and exit on their own. If
/// tests do not exit within the grace period configured for them, their
/// corresponding job objects will be terminated.
#[cfg(windows)]
Wait,

/// A fake method used for testing.
#[cfg(test)]
Fake,
Expand Down
1 change: 0 additions & 1 deletion nextest-runner/src/runner/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,6 @@ pub(super) struct UnitContext<'a> {
}

impl<'a> UnitContext<'a> {
#[cfg_attr(not(unix), expect(dead_code))]
pub(super) fn packet(&self) -> &UnitPacket<'a> {
&self.packet
}
Expand Down
124 changes: 111 additions & 13 deletions nextest-runner/src/runner/windows.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@

use crate::{
errors::ConfigureHandleInheritanceError,
runner::{InternalTerminateReason, RunUnitRequest, UnitContext},
reporter::events::{UnitState, UnitTerminateMethod, UnitTerminateReason, UnitTerminatingState},
runner::{
InternalTerminateReason, RunUnitQuery, RunUnitRequest, ShutdownRequest, SignalRequest,
UnitContext,
},
signal::ShutdownEvent,
test_command::ChildAccumulator,
time::StopwatchStart,
};
Expand Down Expand Up @@ -73,21 +78,78 @@ pub(super) fn assign_process_to_job(
}

#[expect(clippy::too_many_arguments)]
pub(super) async fn terminate_child(
_cx: &UnitContext<'_>,
pub(super) async fn terminate_child<'a>(
cx: &UnitContext<'a>,
child: &mut Child,
_child_acc: &mut ChildAccumulator,
child_acc: &mut ChildAccumulator,
reason: InternalTerminateReason,
_stopwatch: &mut StopwatchStart,
_req_rx: &mut UnboundedReceiver<RunUnitRequest<'_>>,
stopwatch: &mut StopwatchStart,
req_rx: &mut UnboundedReceiver<RunUnitRequest<'a>>,
job: Option<&Job>,
_grace_period: Duration,
grace_period: Duration,
) {
// Ignore signal events since Windows propagates them to child processes (this may change if
// we start assigning processes to groups on Windows).
if !matches!(reason, InternalTerminateReason::Timeout) {
return;
}
let Some(pid) = child.id() else { return };
let (term_reason, term_method) = to_terminate_reason_and_method(&reason, grace_period);

let child_exited = match term_method {
UnitTerminateMethod::Wait => {
let waiting_stopwatch = crate::time::stopwatch();

loop {
tokio::select! {
() = child_acc.fill_buf(), if !child_acc.fds.is_done() => {}
_ = child.wait() => {
// The process exited.
break true;
}
recv = req_rx.recv() => {
// The sender stays open longer than the whole loop, and
// the buffer is big enough for all messages ever sent
// through this channel, so a RecvError should never
// happen.
let req = recv.expect("a RecvError should never happen here");

match req {
RunUnitRequest::Signal(SignalRequest::Shutdown(_)) => {
// Receiving a shutdown signal while waiting for
// the process to exit always means kill
// immediately -- go to the next step.
break false;
}
RunUnitRequest::Query(RunUnitQuery::GetInfo(sender)) => {
let waiting_snapshot = waiting_stopwatch.snapshot();
_ = sender.send(
cx.info_response(
UnitState::Terminating(UnitTerminatingState {
pid,
time_taken: stopwatch.snapshot().active,
reason: term_reason,
method: term_method,
waiting_duration: waiting_snapshot.active,
remaining: grace_period
.checked_sub(waiting_snapshot.active)
.unwrap_or_default(),
}),
child_acc.snapshot_in_progress(cx.packet().kind().waiting_on_message()),
)
);
}
}
}
}
}
}
UnitTerminateMethod::JobObject => {
// The process is killed by the job object.
false
}
#[cfg(test)]
UnitTerminateMethod::Fake => {
unreachable!("fake method is only used for reporter tests");
}
};

// In any case, always call TerminateJobObject.
if let Some(job) = job {
let handle = job.handle();
unsafe {
Expand All @@ -96,6 +158,42 @@ pub(super) async fn terminate_child(
_ = TerminateJobObject(handle as _, 1);
}
}

// Start killing the process directly for good measure.
let _ = child.start_kill();
if !child_exited {
let _ = child.start_kill();
}
}

fn to_terminate_reason_and_method(
reason: &InternalTerminateReason,
grace_period: Duration,
) -> (UnitTerminateReason, UnitTerminateMethod) {
match reason {
InternalTerminateReason::Timeout => (
UnitTerminateReason::Timeout,
// The grace period is currently ignored for timeouts --
// TerminateJobObject is immediately called.
UnitTerminateMethod::JobObject,
),
InternalTerminateReason::Signal(req) => (
// The only signals we support on Windows are interrupts.
UnitTerminateReason::Interrupt,
shutdown_terminate_method(*req, grace_period),
),
}
}

fn shutdown_terminate_method(req: ShutdownRequest, grace_period: Duration) -> UnitTerminateMethod {
if grace_period.is_zero() {
return UnitTerminateMethod::JobObject;
}

match req {
// In case of interrupt events, wait for the grace period to elapse
// before terminating the job. We're assuming that if nextest got an
// interrupt, child processes did as well.
ShutdownRequest::Once(ShutdownEvent::Interrupt) => UnitTerminateMethod::Wait,
ShutdownRequest::Twice => UnitTerminateMethod::JobObject,
}
}
4 changes: 4 additions & 0 deletions site/src/docs/design/architecture/signal-handling.md
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,10 @@ Unlike process groups, job objects form a tree. If something else runs nextest
within a job object and then calls `TerminateJobObject`, both nextest and all
its child processes are terminated.

When a test times out, nextest calls `TerminateJobObject` on the job object
associated with the test immediately. In the future, it would be interesting
to send a Ctrl-C (or maybe a `WM_CLOSE`?) to the test process first.

[job objects]: https://learn.microsoft.com/en-us/windows/win32/procthread/job-objects
[terminate-job-object]: https://docs.microsoft.com/en-us/windows/win32/api/jobapi2/nf-jobapi2-terminatejobobject

Expand Down

0 comments on commit 365f630

Please sign in to comment.