Skip to content

Commit

Permalink
pd: 🙏 propagate errors from ACME worker
Browse files Browse the repository at this point in the history
when auto-https is enabled, we spawn a task in the background to handle
https certificate resolution, via `rustls_acme::AcmeState`.

if that task encounters errors, they should be propagated up to the
daemon, so that `pd` does not rapidly retry lookups, potentially hitting
rate-limits and causing service interruptions.

this changes the `pd` entrypoint, binding the [`JoinHandle`] to a
variable and polling upon that future in the `tokio::select` block that
represents the core steady-state event loop of the daemon.

we also update the acme_worker loop to log-and-bail on error, ensuring
that pd exits when the error hits the select loop in pd main.

Co-Authored-By: Conor Schaefer <[email protected]>
(cherry picked from commit adf260b)
  • Loading branch information
cratelyn authored and conorsch committed Jun 25, 2024
1 parent 60dc84f commit bc3c326
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 12 deletions.
28 changes: 17 additions & 11 deletions crates/bin/pd/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,23 +145,22 @@ async fn main() -> anyhow::Result<()> {
let make_svc = router.into_make_service();

// Now start the GRPC server, initializing an ACME client to use as a certificate
// resolver if auto-https has been enabled.
macro_rules! spawn_grpc_server {
($server:expr) => {
tokio::task::spawn($server.serve(make_svc))
};
}
// resolver if auto-https has been enabled. if auto-https is not enabled, we will
// instead spawn a future that will never return.
let grpc_server = axum_server::bind(grpc_bind);
let grpc_server = match grpc_auto_https {
let (grpc_server, acme_worker) = match grpc_auto_https {
Some(domain) => {
let (acceptor, acme_worker) =
penumbra_auto_https::axum_acceptor(pd_home, domain, !acme_staging);
// TODO(kate): we should eventually propagate errors from the ACME worker task.
tokio::spawn(acme_worker);
spawn_grpc_server!(grpc_server.acceptor(acceptor))
let acme_worker = tokio::spawn(acme_worker);
let grpc_server =
tokio::task::spawn(grpc_server.acceptor(acceptor).serve(make_svc));
(grpc_server, acme_worker)
}
None => {
spawn_grpc_server!(grpc_server)
let acme_worker = tokio::task::spawn(futures::future::pending());
let grpc_server = tokio::task::spawn(grpc_server.serve(make_svc));
(grpc_server, acme_worker)
}
};

Expand Down Expand Up @@ -228,6 +227,13 @@ async fn main() -> anyhow::Result<()> {
anyhow::anyhow!(msg)
}
)?,

// if the acme worker returns an error, let's propagate it!
x = acme_worker => x?.map_err(|error| {
let msg = format!("acme worker failed: {error}");
tracing::error!("{}", msg);
anyhow::anyhow!(msg)
})?,
};
}

Expand Down
5 changes: 4 additions & 1 deletion crates/util/auto-https/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ where
loop {
match state.next().await {
Some(Ok(ok)) => tracing::debug!("received acme event: {:?}", ok),
Some(Err(err)) => tracing::error!("acme error: {:?}", err),
Some(Err(err)) => {
tracing::error!("acme error: {:?}", err);
anyhow::bail!("exiting due to acme error");
}
None => {
debug_assert!(false, "acme worker unexpectedly reached end-of-stream");
tracing::error!("acme worker unexpectedly reached end-of-stream");
Expand Down

0 comments on commit bc3c326

Please sign in to comment.