Skip to content

Commit

Permalink
Improve logging on changes in a compute's status
Browse files Browse the repository at this point in the history
I'm trying to debug a situation with the LR benchmark publisher not
being in the correct state. This should aid in debugging, while just
being generally useful.

PR: #9265
Signed-off-by: Tristan Partin <[email protected]>
  • Loading branch information
tristan957 authored Oct 7, 2024
1 parent 99d4c18 commit 6eba29c
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 16 deletions.
3 changes: 1 addition & 2 deletions compute_tools/src/bin/compute_ctl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -402,8 +402,7 @@ fn start_postgres(
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
// We got all we need, update the state.
let mut state = compute.state.lock().unwrap();
state.status = ComputeStatus::Init;
compute.state_changed.notify_all();
state.set_status(ComputeStatus::Init, &compute.state_changed);

info!(
"running compute with features: {:?}",
Expand Down
19 changes: 14 additions & 5 deletions compute_tools/src/compute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,18 @@ impl ComputeState {
metrics: ComputeMetrics::default(),
}
}

pub fn set_status(&mut self, status: ComputeStatus, state_changed: &Condvar) {
let prev = self.status;
info!("Changing compute status from {} to {}", prev, status);
self.status = status;
state_changed.notify_all();
}

pub fn set_failed_status(&mut self, err: anyhow::Error, state_changed: &Condvar) {
self.error = Some(format!("{err:?}"));
self.set_status(ComputeStatus::Failed, state_changed);
}
}

impl Default for ComputeState {
Expand Down Expand Up @@ -303,15 +315,12 @@ impl ComputeNode {

pub fn set_status(&self, status: ComputeStatus) {
let mut state = self.state.lock().unwrap();
state.status = status;
self.state_changed.notify_all();
state.set_status(status, &self.state_changed);
}

pub fn set_failed_status(&self, err: anyhow::Error) {
let mut state = self.state.lock().unwrap();
state.error = Some(format!("{err:?}"));
state.status = ComputeStatus::Failed;
self.state_changed.notify_all();
state.set_failed_status(err, &self.state_changed);
}

pub fn get_status(&self) -> ComputeStatus {
Expand Down
3 changes: 1 addition & 2 deletions compute_tools/src/configurator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ fn configurator_main_loop(compute: &Arc<ComputeNode>) {
// Re-check the status after waking up
if state.status == ComputeStatus::ConfigurationPending {
info!("got configuration request");
state.status = ComputeStatus::Configuration;
compute.state_changed.notify_all();
state.set_status(ComputeStatus::Configuration, &compute.state_changed);
drop(state);

let mut new_status = ComputeStatus::Failed;
Expand Down
14 changes: 7 additions & 7 deletions compute_tools/src/http/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,8 +288,7 @@ async fn handle_configure_request(
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.pspec = Some(parsed_spec);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
drop(state);
info!("set new spec and notified waiters");
}
Expand Down Expand Up @@ -362,15 +361,15 @@ async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (Str
}
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for termination request: {:?}",
state.status.clone()
"invalid compute status for termination request: {}",
state.status
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.status = ComputeStatus::TerminationPending;
compute.state_changed.notify_all();
state.set_status(ComputeStatus::TerminationPending, &compute.state_changed);
drop(state);
}

forward_termination_signal();
info!("sent signal and notified waiters");

Expand All @@ -384,7 +383,8 @@ async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (Str
while state.status != ComputeStatus::Terminated {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become Terminated, current status: {:?}",
"waiting for compute to become {}, current status: {:?}",
ComputeStatus::Terminated,
state.status
);
}
Expand Down
17 changes: 17 additions & 0 deletions libs/compute_api/src/responses.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
use std::fmt::Display;

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize, Serializer};

Expand Down Expand Up @@ -58,6 +60,21 @@ pub enum ComputeStatus {
Terminated,
}

impl Display for ComputeStatus {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ComputeStatus::Empty => f.write_str("empty"),
ComputeStatus::ConfigurationPending => f.write_str("configuration-pending"),
ComputeStatus::Init => f.write_str("init"),
ComputeStatus::Running => f.write_str("running"),
ComputeStatus::Configuration => f.write_str("configuration"),
ComputeStatus::Failed => f.write_str("failed"),
ComputeStatus::TerminationPending => f.write_str("termination-pending"),
ComputeStatus::Terminated => f.write_str("terminated"),
}
}
}

fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
Expand Down

1 comment on commit 6eba29c

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

5173 tests run: 4956 passed, 0 failed, 217 skipped (full report)


Flaky tests (1)

Postgres 17

Code coverage* (full report)

  • functions: 31.4% (7510 of 23945 functions)
  • lines: 49.6% (60300 of 121621 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
6eba29c at 2024-10-07T19:07:02.009Z :recycle:

Please sign in to comment.