Skip to content

Commit

Permalink
feat(phoenix-service): add geo to node alerts
Browse files Browse the repository at this point in the history
  • Loading branch information
blombern committed Sep 3, 2024
1 parent ecb398a commit 81a25f5
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 7 deletions.
106 changes: 101 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ redis = { version = "0.22.1", features = ["aio", "tokio-comp"] }
reqwest = { version = "0.11", features = ["json", "gzip"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_with = "3.5.1"
sqlx = { version = "0.8.0", features = [
"chrono",
"postgres",
Expand Down
34 changes: 34 additions & 0 deletions src/phoenix/env.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
use std::{fmt, str};

use lazy_static::lazy_static;
use reqwest::Url;
use serde::Deserialize;
use serde_with::{serde_as, DisplayFromStr};

use crate::env::{deserialize_network, deserialize_urls, get_app_config, Env, Network};

#[serde_as]
#[derive(Deserialize)]
pub struct AppConfig {
#[serde(default = "default_wait")]
Expand All @@ -15,6 +19,8 @@ pub struct AppConfig {
/// Skip global checks in `run_ops_monitors` and only check for beacon/sim node status.
#[serde(default)]
pub ff_node_check_only: bool,
#[serde_as(as = "DisplayFromStr")]
pub geo: Geo,
pub loki_url: String,
/// Minimum number of missed slots per check interval to trigger an alert
#[serde(default = "default_missed_slots_alert_threshold")]
Expand Down Expand Up @@ -79,6 +85,34 @@ fn default_missed_slots_alert_threshold() -> i64 {
3
}

/// Auction geography.
///
/// The textual form of each variant is its lowercase code (`"rbx"`, `"vin"`),
/// which is what the `Display` and `FromStr` implementations in this module
/// produce and accept.
///
/// The type is a plain field-less enum, so it derives the cheap standard
/// traits: `Debug` for diagnostics, `Clone`/`Copy` so it can be passed by
/// value, and `PartialEq`/`Eq` so callers can compare geographies directly.
// The variants are deliberately kept as all-caps codes instead of `Rbx`/`Vin`,
// which is why the clippy acronym lint is silenced here.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Geo {
    RBX,
    VIN,
}

impl fmt::Display for Geo {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let str = match &self {
Geo::RBX => "rbx",
Geo::VIN => "vin",
};
write!(f, "{}", str)
}
}

impl str::FromStr for Geo {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"rbx" => Ok(Geo::RBX),
"vin" => Ok(Geo::VIN),
_ => Err(format!("invalid auction geo: {}", s)),
}
}
}

lazy_static! {
/// Process-wide application configuration, produced by `get_app_config()`.
///
/// Being a `lazy_static` item, the value is built on first access and then
/// shared for the rest of the process.
pub static ref APP_CONFIG: AppConfig = get_app_config();
}
8 changes: 6 additions & 2 deletions src/phoenix/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,15 +120,19 @@ impl NodeAlarm {

/// Fires an Opsgenie alarm reporting that `name` hasn't updated for more than
/// `PHOENIX_MAX_LIFESPAN.num_seconds()` seconds.
///
/// NOTE(review): this text comes from a rendered diff, so both the pre-change
/// and the post-change format strings appear back to back below. Presumably
/// only the second one (which also reports `APP_CONFIG.geo`) exists in the
/// committed file — confirm against the repository.
async fn fire_age_over_limit(&mut self, name: &str) {
let message = format!(
"{} hasn't updated for more than {} seconds",
"{} hasn't updated for more than {} seconds on {}",
name,
PHOENIX_MAX_LIFESPAN.num_seconds(),
APP_CONFIG.geo
);
self.alarm.fire(&message, &AlarmType::Opsgenie).await;
}

async fn fire_num_unsynced_nodes(&mut self, name: &str, num_unsynced_nodes: usize) {
let message = format!("{} has {} unsynced instances", name, num_unsynced_nodes);
let message = format!(
"{} has {} unsynced instances on {}",
name, num_unsynced_nodes, APP_CONFIG.geo
);

if num_unsynced_nodes >= APP_CONFIG.unsynced_nodes_threshold_og_alert {
self.alarm.fire(&message, &AlarmType::Opsgenie).await;
Expand Down

0 comments on commit 81a25f5

Please sign in to comment.