From aa8902f4af916be670091ba979af61f958d54cd8 Mon Sep 17 00:00:00 2001 From: Mateusz Charytoniuk Date: Sat, 23 Nov 2024 13:18:25 +0100 Subject: [PATCH] feat: more meaningful error messages in dashboard when slots are not enabled --- Makefile | 4 ++-- resources/ts/components/Dashboard.tsx | 23 ++++++++++++++++++----- src/agent/monitoring_service.rs | 4 +++- src/balancer/status_update.rs | 7 +++++-- src/balancer/upstream_peer.rs | 14 +++++++++++--- src/llamacpp/llamacpp_client.rs | 18 +++++++++++++++--- src/llamacpp/slots_response.rs | 3 ++- 7 files changed, 56 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index 60b3232..fd519e5 100644 --- a/Makefile +++ b/Makefile @@ -52,9 +52,9 @@ esbuild: node_modules run.agent: esbuild cargo run -- agent \ --external-llamacp-addr "127.0.0.1:8081" \ - --local-llamacpp-addr="http://localhost:8081" \ + --local-llamacpp-addr="localhost:8081" \ --local-llamacpp-api-key "test" \ - --management-addr="http://localhost:8095" \ + --management-addr="localhost:8095" \ --name "wohoo" .PHONY: run.balancer diff --git a/resources/ts/components/Dashboard.tsx b/resources/ts/components/Dashboard.tsx index ab85b38..33b1b61 100644 --- a/resources/ts/components/Dashboard.tsx +++ b/resources/ts/components/Dashboard.tsx @@ -9,7 +9,8 @@ const agentSchema = z.object({ agent_name: z.string().nullable(), error: z.string().nullable(), external_llamacpp_addr: z.string(), - is_authorized: z.boolean(), + is_authorized: z.boolean().nullable(), + is_slots_endpoint_enabled: z.boolean().nullable(), last_update: z.object({ nanos_since_epoch: z.number(), secs_since_epoch: z.number(), @@ -32,7 +33,7 @@ const agentsResponseSchema = z.object({ type Agent = z.infer; type AgentsResponse = z.infer; -const TICK_MS = 500; +const TICK_MS = 1000; function formatTimestamp(timestamp: number): string { return new Date(timestamp * 1000).toLocaleString(); @@ -146,7 +147,10 @@ export function Dashboard() { {agents.map(function (agent: Agent) { const hasIssues = - agent.error || !agent.is_authorized || agent.quarantined_until; + agent.error || + true !== agent.is_authorized || + true !== agent.is_slots_endpoint_enabled || + agent.quarantined_until; return ( {agent.error && ( <> -

Error

+

Agent reported an Error

{agent.error}

)} - {!agent.is_authorized && ( + {false === agent.is_authorized && ( <>

Unauthorized

@@ -173,6 +177,15 @@ export function Dashboard() {

)} + {false === agent.is_slots_endpoint_enabled && ( + <> +

Slots endpoint is not enabled

+

+ Probably llama.cpp server is running without the + `--slots` flag. +

+ + )} {agent.quarantined_until && ( <>

diff --git a/src/agent/monitoring_service.rs b/src/agent/monitoring_service.rs index 6b64358..7a33129 100644 --- a/src/agent/monitoring_service.rs +++ b/src/agent/monitoring_service.rs @@ -48,13 +48,15 @@ impl MonitoringService { None, self.external_llamacpp_addr.to_owned(), slots_response.is_authorized, + slots_response.is_slot_endpoint_enabled, slots_response.slots, )), Err(err) => Ok(StatusUpdate::new( self.name.to_owned(), Some(err.to_string()), self.external_llamacpp_addr.to_owned(), - true, + None, + None, vec![], )), } diff --git a/src/balancer/status_update.rs b/src/balancer/status_update.rs index 3b37277..79c40c2 100644 --- a/src/balancer/status_update.rs +++ b/src/balancer/status_update.rs @@ -9,7 +9,8 @@ pub struct StatusUpdate { pub error: Option, pub external_llamacpp_addr: SocketAddr, pub idle_slots_count: usize, - pub is_authorized: bool, + pub is_authorized: Option, + pub is_slots_endpoint_enabled: Option, pub processing_slots_count: usize, slots: Vec, } @@ -19,7 +20,8 @@ impl StatusUpdate { agent_name: Option, error: Option, external_llamacpp_addr: SocketAddr, - is_authorized: bool, + is_authorized: Option, + is_slots_endpoint_enabled: Option, slots: Vec, ) -> Self { let idle_slots_count = slots.iter().filter(|slot| !slot.is_processing).count(); @@ -30,6 +32,7 @@ impl StatusUpdate { external_llamacpp_addr, idle_slots_count, is_authorized, + is_slots_endpoint_enabled, processing_slots_count: slots.len() - idle_slots_count, slots, } diff --git a/src/balancer/upstream_peer.rs b/src/balancer/upstream_peer.rs index 6268b45..9a134a4 100644 --- a/src/balancer/upstream_peer.rs +++ b/src/balancer/upstream_peer.rs @@ -13,7 +13,10 @@ pub struct UpstreamPeer { pub agent_name: Option, pub error: Option, pub external_llamacpp_addr: SocketAddr, - pub is_authorized: bool, + /// None means undetermined, probably due to an error + pub is_authorized: Option, + /// None means undetermined, probably due to an error + pub is_slots_endpoint_enabled: Option, pub last_update: SystemTime, pub quarantined_until: Option, pub slots_idle: usize, @@ -26,7 +29,8 @@ impl UpstreamPeer { agent_name: Option, error: Option, external_llamacpp_addr: SocketAddr, - is_authorized: bool, + is_authorized: Option, + is_slots_endpoint_enabled: Option, slots_idle: usize, slots_processing: usize, ) -> Self { @@ -36,6 +40,7 @@ impl UpstreamPeer { error, external_llamacpp_addr, is_authorized, + is_slots_endpoint_enabled, last_update: SystemTime::now(), quarantined_until: None, slots_idle, @@ -50,6 +55,7 @@ impl UpstreamPeer { status_update.error.to_owned(), status_update.external_llamacpp_addr, status_update.is_authorized, + status_update.is_slots_endpoint_enabled, status_update.idle_slots_count, status_update.processing_slots_count, ) @@ -59,7 +65,7 @@ impl UpstreamPeer { self.slots_idle > 0 && self.quarantined_until.is_none() && self.error.is_none() - && self.is_authorized + && matches!(self.is_authorized, Some(true)) } pub fn release_slot(&mut self) { @@ -72,6 +78,8 @@ impl UpstreamPeer { self.agent_name = status_update.agent_name.to_owned(); self.error = status_update.error.to_owned(); self.external_llamacpp_addr = status_update.external_llamacpp_addr; + self.is_authorized = status_update.is_authorized; + self.is_slots_endpoint_enabled = status_update.is_slots_endpoint_enabled; self.last_update = SystemTime::now(); self.quarantined_until = None; self.slots_idle = status_update.idle_slots_count; diff --git a/src/llamacpp/llamacpp_client.rs b/src/llamacpp/llamacpp_client.rs index 5b1daa2..339463a 100644 --- a/src/llamacpp/llamacpp_client.rs +++ b/src/llamacpp/llamacpp_client.rs @@ -15,10 +15,15 @@ pub struct LlamacppClient { impl LlamacppClient { pub fn new(addr: SocketAddr, api_key: Option) -> Result { let mut builder = reqwest::Client::builder().timeout(Duration::from_secs(3)); + let mut headers = header::HeaderMap::new(); + + headers.insert( + header::ACCEPT, + header::HeaderValue::from_static("application/json"), + ); builder = match api_key { Some(api_key) => { - let mut headers = header::HeaderMap::new(); let mut auth_value = header::HeaderValue::from_str(&format!("Bearer {}", api_key))?; auth_value.set_sensitive(true); @@ -45,11 +50,18 @@ impl LlamacppClient { match response.status() { reqwest::StatusCode::OK => Ok(SlotsResponse { - is_authorized: true, + is_authorized: Some(true), + is_slot_endpoint_enabled: Some(true), slots: response.json::>().await?, }), reqwest::StatusCode::UNAUTHORIZED => Ok(SlotsResponse { - is_authorized: false, + is_authorized: Some(false), + is_slot_endpoint_enabled: None, + slots: vec![], + }), + reqwest::StatusCode::NOT_IMPLEMENTED => Ok(SlotsResponse { + is_authorized: None, + is_slot_endpoint_enabled: Some(false), slots: vec![], }), _ => Err("Unexpected response status".into()), diff --git a/src/llamacpp/slots_response.rs b/src/llamacpp/slots_response.rs index bf74ac6..25598a1 100644 --- a/src/llamacpp/slots_response.rs +++ b/src/llamacpp/slots_response.rs @@ -1,6 +1,7 @@ use crate::llamacpp::slot::Slot; pub struct SlotsResponse { - pub is_authorized: bool, + pub is_authorized: Option, + pub is_slot_endpoint_enabled: Option, pub slots: Vec, }