Skip to content

Commit

Permalink
feat: more meaningful error messages in dashboard when slots are not enabled
Browse files (browse the repository at this point in the history)
  • Loading branch information
mcharytoniuk committed Nov 23, 2024
1 parent cb779d0 commit aa8902f
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 17 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ esbuild: node_modules
run.agent: esbuild
cargo run -- agent \
--external-llamacp-addr "127.0.0.1:8081" \
--local-llamacpp-addr="http://localhost:8081" \
--local-llamacpp-addr="localhost:8081" \
--local-llamacpp-api-key "test" \
--management-addr="http://localhost:8095" \
--management-addr="localhost:8095" \
--name "wohoo"

.PHONY: run.balancer
Expand Down
23 changes: 18 additions & 5 deletions resources/ts/components/Dashboard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ const agentSchema = z.object({
agent_name: z.string().nullable(),
error: z.string().nullable(),
external_llamacpp_addr: z.string(),
is_authorized: z.boolean(),
is_authorized: z.boolean().nullable(),
is_slots_endpoint_enabled: z.boolean().nullable(),
last_update: z.object({
nanos_since_epoch: z.number(),
secs_since_epoch: z.number(),
Expand All @@ -32,7 +33,7 @@ const agentsResponseSchema = z.object({
type Agent = z.infer<typeof agentSchema>;
type AgentsResponse = z.infer<typeof agentsResponseSchema>;

const TICK_MS = 500;
const TICK_MS = 1000;

function formatTimestamp(timestamp: number): string {
return new Date(timestamp * 1000).toLocaleString();
Expand Down Expand Up @@ -146,7 +147,10 @@ export function Dashboard() {
<tbody>
{agents.map(function (agent: Agent) {
const hasIssues =
agent.error || !agent.is_authorized || agent.quarantined_until;
agent.error ||
true !== agent.is_authorized ||
true !== agent.is_slots_endpoint_enabled ||
agent.quarantined_until;

return (
<tr
Expand All @@ -159,11 +163,11 @@ export function Dashboard() {
<td>
{agent.error && (
<>
<p>Error</p>
<p>Agent reported an Error</p>
<p>{agent.error}</p>
</>
)}
{!agent.is_authorized && (
{false === agent.is_authorized && (
<>
<p>Unauthorized</p>
<p>
Expand All @@ -173,6 +177,15 @@ export function Dashboard() {
</p>
</>
)}
{false === agent.is_slots_endpoint_enabled && (
<>
<p>Slots endpoint is not enabled</p>
<p>
Probably llama.cpp server is running without the
`--slots` flag.
</p>
</>
)}
{agent.quarantined_until && (
<>
<p>
Expand Down
4 changes: 3 additions & 1 deletion src/agent/monitoring_service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,15 @@ impl MonitoringService {
None,
self.external_llamacpp_addr.to_owned(),
slots_response.is_authorized,
slots_response.is_slot_endpoint_enabled,
slots_response.slots,
)),
Err(err) => Ok(StatusUpdate::new(
self.name.to_owned(),
Some(err.to_string()),
self.external_llamacpp_addr.to_owned(),
true,
None,
None,
vec![],
)),
}
Expand Down
7 changes: 5 additions & 2 deletions src/balancer/status_update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ pub struct StatusUpdate {
pub error: Option<String>,
pub external_llamacpp_addr: SocketAddr,
pub idle_slots_count: usize,
pub is_authorized: bool,
pub is_authorized: Option<bool>,
pub is_slots_endpoint_enabled: Option<bool>,
pub processing_slots_count: usize,
slots: Vec<Slot>,
}
Expand All @@ -19,7 +20,8 @@ impl StatusUpdate {
agent_name: Option<String>,
error: Option<String>,
external_llamacpp_addr: SocketAddr,
is_authorized: bool,
is_authorized: Option<bool>,
is_slots_endpoint_enabled: Option<bool>,
slots: Vec<Slot>,
) -> Self {
let idle_slots_count = slots.iter().filter(|slot| !slot.is_processing).count();
Expand All @@ -30,6 +32,7 @@ impl StatusUpdate {
external_llamacpp_addr,
idle_slots_count,
is_authorized,
is_slots_endpoint_enabled,
processing_slots_count: slots.len() - idle_slots_count,
slots,
}
Expand Down
14 changes: 11 additions & 3 deletions src/balancer/upstream_peer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ pub struct UpstreamPeer {
pub agent_name: Option<String>,
pub error: Option<String>,
pub external_llamacpp_addr: SocketAddr,
pub is_authorized: bool,
/// None means undetermined, probably due to an error
pub is_authorized: Option<bool>,
/// None means undetermined, probably due to an error
pub is_slots_endpoint_enabled: Option<bool>,
pub last_update: SystemTime,
pub quarantined_until: Option<SystemTime>,
pub slots_idle: usize,
Expand All @@ -26,7 +29,8 @@ impl UpstreamPeer {
agent_name: Option<String>,
error: Option<String>,
external_llamacpp_addr: SocketAddr,
is_authorized: bool,
is_authorized: Option<bool>,
is_slots_endpoint_enabled: Option<bool>,
slots_idle: usize,
slots_processing: usize,
) -> Self {
Expand All @@ -36,6 +40,7 @@ impl UpstreamPeer {
error,
external_llamacpp_addr,
is_authorized,
is_slots_endpoint_enabled,
last_update: SystemTime::now(),
quarantined_until: None,
slots_idle,
Expand All @@ -50,6 +55,7 @@ impl UpstreamPeer {
status_update.error.to_owned(),
status_update.external_llamacpp_addr,
status_update.is_authorized,
status_update.is_slots_endpoint_enabled,
status_update.idle_slots_count,
status_update.processing_slots_count,
)
Expand All @@ -59,7 +65,7 @@ impl UpstreamPeer {
self.slots_idle > 0
&& self.quarantined_until.is_none()
&& self.error.is_none()
&& self.is_authorized
&& matches!(self.is_authorized, Some(true))
}

pub fn release_slot(&mut self) {
Expand All @@ -72,6 +78,8 @@ impl UpstreamPeer {
self.agent_name = status_update.agent_name.to_owned();
self.error = status_update.error.to_owned();
self.external_llamacpp_addr = status_update.external_llamacpp_addr;
self.is_authorized = status_update.is_authorized;
self.is_slots_endpoint_enabled = status_update.is_slots_endpoint_enabled;
self.last_update = SystemTime::now();
self.quarantined_until = None;
self.slots_idle = status_update.idle_slots_count;
Expand Down
18 changes: 15 additions & 3 deletions src/llamacpp/llamacpp_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,15 @@ pub struct LlamacppClient {
impl LlamacppClient {
pub fn new(addr: SocketAddr, api_key: Option<String>) -> Result<Self> {
let mut builder = reqwest::Client::builder().timeout(Duration::from_secs(3));
let mut headers = header::HeaderMap::new();

headers.insert(
header::ACCEPT,
header::HeaderValue::from_static("application/json"),
);

builder = match api_key {
Some(api_key) => {
let mut headers = header::HeaderMap::new();
let mut auth_value = header::HeaderValue::from_str(&format!("Bearer {}", api_key))?;

auth_value.set_sensitive(true);
Expand All @@ -45,11 +50,18 @@ impl LlamacppClient {

match response.status() {
reqwest::StatusCode::OK => Ok(SlotsResponse {
is_authorized: true,
is_authorized: Some(true),
is_slot_endpoint_enabled: Some(true),
slots: response.json::<Vec<Slot>>().await?,
}),
reqwest::StatusCode::UNAUTHORIZED => Ok(SlotsResponse {
is_authorized: false,
is_authorized: Some(false),
is_slot_endpoint_enabled: None,
slots: vec![],
}),
reqwest::StatusCode::NOT_IMPLEMENTED => Ok(SlotsResponse {
is_authorized: None,
is_slot_endpoint_enabled: Some(false),
slots: vec![],
}),
_ => Err("Unexpected response status".into()),
Expand Down
3 changes: 2 additions & 1 deletion src/llamacpp/slots_response.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::llamacpp::slot::Slot;

/// Outcome of a llama.cpp slots query, as built by `LlamacppClient` from the HTTP status.
pub struct SlotsResponse {
pub is_authorized: bool,
/// `Some(true)` on 200 OK, `Some(false)` on 401 Unauthorized, and `None` when the
/// status does not reveal it (501 Not Implemented).
pub is_authorized: Option<bool>,
/// `Some(true)` on 200 OK, `Some(false)` on 501 Not Implemented, and `None` on
/// 401 Unauthorized.
pub is_slot_endpoint_enabled: Option<bool>,
/// Slots parsed from the response body on 200 OK; empty for 401 and 501.
pub slots: Vec<Slot>,
}

0 comments on commit aa8902f

Please sign in to comment.