Skip to content

Commit

Permalink
Break apart metrics into their own module (#335)
Browse files Browse the repository at this point in the history
Signed-off-by: José Ulises Niño Rivera <[email protected]>
  • Loading branch information
junr03 authored Dec 9, 2024
1 parent d002b20 commit cd1b561
Show file tree
Hide file tree
Showing 9 changed files with 59 additions and 61 deletions.
5 changes: 0 additions & 5 deletions crates/common/src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use log::error;
use proxy_wasm::hostcalls;
use proxy_wasm::types::*;

#[allow(unused)]
pub trait Metric {
fn id(&self) -> u32;
fn value(&self) -> Result<u64, String> {
Expand All @@ -14,7 +13,6 @@ pub trait Metric {
}
}

#[allow(unused)]
pub trait IncrementingMetric: Metric {
fn increment(&self, offset: i64) {
match hostcalls::increment_metric(self.id(), offset) {
Expand All @@ -24,7 +22,6 @@ pub trait IncrementingMetric: Metric {
}
}

#[allow(unused)]
pub trait RecordingMetric: Metric {
fn record(&self, value: u64) {
match hostcalls::record_metric(self.id(), value) {
Expand All @@ -39,7 +36,6 @@ pub struct Counter {
id: u32,
}

#[allow(unused)]
impl Counter {
pub fn new(name: String) -> Counter {
let returned_id = hostcalls::define_metric(MetricType::Counter, &name)
Expand Down Expand Up @@ -85,7 +81,6 @@ pub struct Histogram {
id: u32,
}

#[allow(unused)]
impl Histogram {
pub fn new(name: String) -> Histogram {
let returned_id = hostcalls::define_metric(MetricType::Histogram, &name)
Expand Down
34 changes: 3 additions & 31 deletions crates/llm_gateway/src/filter_context.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::metrics::Metrics;
use crate::stream_context::StreamContext;
use common::configuration::Configuration;
use common::consts::OTEL_COLLECTOR_HTTP;
Expand All @@ -6,9 +7,7 @@ use common::http::CallArgs;
use common::http::Client;
use common::llm_providers::LlmProviders;
use common::ratelimit;
use common::stats::Counter;
use common::stats::Gauge;
use common::stats::Histogram;
use common::tracing::TraceData;
use log::debug;
use log::warn;
Expand All @@ -22,39 +21,12 @@ use std::time::Duration;

use std::sync::{Arc, Mutex};

#[derive(Copy, Clone, Debug)]
pub struct WasmMetrics {
pub active_http_calls: Gauge,
pub ratelimited_rq: Counter,
pub time_to_first_token: Histogram,
pub time_per_output_token: Histogram,
pub tokens_per_second: Histogram,
pub request_latency: Histogram,
pub output_sequence_length: Histogram,
pub input_sequence_length: Histogram,
}

impl WasmMetrics {
fn new() -> WasmMetrics {
WasmMetrics {
active_http_calls: Gauge::new(String::from("active_http_calls")),
ratelimited_rq: Counter::new(String::from("ratelimited_rq")),
time_to_first_token: Histogram::new(String::from("time_to_first_token")),
time_per_output_token: Histogram::new(String::from("time_per_output_token")),
tokens_per_second: Histogram::new(String::from("tokens_per_second")),
request_latency: Histogram::new(String::from("request_latency")),
output_sequence_length: Histogram::new(String::from("output_sequence_length")),
input_sequence_length: Histogram::new(String::from("input_sequence_length")),
}
}
}

#[derive(Debug)]
pub struct CallContext {}

#[derive(Debug)]
pub struct FilterContext {
metrics: Rc<WasmMetrics>,
metrics: Rc<Metrics>,
// callouts stores token_id to request mapping that we use during #on_http_call_response to match the response to the request.
callouts: RefCell<HashMap<u32, CallContext>>,
llm_providers: Option<Rc<LlmProviders>>,
Expand All @@ -65,7 +37,7 @@ impl FilterContext {
pub fn new() -> FilterContext {
FilterContext {
callouts: RefCell::new(HashMap::new()),
metrics: Rc::new(WasmMetrics::new()),
metrics: Rc::new(Metrics::new()),
llm_providers: None,
traces_queue: Arc::new(Mutex::new(VecDeque::new())),
}
Expand Down
1 change: 1 addition & 0 deletions crates/llm_gateway/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use proxy_wasm::traits::*;
use proxy_wasm::types::*;

mod filter_context;
mod metrics;
mod stream_context;

proxy_wasm::main! {{
Expand Down
28 changes: 28 additions & 0 deletions crates/llm_gateway/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
use common::stats::{Counter, Gauge, Histogram};

#[derive(Copy, Clone, Debug)]
pub struct Metrics {
pub active_http_calls: Gauge,
pub ratelimited_rq: Counter,
pub time_to_first_token: Histogram,
pub time_per_output_token: Histogram,
pub tokens_per_second: Histogram,
pub request_latency: Histogram,
pub output_sequence_length: Histogram,
pub input_sequence_length: Histogram,
}

impl Metrics {
pub fn new() -> Metrics {
Metrics {
active_http_calls: Gauge::new(String::from("active_http_calls")),
ratelimited_rq: Counter::new(String::from("ratelimited_rq")),
time_to_first_token: Histogram::new(String::from("time_to_first_token")),
time_per_output_token: Histogram::new(String::from("time_per_output_token")),
tokens_per_second: Histogram::new(String::from("tokens_per_second")),
request_latency: Histogram::new(String::from("request_latency")),
output_sequence_length: Histogram::new(String::from("output_sequence_length")),
input_sequence_length: Histogram::new(String::from("input_sequence_length")),
}
}
}
12 changes: 5 additions & 7 deletions crates/llm_gateway/src/stream_context.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::filter_context::WasmMetrics;
use crate::metrics::Metrics;
use common::api::open_ai::{
ChatCompletionStreamResponseServerEvents, ChatCompletionsRequest, ChatCompletionsResponse,
Message, StreamOptions,
Expand All @@ -12,25 +12,23 @@ use common::errors::ServerError;
use common::llm_providers::LlmProviders;
use common::pii::obfuscate_auth_header;
use common::ratelimit::Header;
use common::stats::{IncrementingMetric, RecordingMetric};
use common::tracing::{Event, Span, TraceData, Traceparent};
use common::{ratelimit, routing, tokenizer};
use http::StatusCode;
use log::{debug, trace, warn};
use proxy_wasm::hostcalls::get_current_time;
use proxy_wasm::traits::*;
use proxy_wasm::types::*;
use std::collections::VecDeque;
use std::num::NonZero;
use std::rc::Rc;
use std::sync::{Arc, Mutex};

use common::stats::{IncrementingMetric, RecordingMetric};

use proxy_wasm::hostcalls::get_current_time;
use std::time::{Duration, SystemTime, UNIX_EPOCH};

pub struct StreamContext {
context_id: u32,
metrics: Rc<WasmMetrics>,
metrics: Rc<Metrics>,
ratelimit_selector: Option<Header>,
streaming_response: bool,
response_tokens: usize,
Expand All @@ -50,7 +48,7 @@ pub struct StreamContext {
impl StreamContext {
pub fn new(
context_id: u32,
metrics: Rc<WasmMetrics>,
metrics: Rc<Metrics>,
llm_providers: Rc<LlmProviders>,
traces_queue: Arc<Mutex<VecDeque<TraceData>>>,
) -> Self {
Expand Down
18 changes: 3 additions & 15 deletions crates/prompt_gateway/src/filter_context.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::embeddings::EmbeddingType;
use crate::metrics::Metrics;
use crate::stream_context::StreamContext;
use common::configuration::{Configuration, Overrides, PromptGuards, PromptTarget, Tracing};
use common::consts::ARCH_UPSTREAM_HOST_HEADER;
Expand All @@ -21,19 +22,6 @@ use std::collections::HashMap;
use std::rc::Rc;
use std::time::Duration;

#[derive(Copy, Clone, Debug)]
pub struct WasmMetrics {
pub active_http_calls: Gauge,
}

impl WasmMetrics {
fn new() -> WasmMetrics {
WasmMetrics {
active_http_calls: Gauge::new(String::from("active_http_calls")),
}
}
}

pub type EmbeddingTypeMap = HashMap<EmbeddingType, Vec<f64>>;
pub type EmbeddingsStore = HashMap<String, EmbeddingTypeMap>;

Expand All @@ -45,7 +33,7 @@ pub struct FilterCallContext {

#[derive(Debug)]
pub struct FilterContext {
metrics: Rc<WasmMetrics>,
metrics: Rc<Metrics>,
// callouts stores token_id to request mapping that we use during #on_http_call_response to match the response to the request.
callouts: RefCell<HashMap<u32, FilterCallContext>>,
overrides: Rc<Option<Overrides>>,
Expand All @@ -62,7 +50,7 @@ impl FilterContext {
pub fn new() -> FilterContext {
FilterContext {
callouts: RefCell::new(HashMap::new()),
metrics: Rc::new(WasmMetrics::new()),
metrics: Rc::new(Metrics::new()),
system_prompt: Rc::new(None),
prompt_targets: Rc::new(HashMap::new()),
overrides: Rc::new(None),
Expand Down
1 change: 1 addition & 0 deletions crates/prompt_gateway/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod context;
mod embeddings;
mod filter_context;
mod http_context;
mod metrics;
mod stream_context;

proxy_wasm::main! {{
Expand Down
14 changes: 14 additions & 0 deletions crates/prompt_gateway/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use common::stats::Gauge;

#[derive(Copy, Clone, Debug)]
pub struct Metrics {
pub active_http_calls: Gauge,
}

impl Metrics {
pub fn new() -> Metrics {
Metrics {
active_http_calls: Gauge::new(String::from("active_http_calls")),
}
}
}
7 changes: 4 additions & 3 deletions crates/prompt_gateway/src/stream_context.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::embeddings::EmbeddingType;
use crate::filter_context::{EmbeddingsStore, WasmMetrics};
use crate::filter_context::EmbeddingsStore;
use crate::metrics::Metrics;
use acap::cos;
use common::api::hallucination::{
extract_messages_for_hallucination, HallucinationClassificationRequest,
Expand Down Expand Up @@ -67,7 +68,7 @@ pub struct StreamContext {
pub prompt_targets: Rc<HashMap<String, PromptTarget>>,
pub embeddings_store: Option<Rc<EmbeddingsStore>>,
overrides: Rc<Option<Overrides>>,
pub metrics: Rc<WasmMetrics>,
pub metrics: Rc<Metrics>,
pub callouts: RefCell<HashMap<u32, StreamCallContext>>,
pub context_id: u32,
pub tool_calls: Option<Vec<ToolCall>>,
Expand All @@ -90,7 +91,7 @@ impl StreamContext {
#[allow(clippy::too_many_arguments)]
pub fn new(
context_id: u32,
metrics: Rc<WasmMetrics>,
metrics: Rc<Metrics>,
system_prompt: Rc<Option<String>>,
prompt_targets: Rc<HashMap<String, PromptTarget>>,
prompt_guards: Rc<PromptGuards>,
Expand Down

0 comments on commit cd1b561

Please sign in to comment.