Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(encoding): implement UTF-8 support in metric and label names #236

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions examples/axum-utf-8.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
use axum::body::Body;
use axum::extract::State;
use axum::http::header::CONTENT_TYPE;
use axum::http::{HeaderMap, StatusCode};
use axum::response::{IntoResponse, Response};
use axum::routing::get;
use axum::Router;
use prometheus_client::encoding::negotiate_escaping_scheme;
use prometheus_client::encoding::text::encode;
use prometheus_client::encoding::EscapingScheme::UnderscoreEscaping;
use prometheus_client::encoding::ValidationScheme::UTF8Validation;
use prometheus_client::metrics::counter::Counter;
use prometheus_client::metrics::family::Family;
use prometheus_client::registry::{Registry, RegistryBuilder};
use std::sync::Arc;
use tokio::sync::Mutex;

/// Metrics recorded by this example application.
#[derive(Debug)]
pub struct Metrics {
// Counter family whose label set is a list of (label name, label value)
// pairs, so label names can be arbitrary strings chosen at runtime.
requests: Family<Vec<(String, String)>, Counter>,
}

impl Metrics {
    /// Increments the request counter of the series labelled with `method`.
    ///
    /// The label name used here, `method.label`, contains a dot, so the
    /// example exercises label names outside the legacy character set.
    pub fn inc_requests(&self, method: String) {
        let labels = vec![("method.label".to_owned(), method)];
        self.requests.get_or_create(&labels).inc();
    }
}

/// State handed to the `/metrics` handler.
#[derive(Debug)]
pub struct AppState {
// Registry that is encoded on every scrape; the handler also updates its
// escaping scheme, which is why the state is shared behind a mutex.
pub registry: Registry,
}

/// Encodes the registry and returns it as an OpenMetrics text response.
///
/// If the request carries an `Accept` header, the escaping scheme is
/// negotiated from it (falling back to the registry's current scheme) and
/// stored on the registry before encoding. The scheme in effect is reported
/// back through the `escaping=` parameter of the `Content-Type` header.
///
/// # Panics
///
/// Panics if encoding the registry or building the response fails.
pub async fn metrics_handler(
    State(state): State<Arc<Mutex<AppState>>>,
    headers: HeaderMap,
) -> impl IntoResponse {
    let mut state = state.lock().await;

    // The header value is client-controlled and `to_str` fails on bytes that
    // are not visible ASCII. Ignore such a header instead of panicking, so a
    // malformed request cannot take the handler down.
    if let Some(accept) = headers.get("Accept").and_then(|value| value.to_str().ok()) {
        let escaping_scheme =
            negotiate_escaping_scheme(accept, state.registry.escaping_scheme());
        // NOTE(review): this mutates the shared registry, so the negotiated
        // scheme stays in effect for later requests that send no `Accept`
        // header. Confirm that this is intended for the example.
        state.registry.set_escaping_scheme(escaping_scheme);
    }

    let mut buffer = String::new();
    encode(&mut buffer, &state.registry).unwrap();

    Response::builder()
        .status(StatusCode::OK)
        .header(
            CONTENT_TYPE,
            "application/openmetrics-text; version=1.0.0; charset=utf-8; escaping=".to_owned()
                + state.registry.escaping_scheme().as_str(),
        )
        .body(Body::from(buffer))
        .unwrap()
}

/// Example endpoint: records one `Get` request and replies with `okay`.
pub async fn some_handler(State(metrics): State<Arc<Mutex<Metrics>>>) -> impl IntoResponse {
    {
        // Keep the lock scoped to the counter update only.
        let metrics = metrics.lock().await;
        metrics.inc_requests(String::from("Get"));
    }
    String::from("okay")
}

/// Builds a registry that accepts UTF-8 names, registers the request counter
/// under the dotted name `requests.count`, and serves `/metrics` and
/// `/handler` on port 8080.
#[tokio::main]
async fn main() {
    let mut registry = RegistryBuilder::new()
        .with_name_validation_scheme(UTF8Validation)
        .with_escaping_scheme(UnderscoreEscaping)
        .build();

    let metrics = Metrics {
        requests: Family::default(),
    };
    registry.register(
        "requests.count",
        "Count of requests",
        metrics.requests.clone(),
    );

    let shared_state = Arc::new(Mutex::new(AppState { registry }));
    let shared_metrics = Arc::new(Mutex::new(metrics));

    // Each `with_state` call supplies the state for the routes added before it.
    let router = Router::new()
        .route("/metrics", get(metrics_handler))
        .with_state(shared_state)
        .route("/handler", get(some_handler))
        .with_state(shared_metrics);

    let port = 8080;
    let address = format!("0.0.0.0:{}", port);
    let listener = tokio::net::TcpListener::bind(address).await.unwrap();

    axum::serve(listener, router).await.unwrap();
}
168 changes: 168 additions & 0 deletions src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -767,3 +767,171 @@ impl ExemplarValueEncoder<'_> {
for_both_mut!(self, ExemplarValueEncoderInner, e, e.encode(v))
}
}

/// Selects how metric and label names are validated.
///
/// The default is [`ValidationScheme::LegacyValidation`].
#[derive(Debug, PartialEq, Default, Clone)]
pub enum ValidationScheme {
/// Requires metric and label names to conform to the original
/// OpenMetrics character requirements.
#[default]
LegacyValidation,
/// Only requires metric and label names to be valid UTF-8 strings.
/// Under this scheme, names that are not legacy-valid are the ones
/// reported as quoted by the `is_quoted_*` helpers in this module.
UTF8Validation,
}

/// Returns whether `c`, found at character index `i` of a metric name, is
/// allowed by the legacy rules: ASCII letters, `_` and `:` anywhere, ASCII
/// digits everywhere except the first position.
fn is_valid_legacy_char(c: char, i: usize) -> bool {
    match c {
        'a'..='z' | 'A'..='Z' | '_' | ':' => true,
        // A name must not start with a digit.
        '0'..='9' => i > 0,
        _ => false,
    }
}

/// Returns whether `name` is a non-empty metric name made up solely of
/// legacy-valid characters (see `is_valid_legacy_char`).
fn is_valid_legacy_metric_name(name: &str) -> bool {
    !name.is_empty()
        && name
            .chars()
            .enumerate()
            .all(|(index, ch)| is_valid_legacy_char(ch, index))
}

/// Returns whether `prefix` is acceptable under the legacy rules. A missing
/// prefix is always acceptable; a present one must be a valid legacy metric
/// name.
fn is_valid_legacy_prefix(prefix: Option<&Prefix>) -> bool {
    prefix.map_or(true, |present| is_valid_legacy_metric_name(present.as_str()))
}

/// Returns whether a metric name is reported as quoted: the validation
/// scheme is UTF-8 and either the name or its prefix is not legacy-valid.
fn is_quoted_metric_name(
    name: &str,
    prefix: Option<&Prefix>,
    validation_scheme: &ValidationScheme,
) -> bool {
    if *validation_scheme != ValidationScheme::UTF8Validation {
        return false;
    }
    let legacy_valid = is_valid_legacy_metric_name(name) && is_valid_legacy_prefix(prefix);
    !legacy_valid
}

/// Returns whether `label_name` is a non-empty label name under the legacy
/// rules: ASCII letters and `_` anywhere, ASCII digits everywhere except the
/// first position. Unlike metric names, `:` is not allowed.
fn is_valid_legacy_label_name(label_name: &str) -> bool {
    !label_name.is_empty()
        && label_name.chars().enumerate().all(|(index, ch)| match ch {
            'a'..='z' | 'A'..='Z' | '_' => true,
            // A label name must not start with a digit.
            '0'..='9' => index > 0,
            _ => false,
        })
}

/// Returns whether a label name is reported as quoted: the validation scheme
/// is UTF-8 and the name is not a valid legacy label name.
fn is_quoted_label_name(name: &str, validation_scheme: &ValidationScheme) -> bool {
    match validation_scheme {
        ValidationScheme::UTF8Validation => !is_valid_legacy_label_name(name),
        ValidationScheme::LegacyValidation => false,
    }
}

/// Selects how metric and label names are escaped.
///
/// The default is [`EscapingScheme::UnderscoreEscaping`]. Each variant has a
/// textual name, returned by `EscapingScheme::as_str`.
#[derive(Debug, Default, Clone)]
pub enum EscapingScheme {
/// Replaces all legacy-invalid characters with underscores.
/// Textual name: `underscores`.
#[default]
UnderscoreEscaping,
/// Similar to UnderscoreEscaping, except that dots are
/// converted to `_dot_` and pre-existing underscores are converted to `__`.
/// Textual name: `dots`.
DotsEscaping,
/// Prepends the name with `U__` and replaces all invalid
/// characters with the Unicode value, surrounded by underscores. Single
/// underscores are replaced with double underscores.
/// Textual name: `values`.
ValueEncodingEscaping,
/// Indicates that a name will not be escaped.
/// Textual name: `allow-utf-8`.
NoEscaping,
}

impl EscapingScheme {
/// Returns a string representation of a `EscapingScheme`.
pub fn as_str(&self) -> &str {
match self {
EscapingScheme::UnderscoreEscaping => "underscores",
EscapingScheme::DotsEscaping => "dots",
EscapingScheme::ValueEncodingEscaping => "values",
EscapingScheme::NoEscaping => "allow-utf-8",
}
}
}

/// Escapes `name` according to `scheme` and returns the escaped name.
///
/// An empty name is returned unchanged. Otherwise:
///
/// * `NoEscaping` returns the name unchanged.
/// * `UnderscoreEscaping` replaces every legacy-invalid character with `_`.
/// * `DotsEscaping` turns `_` into `__`, `.` into `_dot_`, and any other
///   legacy-invalid character into `_`. Legacy-valid names are still
///   processed, because their underscores must be doubled.
/// * `ValueEncodingEscaping` returns legacy-valid names unchanged. Any other
///   name is prefixed with `U__`; underscores are doubled and every
///   legacy-invalid character is replaced by its Unicode code point in
///   hexadecimal, surrounded by underscores.
fn escape_name(name: &str, scheme: &EscapingScheme) -> String {
    if name.is_empty() {
        return name.to_string();
    }
    match scheme {
        EscapingScheme::NoEscaping => name.to_string(),
        EscapingScheme::UnderscoreEscaping => {
            if is_valid_legacy_metric_name(name) {
                return name.to_string();
            }
            name.chars()
                .enumerate()
                .map(|(i, c)| if is_valid_legacy_char(c, i) { c } else { '_' })
                .collect()
        }
        EscapingScheme::DotsEscaping => {
            let mut escaped = String::with_capacity(name.len());
            for (i, c) in name.chars().enumerate() {
                match c {
                    '_' => escaped.push_str("__"),
                    '.' => escaped.push_str("_dot_"),
                    c if is_valid_legacy_char(c, i) => escaped.push(c),
                    _ => escaped.push('_'),
                }
            }
            escaped
        }
        EscapingScheme::ValueEncodingEscaping => {
            if is_valid_legacy_metric_name(name) {
                return name.to_string();
            }
            let mut escaped = String::with_capacity(name.len() + 3);
            escaped.push_str("U__");
            for (i, c) in name.chars().enumerate() {
                if c == '_' {
                    // Doubling literal underscores keeps them distinguishable
                    // from the underscores that delimit an encoded value.
                    escaped.push_str("__");
                } else if is_valid_legacy_char(c, i) {
                    escaped.push(c);
                } else {
                    // A Rust `char` is always a valid Unicode scalar value, so
                    // every character can be encoded by its code point; no
                    // replacement character is needed and nothing is dropped.
                    let code_point = u32::from(c);
                    // Pad to two digits up to 0xFF and four up to 0xFFFF;
                    // larger code points are written without padding.
                    let width = if code_point <= 0xFF {
                        2
                    } else if code_point <= 0xFFFF {
                        4
                    } else {
                        0
                    };
                    // NOTE(review): hex digits stay upper-case as before; the
                    // Prometheus documentation examples use lower-case
                    // (`_2e_`). Confirm which form consumers expect.
                    write!(escaped, "_{:0width$X}_", code_point, width = width)
                        .expect("writing to a String cannot fail");
                }
            }
            escaped
        }
    }
}

/// Returns the escaping scheme requested by an `Accept`-style header.
///
/// The header is split into parameters at `,` and `;`. The first
/// `escaping=<value>` parameter whose value is one of `underscores`, `dots`,
/// `values` or `allow-utf-8` selects the scheme. The parameter name is
/// matched case-insensitively, and surrounding whitespace and double quotes
/// around the value are ignored. Parameters with an unknown value are
/// skipped. If no usable `escaping` parameter is present,
/// `default_escaping_scheme` is returned.
///
/// Only the `escaping` parameter is inspected, so scheme words that merely
/// occur elsewhere in the header (for example inside a media type) do not
/// select a scheme. Quality values (`q=`) are not taken into account.
pub fn negotiate_escaping_scheme(
    header: &str,
    default_escaping_scheme: EscapingScheme,
) -> EscapingScheme {
    header
        .split(|c: char| c == ',' || c == ';')
        .filter_map(|parameter| parameter.split_once('='))
        .filter(|(key, _)| key.trim().eq_ignore_ascii_case("escaping"))
        .find_map(|(_, value)| match value.trim().trim_matches('"') {
            "underscores" => Some(EscapingScheme::UnderscoreEscaping),
            "dots" => Some(EscapingScheme::DotsEscaping),
            "values" => Some(EscapingScheme::ValueEncodingEscaping),
            "allow-utf-8" => Some(EscapingScheme::NoEscaping),
            // Unknown escaping values are ignored rather than treated as errors.
            _ => None,
        })
        .unwrap_or(default_escaping_scheme)
}
Loading