Skip to content

Commit

Permalink
Don't fail requests if we cannot track their latencies
Browse files Browse the repository at this point in the history
- Fixes #5998
- Add a helper trait to extract the status code from a generic kind of
  response, so that we can use the latency tracker for methods that
  return a `http::Response` directly. Implement this for the extant
  response types we need to instrument.
- Add latency tracking to the device OAuth endpoints that were
  previously untracked.
  • Loading branch information
bnaecker committed Jul 5, 2024
1 parent 30b6713 commit 811d857
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 16 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 10 additions & 6 deletions nexus/src/external_api/device_auth.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,11 @@ pub(crate) async fn device_auth_request(
&model.into_response(rqctx.server.using_tls(), host),
)
};
// TODO: instrumentation doesn't work because we use `Response<Body>`
//apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
handler.await
apictx
.context
.external_latencies
.instrument_dropshot_handler(&rqctx, handler)
.await
}

#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
Expand Down Expand Up @@ -250,7 +252,9 @@ pub(crate) async fn device_access_token(
),
}
};
// TODO: instrumentation doesn't work because we use `Response<Body>`
//apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
handler.await
apictx
.context
.external_latencies
.instrument_dropshot_handler(&rqctx, handler)
.await
}
6 changes: 6 additions & 0 deletions oximeter/instruments/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@ chrono = { workspace = true, optional = true }
dropshot = { workspace = true, optional = true }
futures = { workspace = true, optional = true }
http = { workspace = true, optional = true }
hyper = { workspace = true, optional = true }
kstat-rs = { workspace = true, optional = true }
libc = { workspace = true, optional = true }
oximeter = { workspace = true, optional = true }
schemars = { workspace = true, optional = true }
serde = { workspace = true, optional = true }
slog = { workspace = true, optional = true }
tokio = { workspace = true, optional = true }
thiserror = { workspace = true, optional = true }
Expand All @@ -29,7 +32,10 @@ http-instruments = [
"dep:dropshot",
"dep:futures",
"dep:http",
"dep:hyper",
"dep:oximeter",
"dep:schemars",
"dep:serde",
"dep:uuid"
]
kstat = [
Expand Down
78 changes: 68 additions & 10 deletions oximeter/instruments/src/http.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@

//! Instrumentation tools for HTTP services.

// Copyright 2021 Oxide Computer Company
// Copyright 2024 Oxide Computer Company

use dropshot::{
HttpError, HttpResponse, RequestContext, RequestInfo, ServerContext,
};
use futures::Future;
use http::Response;
use http::StatusCode;
use http::Uri;
use hyper::body::Body;
use oximeter::{
histogram::Histogram, histogram::Record, Metric, MetricsError, Producer,
Sample, Target,
Expand Down Expand Up @@ -93,6 +95,56 @@ impl RequestLatencyHistogram {
}
}

/// A helper trait to extract a status code from a Dropshot response.
pub trait TrackableResponse: HttpResponse {
fn status_code(&self) -> StatusCode;
}

macro_rules! impl_generic_response {
($dropshot_type:ty) => {
impl<T> TrackableResponse for $dropshot_type
where
T: serde::Serialize + schemars::JsonSchema + Send + Sync + 'static,
{
fn status_code(&self) -> StatusCode {
<Self as dropshot::HttpCodedResponse>::STATUS_CODE
}
}
};
}

macro_rules! impl_response {
($dropshot_type:ty) => {
impl TrackableResponse for $dropshot_type {
fn status_code(&self) -> StatusCode {
<Self as dropshot::HttpCodedResponse>::STATUS_CODE
}
}
};
}

impl_generic_response!(dropshot::HttpResponseOk<T>);
impl_generic_response!(dropshot::HttpResponseAccepted<T>);
impl_generic_response!(dropshot::HttpResponseCreated<T>);
impl_response!(dropshot::HttpResponseUpdatedNoContent);
impl_response!(dropshot::HttpResponseDeleted);

impl<T, H> TrackableResponse for dropshot::HttpResponseHeaders<T, H>
where
T: dropshot::HttpCodedResponse,
H: serde::Serialize + schemars::JsonSchema + Send + Sync + 'static,
{
fn status_code(&self) -> StatusCode {
<T as dropshot::HttpCodedResponse>::STATUS_CODE
}
}

impl TrackableResponse for Response<Body> {
fn status_code(&self) -> StatusCode {
self.status()
}
}

/// The `LatencyTracker` is an [`oximeter::Producer`] that tracks the latencies of requests for an
/// HTTP service, in seconds.
///
Expand Down Expand Up @@ -171,23 +223,29 @@ impl LatencyTracker {
handler: H,
) -> Result<R, HttpError>
where
R: HttpResponse,
R: TrackableResponse,
H: Future<Output = Result<R, HttpError>>,
T: ServerContext,
{
let start = Instant::now();
let result = handler.await;
let latency = start.elapsed();
let status_code = match result {
Ok(_) => R::response_metadata().success.unwrap(),
let status_code = match &result {
Ok(response) => response.status_code(),
Err(ref e) => e.status_code,
};
self.update(&context.request, status_code, latency).map_err(|e| {
HttpError::for_internal_error(format!(
"error instrumenting dropshot request handler: {}",
e
))
})?;
if let Err(e) = self.update(&context.request, status_code, latency) {
slog::error!(
&context.log,
"error instrumenting dropshot handler";
"error" => ?e,
"status_code" => status_code.as_u16(),
"method" => %context.request.method(),
"uri" => %context.request.uri(),
"remote_addr" => context.request.remote_addr(),
"latency" => ?latency,
);
}
result
}
}
Expand Down

0 comments on commit 811d857

Please sign in to comment.