safekeeper,pageserver: add heap profiling (#9778)
## Problem

We don't have good observability for memory usage. This would be useful
e.g. for debugging OOM incidents and for optimizing performance and resource usage.

We would also like to use continuous profiling with e.g. [Grafana Cloud
Profiles](https://grafana.com/products/cloud/profiles-for-continuous-profiling/)
(see neondatabase/cloud#14888).

This PR is intended as a proof of concept, to try it out in staging and
drive further discussions about profiling more broadly.

Touches #9534.
Touches neondatabase/cloud#14888.
Depends on #9779.
Depends on #9780.

## Summary of changes

Adds an HTTP route `/profile/heap` that takes a heap profile and returns
it. Query parameters:

* `format`: output format (`jemalloc` or `pprof`; default `pprof`).
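
As a usage sketch (the address and port are assumptions and depend on the
deployment; `localhost:9898` is a common pageserver HTTP listener in local
setups): `curl 'http://localhost:9898/profile/heap' -o heap.pb` fetches a
pprof profile, and appending `?format=jemalloc` returns jemalloc's native
dump format instead.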

Unlike CPU profiles (see #9764), heap profiles are not symbolized and
require the original binary to translate addresses to function names. To
make this work with Grafana, we'll probably have to symbolize the
profiles server-side -- this is left as future work, as are other output
formats like SVG.
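
For example (an illustration, not part of this change), a heap profile taken
from a pageserver can typically be inspected with standard pprof tooling on a
host that has the matching binary: `go tool pprof ./pageserver heap.pb`, then
e.g. `top` or `web` in the interactive session.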

Heap profiles don't work on macOS due to limitations in jemalloc.
erikgrinaker authored Dec 3, 2024
1 parent a2a942f commit dcb24ce
Showing 10 changed files with 175 additions and 28 deletions.
89 changes: 70 additions & 19 deletions Cargo.lock

Some generated files are not rendered by default.

3 changes: 2 additions & 1 deletion Cargo.toml
@@ -115,6 +115,7 @@ indoc = "2"
 ipnet = "2.10.0"
 itertools = "0.10"
 itoa = "1.0.11"
+jemalloc_pprof = "0.6"
 jsonwebtoken = "9"
 lasso = "0.7"
 libc = "0.2"
@@ -175,7 +176,7 @@ sync_wrapper = "0.1.2"
 tar = "0.4"
 test-context = "0.3"
 thiserror = "1.0"
-tikv-jemallocator = { version = "0.6", features = ["stats"] }
+tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
 tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
 tokio = { version = "1.17", features = ["macros"] }
 tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
1 change: 1 addition & 0 deletions libs/utils/Cargo.toml
@@ -26,6 +26,7 @@ humantime.workspace = true
 hyper0 = { workspace = true, features = ["full"] }
 fail.workspace = true
 futures = { workspace = true}
+jemalloc_pprof.workspace = true
 jsonwebtoken.workspace = true
 nix.workspace = true
 once_cell.workspace = true
64 changes: 64 additions & 0 deletions libs/utils/src/http/endpoint.rs
@@ -10,6 +10,7 @@ use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
 use once_cell::sync::Lazy;
 use routerify::ext::RequestExt;
 use routerify::{Middleware, RequestInfo, Router, RouterBuilder};
+use tokio_util::io::ReaderStream;
 use tracing::{debug, info, info_span, warn, Instrument};

 use std::future::Future;
@@ -407,6 +408,69 @@ pub async fn profile_cpu_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     }
 }

+/// Generates heap profiles.
+///
+/// This only works with jemalloc on Linux.
+pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
+    enum Format {
+        Jemalloc,
+        Pprof,
+    }
+
+    // Parameters.
+    let format = match get_query_param(&req, "format")?.as_deref() {
+        None => Format::Pprof,
+        Some("jemalloc") => Format::Jemalloc,
+        Some("pprof") => Format::Pprof,
+        Some(format) => return Err(ApiError::BadRequest(anyhow!("invalid format {format}"))),
+    };
+
+    // Obtain profiler handle.
+    let mut prof_ctl = jemalloc_pprof::PROF_CTL
+        .as_ref()
+        .ok_or(ApiError::InternalServerError(anyhow!(
+            "heap profiling not enabled"
+        )))?
+        .lock()
+        .await;
+    if !prof_ctl.activated() {
+        return Err(ApiError::InternalServerError(anyhow!(
+            "heap profiling not enabled"
+        )));
+    }
+
+    // Take and return the profile.
+    match format {
+        Format::Jemalloc => {
+            // NB: file is an open handle to a tempfile that's already deleted.
+            let file = tokio::task::spawn_blocking(move || prof_ctl.dump())
+                .await
+                .map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
+                .map_err(ApiError::InternalServerError)?;
+            let stream = ReaderStream::new(tokio::fs::File::from_std(file));
+            Response::builder()
+                .status(200)
+                .header(CONTENT_TYPE, "application/octet-stream")
+                .header(CONTENT_DISPOSITION, "attachment; filename=\"heap.dump\"")
+                .body(Body::wrap_stream(stream))
+                .map_err(|err| ApiError::InternalServerError(err.into()))
+        }
+
+        Format::Pprof => {
+            let data = tokio::task::spawn_blocking(move || prof_ctl.dump_pprof())
+                .await
+                .map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
+                .map_err(ApiError::InternalServerError)?;
+            Response::builder()
+                .status(200)
+                .header(CONTENT_TYPE, "application/octet-stream")
+                .header(CONTENT_DISPOSITION, "attachment; filename=\"heap.pb\"")
+                .body(Body::from(data))
+                .map_err(|err| ApiError::InternalServerError(err.into()))
+        }
+    }
+}
+
 pub fn add_request_id_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
 ) -> Middleware<B, ApiError> {
     Middleware::pre(move |req| async move {
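
A note on the handler above: the jemalloc dump calls are synchronous, so they
run under `tokio::task::spawn_blocking`, and the raw jemalloc dump is streamed
from the (already-unlinked) tempfile via `ReaderStream` rather than being
buffered in memory.
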
5 changes: 5 additions & 0 deletions pageserver/src/bin/pageserver.rs
@@ -53,6 +53,11 @@ project_build_tag!(BUILD_TAG);
 #[global_allocator]
 static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

+/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
+#[allow(non_upper_case_globals)]
+#[export_name = "malloc_conf"]
+pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
+
 const PID_FILE_NAME: &str = "pageserver.pid";

 const FEATURES: &[&str] = &[
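
As an aside (not part of this commit): `lg_prof_sample:20` makes jemalloc
sample roughly every 2^20 bytes = 1 MiB of allocations. Below is a minimal
sketch of how one could verify at runtime that profiling is active, assuming
the `tikv-jemalloc-ctl` crate already present in the workspace; the helper
name is hypothetical.

/// Hypothetical helper (illustration only): report whether jemalloc heap
/// profiling is active, by reading the standard "prof.active" mallctl.
fn heap_profiling_active() -> bool {
    // SAFETY: "prof.active" is a boolean mallctl, so reading it as `bool`
    // matches the type jemalloc expects; the name must be NUL-terminated.
    unsafe { tikv_jemalloc_ctl::raw::read::<bool>(b"prof.active\0") }.unwrap_or(false)
}
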
8 changes: 5 additions & 3 deletions pageserver/src/http/routes.rs
@@ -56,9 +56,9 @@ use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::auth::JwtAuth;
 use utils::failpoint_support::failpoints_handler;
-use utils::http::endpoint::profile_cpu_handler;
-use utils::http::endpoint::prometheus_metrics_handler;
-use utils::http::endpoint::request_span;
+use utils::http::endpoint::{
+    profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span,
+};
 use utils::http::request::must_parse_query_param;
 use utils::http::request::{get_request_param, must_get_query_param, parse_query_param};

@@ -155,6 +155,7 @@ impl State {
             "/swagger.yml",
             "/metrics",
             "/profile/cpu",
+            "/profile/heap",
         ];
         Ok(Self {
             conf,
@@ -3203,6 +3204,7 @@ pub fn make_router(
         .data(state)
         .get("/metrics", |r| request_span(r, prometheus_metrics_handler))
         .get("/profile/cpu", |r| request_span(r, profile_cpu_handler))
+        .get("/profile/heap", |r| request_span(r, profile_heap_handler))
         .get("/v1/status", |r| api_handler(r, status_handler))
         .put("/v1/failpoints", |r| {
             testing_api_handler("manage failpoints", r, failpoints_handler)
6 changes: 6 additions & 0 deletions safekeeper/benches/receive_wal.rs
@@ -24,9 +24,15 @@ const KB: usize = 1024;
 const MB: usize = 1024 * KB;
 const GB: usize = 1024 * MB;

+/// Use jemalloc, and configure it to sample allocations for profiles every 1 MB.
+/// This mirrors the configuration in bin/safekeeper.rs.
 #[global_allocator]
 static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

+#[allow(non_upper_case_globals)]
+#[export_name = "malloc_conf"]
+pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
+
 // Register benchmarks with Criterion.
 criterion_group!(
     name = benches;
(The remaining changed file diffs are not rendered here.)

1 comment on commit dcb24ce

@github-actions

3828 tests run: 3655 passed, 0 failed, 173 skipped (full report)


Test coverage report is not available

The comment gets automatically updated with the latest test results
dcb24ce at 2024-12-03T12:36:27.885Z :recycle:
