Skip to content

Commit

Permalink
initial logical size calculation: add a bunch of metrics (#5995)
Browse files Browse the repository at this point in the history
These will help us answer questions such as:
- when & at what do calculations get started after PS restart?
- how often is the api to get current incrementally-computed logical
  size called, and does it return Exact vs Approximate?

I'd also be interested in a histogram of how much wall clock
time size calculations take, but, I don't know good bucket sizes,
and, logging it would introduce yet another per-timeline log
message during startup; don't think that's worth it just yet.

Context

- https://neondb.slack.com/archives/C033RQ5SPDH/p1701197668789769
- #5962
- #5963
- #5955
- neondatabase/cloud#7408
  • Loading branch information
problame authored Dec 1, 2023
1 parent 1ce1c82 commit 1c88824
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 7 deletions.
128 changes: 128 additions & 0 deletions pageserver/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,134 @@ static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
.expect("failed to define current logical size metric")
});

pub(crate) mod initial_logical_size {
use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
use once_cell::sync::Lazy;

use crate::task_mgr::TaskKind;

pub(crate) struct StartCalculation(IntCounterVec);
pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
StartCalculation(
register_int_counter_vec!(
"pageserver_initial_logical_size_start_calculation",
"Incremented each time we start an initial logical size calculation attempt. \
The `task_kind` label is for the task kind that caused this attempt.",
&["attempt", "task_kind"]
)
.unwrap(),
)
});

struct DropCalculation {
first: IntCounter,
retry: IntCounter,
}

static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
let vec = register_int_counter_vec!(
"pageserver_initial_logical_size_drop_calculation",
"Incremented each time we abort a started size calculation attmpt.",
&["attempt"]
)
.unwrap();
DropCalculation {
first: vec.with_label_values(&["first"]),
retry: vec.with_label_values(&["retry"]),
}
});

pub(crate) struct Calculated {
pub(crate) births: IntCounter,
pub(crate) deaths: IntCounter,
}

pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
births: register_int_counter!(
"pageserver_initial_logical_size_finish_calculation",
"Incremented every time we finish calculation of initial logical size.\
If everything is working well, this should happen at most once per Timeline object."
)
.unwrap(),
deaths: register_int_counter!(
"pageserver_initial_logical_size_drop_finished_calculation",
"Incremented when we drop a finished initial logical size calculation result.\
Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
)
.unwrap(),
});

pub(crate) struct OngoingCalculationGuard {
inc_drop_calculation: Option<IntCounter>,
}

impl StartCalculation {
pub(crate) fn first(&self, causing_task_kind: Option<TaskKind>) -> OngoingCalculationGuard {
let task_kind_label: &'static str =
causing_task_kind.map(|k| k.into()).unwrap_or_default();
self.0.with_label_values(&["first", task_kind_label]);
OngoingCalculationGuard {
inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
}
}
pub(crate) fn retry(&self, causing_task_kind: Option<TaskKind>) -> OngoingCalculationGuard {
let task_kind_label: &'static str =
causing_task_kind.map(|k| k.into()).unwrap_or_default();
self.0.with_label_values(&["retry", task_kind_label]);
OngoingCalculationGuard {
inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
}
}
}

impl Drop for OngoingCalculationGuard {
fn drop(&mut self) {
if let Some(counter) = self.inc_drop_calculation.take() {
counter.inc();
}
}
}

impl OngoingCalculationGuard {
pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
drop(self.inc_drop_calculation.take());
CALCULATED.births.inc();
FinishedCalculationGuard {
inc_on_drop: CALCULATED.deaths.clone(),
}
}
}

pub(crate) struct FinishedCalculationGuard {
inc_on_drop: IntCounter,
}

impl Drop for FinishedCalculationGuard {
fn drop(&mut self) {
self.inc_on_drop.inc();
}
}

pub(crate) struct Calls {
pub(crate) approximate: IntCounter,
pub(crate) exact: IntCounter,
}

pub(crate) static CALLS: Lazy<Calls> = Lazy::new(|| {
let vec = register_int_counter_vec!(
"pageserver_initial_logical_size_calls",
"Incremented each time some code asks for incremental logical size.\
The label records the accuracy of the result.",
&["accuracy"]
)
.unwrap();
Calls {
approximate: vec.with_label_values(&["approximate"]),
exact: vec.with_label_values(&["exact"]),
}
});
}

pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
"pageserver_tenant_states_count",
Expand Down
14 changes: 11 additions & 3 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1812,6 +1812,7 @@ impl Timeline {
"spawning logical size computation from context of task kind {:?}",
ctx.task_kind()
);
let causing_task_kind = ctx.task_kind();
// We need to start the computation task.
// It gets a separate context since it will outlive the request that called this function.
let self_clone = Arc::clone(self);
Expand Down Expand Up @@ -1839,14 +1840,21 @@ impl Timeline {
_ = completion::Barrier::maybe_wait(self_clone.initial_logical_size_can_start.clone()) => {}
};



// hold off background tasks from starting until all timelines get to try at least
// once initial logical size calculation; though retry will rarely be useful.
// holding off is done because heavier tasks execute blockingly on the same
// runtime.
//
// dropping this at every outcome is probably better than trying to cling on to it,
// delay will be terminated by a timeout regardless.
let _completion = { self_clone.initial_logical_size_attempt.lock().expect("unexpected initial_logical_size_attempt poisoned").take() };
let completion = { self_clone.initial_logical_size_attempt.lock().expect("unexpected initial_logical_size_attempt poisoned").take() };

let metrics_guard = match &completion {
Some(_) => crate::metrics::initial_logical_size::START_CALCULATION.first(Some(causing_task_kind)),
None => crate::metrics::initial_logical_size::START_CALCULATION.retry(Some(causing_task_kind)),
};

let calculated_size = match self_clone
.logical_size_calculation_task(lsn, LogicalSizeCalculationCause::Initial, &background_ctx)
Expand Down Expand Up @@ -1891,11 +1899,11 @@ impl Timeline {
match self_clone
.current_logical_size
.initial_logical_size
.set(calculated_size)
.set((calculated_size, metrics_guard.calculation_result_saved()))
{
Ok(()) => (),
Err(_what_we_just_attempted_to_set) => {
let existing_size = self_clone
let (existing_size, _) = self_clone
.current_logical_size
.initial_logical_size
.get()
Expand Down
19 changes: 15 additions & 4 deletions pageserver/src/tenant/timeline/logical_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ pub(super) struct LogicalSize {
///
/// NOTE: size at a given LSN is constant, but after a restart we will calculate
/// the initial size at a different LSN.
pub initial_logical_size: OnceCell<u64>,
pub initial_logical_size: OnceCell<(
u64,
crate::metrics::initial_logical_size::FinishedCalculationGuard,
)>,

/// Semaphore to track ongoing calculation of `initial_logical_size`.
pub initial_size_computation: Arc<tokio::sync::Semaphore>,
Expand Down Expand Up @@ -78,7 +81,11 @@ impl CurrentLogicalSize {
impl LogicalSize {
pub(super) fn empty_initial() -> Self {
Self {
initial_logical_size: OnceCell::with_value(0),
initial_logical_size: OnceCell::with_value((0, {
crate::metrics::initial_logical_size::START_CALCULATION
.first(None)
.calculation_result_saved()
})),
// initial_logical_size already computed, so, don't admit any calculations
initial_size_computation: Arc::new(Semaphore::new(0)),
initial_part_end: None,
Expand All @@ -100,12 +107,16 @@ impl LogicalSize {
// ^^^ keep this type explicit so that the casts in this function break if
// we change the type.
match self.initial_logical_size.get() {
Some(initial_size) => {
Some((initial_size, _)) => {
crate::metrics::initial_logical_size::CALLS.exact.inc();
initial_size.checked_add_signed(size_increment)
.with_context(|| format!("Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}"))
.map(CurrentLogicalSize::Exact)
}
None => {
crate::metrics::initial_logical_size::CALLS
.approximate
.inc();
let non_negative_size_increment = u64::try_from(size_increment).unwrap_or(0);
Ok(CurrentLogicalSize::Approximate(non_negative_size_increment))
}
Expand All @@ -121,7 +132,7 @@ impl LogicalSize {
/// available for re-use. This doesn't contain the incremental part.
pub(super) fn initialized_size(&self, lsn: Lsn) -> Option<u64> {
match self.initial_part_end {
Some(v) if v == lsn => self.initial_logical_size.get().copied(),
Some(v) if v == lsn => self.initial_logical_size.get().map(|(s, _)| *s),
_ => None,
}
}
Expand Down

1 comment on commit 1c88824

@github-actions
Copy link

@github-actions github-actions bot commented on 1c88824 Dec 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2498 tests run: 2395 passed, 1 failed, 102 skipped (full report)


Failures on Postgres 16

  • test_branching_with_pgbench[flat-1-10]: debug
# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_branching_with_pgbench[debug-pg16-flat-1-10]"
Flaky tests (6)

Postgres 16

  • test_branching_with_pgbench[cascade-1-10]: debug
  • test_branching_with_pgbench[flat-1-10]: debug
  • test_ondemand_download_timetravel[real_s3]: debug
  • test_deletion_queue_recovery[no-validate-keep]: debug

Postgres 15

  • test_branching_with_pgbench[cascade-1-10]: debug
  • test_empty_tenant_size: debug

Test coverage report is not available

The comment gets automatically updated with the latest test results
1c88824 at 2023-12-01T13:03:47.495Z :recycle:

Please sign in to comment.