Skip to content

Commit

Permalink
remove percentile counter caching
Browse files Browse the repository at this point in the history
A metrics reporting issue was observed where infrequent updates to percentile
timers resulted in inconsistent reporting of metrics values. Unusual peaks and
valleys were seen on the query side. Capturing measurement reporting from the
spectatord process revealed that the base timer values were being reported in
a consistent manner, but some of the percentile counters were missing.

The percentile timer meters kept a local cache of the percentile counters, with
shared pointer references to corresponding meters in the Registry. When the
Registry expires meters, it does so based upon last update time, rather than
the number of pointers still held. Thus, infrequently used percentile counters
were removed from the Registry, but remained within the local percentile timer
cache. The cached values would be updated, but this would not translate to the
Registry, and there was no opportunity to recreate them.

The fix is to remove the local percentile counter cache from the percentile
timer meter, and rely on the Registry to keep track of all counters. If one is
expired, it will then be recreated. This will add a little bit of overhead, but
it should be minimal.

The same applies to the percentile distribution summaries.
  • Loading branch information
copperlight committed Dec 2, 2024
1 parent b1fa055 commit 6461d38
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 92 deletions.
28 changes: 6 additions & 22 deletions spectator/detail/perc_policy.h
Original file line number Diff line number Diff line change
@@ -1,35 +1,19 @@
#pragma once

#include "../percentile_buckets.h"
#include "../registry.h"
#include <array>

namespace spectator::detail {

#include "../percentile_bucket_tags.inc"

using counters_t =
std::array<std::shared_ptr<Counter>, PercentileBucketsLength()>;

struct lazy_policy {
static void init(Registry* /* r */, const Id& /* id */,
counters_t* /* counters */,
const std::string* /* perc_tags */) {}
static auto get_counter(Registry* r, const Id& id,
counters_t* counters,
size_t index,
const std::string* perc_tags) -> std::shared_ptr<Counter> {
static auto get_counter(Registry* r, const Id& id, size_t index, const std::string* perc_tags)
-> std::shared_ptr<Counter> {
using spectator::refs;
auto& c = counters->at(index);
if (!c) {
auto counterId =
id.WithTags(refs().statistic(), refs().percentile(),
refs().percentile(), intern_str(perc_tags[index]));
counters->at(index) = r->GetCounter(std::move(counterId));
c = counters->at(index);
}
return c;
auto counterId = id.WithTags(refs().statistic(), refs().percentile(), refs().percentile(),
intern_str(perc_tags[index]));
return r->GetCounter(std::move(counterId));
}
};

} // namespace spectator::detail
} // namespace spectator::detail
14 changes: 0 additions & 14 deletions spectator/perc_dist_summary_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,6 @@ class PercentileDistributionSummaryTest : public ::testing::Test {

TYPED_TEST_SUITE(PercentileDistributionSummaryTest, Implementations, NameGenerator);

TYPED_TEST(PercentileDistributionSummaryTest, Percentile) {
auto& ds = this->ds;

for (auto i = 0; i < 100000; ++i) {
ds->Record(i);
}

for (auto i = 0; i <= 100; ++i) {
auto expected = 1e3 * i;
auto threshold = 0.15 * expected;
EXPECT_NEAR(expected, ds->Percentile(i), threshold);
}
}

TYPED_TEST(PercentileDistributionSummaryTest, Measure) {
this->ds->Record(42);

Expand Down
12 changes: 0 additions & 12 deletions spectator/perc_timer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,6 @@ class PercentileTimerTest : public ::testing::Test {

TYPED_TEST_SUITE(PercentileTimerTest, Implementations, NameGenerator);

TYPED_TEST(PercentileTimerTest, Percentile) {
for (auto i = 0; i < 100000; ++i) {
this->timer->Record(absl::Milliseconds(i));
}

for (auto i = 0; i <= 100; ++i) {
auto expected = static_cast<double>(i);
auto threshold = 0.15 * expected;
EXPECT_NEAR(expected, this->timer->Percentile(i), threshold);
}
}

TYPED_TEST(PercentileTimerTest, Measure) {
auto elapsed = absl::Milliseconds(42);
this->timer->Record(elapsed);
Expand Down
25 changes: 4 additions & 21 deletions spectator/percentile_distribution_summary.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,12 @@ namespace spectator {
template <typename policy>
class percentile_distribution_summary {
public:
percentile_distribution_summary(Registry* registry, Id id, int64_t min,
int64_t max) noexcept
percentile_distribution_summary(Registry* registry, Id id, int64_t min, int64_t max) noexcept
: registry_{registry},
id_{std::move(id)},
min_{min},
max_{max},
dist_summary_{registry->GetDistributionSummary(id_)} {
policy::init(registry_, id_, &counters_, detail::kDistTags.begin());
}

void Record(int64_t amount) noexcept {
Expand All @@ -29,37 +27,22 @@ class percentile_distribution_summary {
dist_summary_->Record(amount);
auto restricted = restrict(amount, min_, max_);
auto index = PercentileBucketIndexOf(restricted);
auto c = policy::get_counter(registry_, id_, &counters_, index,
detail::kDistTags.begin());
auto c = policy::get_counter(registry_, id_, index, detail::kDistTags.begin());
c->Increment();
}

auto MeterId() const noexcept -> const Id& { return id_; }
auto Count() const noexcept -> int64_t { return dist_summary_->Count(); }
auto TotalAmount() const noexcept -> double {
return dist_summary_->TotalAmount();
}
auto Percentile(double p) const noexcept -> double {
std::array<int64_t, PercentileBucketsLength()> counts{};
for (size_t i = 0; i < PercentileBucketsLength(); ++i) {
auto& c = counters_.at(i);
if (c) {
counts.at(i) = static_cast<int64_t>(c->Count());
}
}
return spectator::Percentile(counts, p);
}
auto TotalAmount() const noexcept -> double { return dist_summary_->TotalAmount(); }

private:
Registry* registry_;
Id id_;
int64_t min_;
int64_t max_;
std::shared_ptr<DistributionSummary> dist_summary_;
mutable detail::counters_t counters_{};
};

using PercentileDistributionSummary =
percentile_distribution_summary<detail::lazy_policy>;
using PercentileDistributionSummary = percentile_distribution_summary<detail::lazy_policy>;

} // namespace spectator
27 changes: 4 additions & 23 deletions spectator/percentile_timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,51 +11,32 @@ namespace spectator {
template <typename policy>
class percentile_timer {
public:
percentile_timer(Registry* registry, Id id, absl::Duration min,
absl::Duration max) noexcept
percentile_timer(Registry* registry, Id id, absl::Duration min, absl::Duration max) noexcept
: registry_{registry},
id_{std::move(id)},
min_{min},
max_{max},
timer_{registry->GetTimer(id_)} {
policy::init(registry_, id_, &counters_, detail::kTimerTags.begin());
}
timer_{registry->GetTimer(id_)} {}

void Record(absl::Duration amount) noexcept {
timer_->Record(amount);
auto restricted = restrict(amount, min_, max_);
auto index = PercentileBucketIndexOf(absl::ToInt64Nanoseconds(restricted));
auto c = policy::get_counter(registry_, id_, &counters_, index,
detail::kTimerTags.begin());
auto c = policy::get_counter(registry_, id_, index, detail::kTimerTags.begin());
c->Increment();
}

void Record(std::chrono::nanoseconds amount) noexcept {
Record(absl::FromChrono(amount));
}

void Record(std::chrono::nanoseconds amount) noexcept { Record(absl::FromChrono(amount)); }
auto MeterId() const noexcept -> const Id& { return id_; }
auto Count() const noexcept -> int64_t { return timer_->Count(); }
auto TotalTime() const noexcept -> int64_t { return timer_->TotalTime(); }
auto Percentile(double p) const noexcept -> double {
std::array<int64_t, PercentileBucketsLength()> counts{};
for (size_t i = 0; i < PercentileBucketsLength(); ++i) {
auto& c = counters_.at(i);
if (c) {
counts.at(i) = c->Count();
}
}
auto v = spectator::Percentile(counts, p);
return v / 1e9;
};

private:
Registry* registry_;
Id id_;
absl::Duration min_;
absl::Duration max_;
std::shared_ptr<Timer> timer_;
mutable detail::counters_t counters_{};
};

using PercentileTimer = percentile_timer<detail::lazy_policy>;
Expand Down

0 comments on commit 6461d38

Please sign in to comment.