From 6461d38e92ce2f18deacade58e9b13b90b3a28e0 Mon Sep 17 00:00:00 2001 From: Matthew Johnson Date: Mon, 2 Dec 2024 16:00:44 -0500 Subject: [PATCH] remove percentile counter caching A metrics reporting issue was observed where infrequent updates to percentile timers resulted in inconsistent reporting of metrics values. Unusual peaks and valleys were seen on the query side. Capturing measurement reporting from the spectatord process revealed that the base timer values were being reported in a consistent manner, but some of the percentile counters were missing. The percentile timer meters kept a local cache of the percentile counters, with shared pointer references to corresponding meters in the Registry. When the Registry expires meters, it does so based upon last update time, rather than the number of pointers still held. Thus, infrequently used percentile counters were removed from the Registry, but remained within the local percentile timer cache. The cached values would be updated, but this would not translate to the Registry, and there was no opportunity to recreate them. The fix is to remove the local percentile counter cache from the percentile timer meter, and rely on the Registry to keep track of all counters. If one is expired, it will then be recreated. This will add a little bit of overhead, but it should be minimal. The same applies to the percentile distribution summaries. --- spectator/detail/perc_policy.h | 28 +++++---------------- spectator/perc_dist_summary_test.cc | 14 ----------- spectator/perc_timer_test.cc | 12 --------- spectator/percentile_distribution_summary.h | 25 +++--------------- spectator/percentile_timer.h | 27 +++----------------- 5 files changed, 14 insertions(+), 92 deletions(-) diff --git a/spectator/detail/perc_policy.h b/spectator/detail/perc_policy.h index 9bd43ca..0973d9a 100644 --- a/spectator/detail/perc_policy.h +++ b/spectator/detail/perc_policy.h @@ -1,35 +1,19 @@ #pragma once -#include "../percentile_buckets.h" #include "../registry.h" -#include namespace spectator::detail { #include "../percentile_bucket_tags.inc" -using counters_t = - std::array, PercentileBucketsLength()>; - struct lazy_policy { - static void init(Registry* /* r */, const Id& /* id */, - counters_t* /* counters */, - const std::string* /* perc_tags */) {} - static auto get_counter(Registry* r, const Id& id, - counters_t* counters, - size_t index, - const std::string* perc_tags) -> std::shared_ptr { + static auto get_counter(Registry* r, const Id& id, size_t index, const std::string* perc_tags) + -> std::shared_ptr { using spectator::refs; - auto& c = counters->at(index); - if (!c) { - auto counterId = - id.WithTags(refs().statistic(), refs().percentile(), - refs().percentile(), intern_str(perc_tags[index])); - counters->at(index) = r->GetCounter(std::move(counterId)); - c = counters->at(index); - } - return c; + auto counterId = id.WithTags(refs().statistic(), refs().percentile(), refs().percentile(), + intern_str(perc_tags[index])); + return r->GetCounter(std::move(counterId)); } }; -} // namespace spectator::detail \ No newline at end of file +} // namespace spectator::detail diff --git a/spectator/perc_dist_summary_test.cc b/spectator/perc_dist_summary_test.cc index 8b18122..5c6faf7 100644 --- a/spectator/perc_dist_summary_test.cc +++ b/spectator/perc_dist_summary_test.cc @@ -39,20 +39,6 @@ class PercentileDistributionSummaryTest : public ::testing::Test { TYPED_TEST_SUITE(PercentileDistributionSummaryTest, Implementations, NameGenerator); -TYPED_TEST(PercentileDistributionSummaryTest, Percentile) { - auto& ds = this->ds; - - for (auto i = 0; i < 100000; ++i) { - ds->Record(i); - } - - for (auto i = 0; i <= 100; ++i) { - auto expected = 1e3 * i; - auto threshold = 0.15 * expected; - EXPECT_NEAR(expected, ds->Percentile(i), threshold); - } -} - TYPED_TEST(PercentileDistributionSummaryTest, Measure) { this->ds->Record(42); diff --git a/spectator/perc_timer_test.cc b/spectator/perc_timer_test.cc index 222e01d..270635b 100644 --- a/spectator/perc_timer_test.cc +++ b/spectator/perc_timer_test.cc @@ -39,18 +39,6 @@ class PercentileTimerTest : public ::testing::Test { TYPED_TEST_SUITE(PercentileTimerTest, Implementations, NameGenerator); -TYPED_TEST(PercentileTimerTest, Percentile) { - for (auto i = 0; i < 100000; ++i) { - this->timer->Record(absl::Milliseconds(i)); - } - - for (auto i = 0; i <= 100; ++i) { - auto expected = static_cast(i); - auto threshold = 0.15 * expected; - EXPECT_NEAR(expected, this->timer->Percentile(i), threshold); - } -} - TYPED_TEST(PercentileTimerTest, Measure) { auto elapsed = absl::Milliseconds(42); this->timer->Record(elapsed); diff --git a/spectator/percentile_distribution_summary.h b/spectator/percentile_distribution_summary.h index 6cee7e6..5f3f942 100644 --- a/spectator/percentile_distribution_summary.h +++ b/spectator/percentile_distribution_summary.h @@ -11,14 +11,12 @@ namespace spectator { template class percentile_distribution_summary { public: - percentile_distribution_summary(Registry* registry, Id id, int64_t min, - int64_t max) noexcept + percentile_distribution_summary(Registry* registry, Id id, int64_t min, int64_t max) noexcept : registry_{registry}, id_{std::move(id)}, min_{min}, max_{max}, dist_summary_{registry->GetDistributionSummary(id_)} { - policy::init(registry_, id_, &counters_, detail::kDistTags.begin()); } void Record(int64_t amount) noexcept { @@ -29,26 +27,13 @@ class percentile_distribution_summary { dist_summary_->Record(amount); auto restricted = restrict(amount, min_, max_); auto index = PercentileBucketIndexOf(restricted); - auto c = policy::get_counter(registry_, id_, &counters_, index, - detail::kDistTags.begin()); + auto c = policy::get_counter(registry_, id_, index, detail::kDistTags.begin()); c->Increment(); } auto MeterId() const noexcept -> const Id& { return id_; } auto Count() const noexcept -> int64_t { return dist_summary_->Count(); } - auto TotalAmount() const noexcept -> double { - return dist_summary_->TotalAmount(); - } - auto Percentile(double p) const noexcept -> double { - std::array counts{}; - for (size_t i = 0; i < PercentileBucketsLength(); ++i) { - auto& c = counters_.at(i); - if (c) { - counts.at(i) = static_cast(c->Count()); - } - } - return spectator::Percentile(counts, p); - } + auto TotalAmount() const noexcept -> double { return dist_summary_->TotalAmount(); } private: Registry* registry_; @@ -56,10 +41,8 @@ class percentile_distribution_summary { int64_t min_; int64_t max_; std::shared_ptr dist_summary_; - mutable detail::counters_t counters_{}; }; -using PercentileDistributionSummary = - percentile_distribution_summary; +using PercentileDistributionSummary = percentile_distribution_summary; } // namespace spectator diff --git a/spectator/percentile_timer.h b/spectator/percentile_timer.h index 697d907..aa5006a 100644 --- a/spectator/percentile_timer.h +++ b/spectator/percentile_timer.h @@ -11,43 +11,25 @@ namespace spectator { template class percentile_timer { public: - percentile_timer(Registry* registry, Id id, absl::Duration min, - absl::Duration max) noexcept + percentile_timer(Registry* registry, Id id, absl::Duration min, absl::Duration max) noexcept : registry_{registry}, id_{std::move(id)}, min_{min}, max_{max}, - timer_{registry->GetTimer(id_)} { - policy::init(registry_, id_, &counters_, detail::kTimerTags.begin()); - } + timer_{registry->GetTimer(id_)} {} void Record(absl::Duration amount) noexcept { timer_->Record(amount); auto restricted = restrict(amount, min_, max_); auto index = PercentileBucketIndexOf(absl::ToInt64Nanoseconds(restricted)); - auto c = policy::get_counter(registry_, id_, &counters_, index, - detail::kTimerTags.begin()); + auto c = policy::get_counter(registry_, id_, index, detail::kTimerTags.begin()); c->Increment(); } - void Record(std::chrono::nanoseconds amount) noexcept { - Record(absl::FromChrono(amount)); - } - + void Record(std::chrono::nanoseconds amount) noexcept { Record(absl::FromChrono(amount)); } auto MeterId() const noexcept -> const Id& { return id_; } auto Count() const noexcept -> int64_t { return timer_->Count(); } auto TotalTime() const noexcept -> int64_t { return timer_->TotalTime(); } - auto Percentile(double p) const noexcept -> double { - std::array counts{}; - for (size_t i = 0; i < PercentileBucketsLength(); ++i) { - auto& c = counters_.at(i); - if (c) { - counts.at(i) = c->Count(); - } - } - auto v = spectator::Percentile(counts, p); - return v / 1e9; - }; private: Registry* registry_; @@ -55,7 +37,6 @@ class percentile_timer { absl::Duration min_; absl::Duration max_; std::shared_ptr timer_; - mutable detail::counters_t counters_{}; }; using PercentileTimer = percentile_timer;