Skip to content

Commit

Permalink
remove percentile counter caching
Browse files Browse the repository at this point in the history
A metrics reporting issue was observed where infrequent updates to percentile
timers resulted in inconsistent reporting of metrics values. Unusual peaks and
valleys were seen on the query side. Capturing measurement reporting from the
spectatord process revealed that the base timer values were being reported in
a consistent manner, but some of the percentile counters were missing.

The percentile timer meters kept a local cache of the percentile counters, with
shared pointer references to corresponding meters in the Registry. When the
Registry expires meters, it does so based upon last update time, rather than
the number of pointers still held. Thus, infrequently used percentile counters
were removed from the Registry, but remained within the local percentile timer
cache. The cached values would be updated, but this would not translate to the
Registry, and there was no opportunity to recreate them.

The fix is to remove the local percentile counter cache from the percentile
timer meter, and rely on the Registry to keep track of all counters. If one is
expired, it will then be recreated. This will add a little bit of overhead, but
it should be minimal.

The same applies to the percentile distribution summaries.
  • Loading branch information
copperlight committed Dec 2, 2024
1 parent b1fa055 commit 075e1c2
Show file tree
Hide file tree
Showing 9 changed files with 42 additions and 122 deletions.
1 change: 0 additions & 1 deletion spectator/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ add_library(spectator OBJECT
"config.h"
"counter.cc"
"counter.h"
"detail/perc_policy.h"
"dist_stats.h"
"dist_summary.cc"
"dist_summary.h"
Expand Down
35 changes: 0 additions & 35 deletions spectator/detail/perc_policy.h

This file was deleted.

14 changes: 0 additions & 14 deletions spectator/perc_dist_summary_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,6 @@ class PercentileDistributionSummaryTest : public ::testing::Test {

TYPED_TEST_SUITE(PercentileDistributionSummaryTest, Implementations, NameGenerator);

TYPED_TEST(PercentileDistributionSummaryTest, Percentile) {
auto& ds = this->ds;

for (auto i = 0; i < 100000; ++i) {
ds->Record(i);
}

for (auto i = 0; i <= 100; ++i) {
auto expected = 1e3 * i;
auto threshold = 0.15 * expected;
EXPECT_NEAR(expected, ds->Percentile(i), threshold);
}
}

TYPED_TEST(PercentileDistributionSummaryTest, Measure) {
this->ds->Record(42);

Expand Down
12 changes: 0 additions & 12 deletions spectator/perc_timer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,6 @@ class PercentileTimerTest : public ::testing::Test {

TYPED_TEST_SUITE(PercentileTimerTest, Implementations, NameGenerator);

TYPED_TEST(PercentileTimerTest, Percentile) {
for (auto i = 0; i < 100000; ++i) {
this->timer->Record(absl::Milliseconds(i));
}

for (auto i = 0; i <= 100; ++i) {
auto expected = static_cast<double>(i);
auto threshold = 0.15 * expected;
EXPECT_NEAR(expected, this->timer->Percentile(i), threshold);
}
}

TYPED_TEST(PercentileTimerTest, Measure) {
auto elapsed = absl::Milliseconds(42);
this->timer->Record(elapsed);
Expand Down
4 changes: 1 addition & 3 deletions spectator/percentile_buckets.cc
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
#include "percentile_bucket_values.inc"
#include "percentile_buckets.h"
#include <limits>

namespace spectator {

#include "percentile_bucket_values.inc"

auto GetPercBucketValue(size_t i) -> int64_t { return kBucketValues.at(i); }

auto PercentileBucketIndexOf(int64_t v) -> size_t {
Expand Down
37 changes: 12 additions & 25 deletions spectator/percentile_distribution_summary.h
Original file line number Diff line number Diff line change
@@ -1,24 +1,28 @@
#pragma once

#include "detail/perc_policy.h"
#include "id.h"
#include "percentile_bucket_tags.inc"
#include "percentile_buckets.h"
#include "registry.h"
#include "util.h"

namespace spectator {

template <typename policy>
class percentile_distribution_summary {
class PercentileDistributionSummary {
public:
percentile_distribution_summary(Registry* registry, Id id, int64_t min,
int64_t max) noexcept
PercentileDistributionSummary(Registry* registry, Id id, int64_t min, int64_t max) noexcept
: registry_{registry},
id_{std::move(id)},
min_{min},
max_{max},
dist_summary_{registry->GetDistributionSummary(id_)} {
policy::init(registry_, id_, &counters_, detail::kDistTags.begin());
}

auto get_counter(size_t index, const std::string* perc_tags) -> std::shared_ptr<Counter> {
using spectator::refs;
auto counterId = id_.WithTags(refs().statistic(), refs().percentile(), refs().percentile(),
intern_str(perc_tags[index]));
return registry_->GetCounter(std::move(counterId));
}

void Record(int64_t amount) noexcept {
Expand All @@ -29,37 +33,20 @@ class percentile_distribution_summary {
dist_summary_->Record(amount);
auto restricted = restrict(amount, min_, max_);
auto index = PercentileBucketIndexOf(restricted);
auto c = policy::get_counter(registry_, id_, &counters_, index,
detail::kDistTags.begin());
auto c = get_counter(index, kDistTags.begin());
c->Increment();
}

auto MeterId() const noexcept -> const Id& { return id_; }
auto Count() const noexcept -> int64_t { return dist_summary_->Count(); }
auto TotalAmount() const noexcept -> double {
return dist_summary_->TotalAmount();
}
auto Percentile(double p) const noexcept -> double {
std::array<int64_t, PercentileBucketsLength()> counts{};
for (size_t i = 0; i < PercentileBucketsLength(); ++i) {
auto& c = counters_.at(i);
if (c) {
counts.at(i) = static_cast<int64_t>(c->Count());
}
}
return spectator::Percentile(counts, p);
}
auto TotalAmount() const noexcept -> double { return dist_summary_->TotalAmount(); }

private:
Registry* registry_;
Id id_;
int64_t min_;
int64_t max_;
std::shared_ptr<DistributionSummary> dist_summary_;
mutable detail::counters_t counters_{};
};

using PercentileDistributionSummary =
percentile_distribution_summary<detail::lazy_policy>;

} // namespace spectator
39 changes: 12 additions & 27 deletions spectator/percentile_timer.h
Original file line number Diff line number Diff line change
@@ -1,63 +1,48 @@
#pragma once

#include "detail/perc_policy.h"
#include "id.h"
#include "percentile_bucket_tags.inc"
#include "percentile_buckets.h"
#include "registry.h"
#include "util.h"

namespace spectator {

template <typename policy>
class percentile_timer {
class PercentileTimer {
public:
percentile_timer(Registry* registry, Id id, absl::Duration min,
absl::Duration max) noexcept
PercentileTimer(Registry* registry, Id id, absl::Duration min, absl::Duration max) noexcept
: registry_{registry},
id_{std::move(id)},
min_{min},
max_{max},
timer_{registry->GetTimer(id_)} {
policy::init(registry_, id_, &counters_, detail::kTimerTags.begin());
timer_{registry->GetTimer(id_)} {}

auto get_counter(size_t index, const std::string* perc_tags) -> std::shared_ptr<Counter> {
using spectator::refs;
auto counterId = id_.WithTags(refs().statistic(), refs().percentile(), refs().percentile(),
intern_str(perc_tags[index]));
return registry_->GetCounter(std::move(counterId));
}

void Record(absl::Duration amount) noexcept {
timer_->Record(amount);
auto restricted = restrict(amount, min_, max_);
auto index = PercentileBucketIndexOf(absl::ToInt64Nanoseconds(restricted));
auto c = policy::get_counter(registry_, id_, &counters_, index,
detail::kTimerTags.begin());
auto c = get_counter(index, kTimerTags.begin());
c->Increment();
}

void Record(std::chrono::nanoseconds amount) noexcept {
Record(absl::FromChrono(amount));
}

void Record(std::chrono::nanoseconds amount) noexcept { Record(absl::FromChrono(amount)); }
auto MeterId() const noexcept -> const Id& { return id_; }
auto Count() const noexcept -> int64_t { return timer_->Count(); }
auto TotalTime() const noexcept -> int64_t { return timer_->TotalTime(); }
auto Percentile(double p) const noexcept -> double {
std::array<int64_t, PercentileBucketsLength()> counts{};
for (size_t i = 0; i < PercentileBucketsLength(); ++i) {
auto& c = counters_.at(i);
if (c) {
counts.at(i) = c->Count();
}
}
auto v = spectator::Percentile(counts, p);
return v / 1e9;
};

private:
Registry* registry_;
Id id_;
absl::Duration min_;
absl::Duration max_;
std::shared_ptr<Timer> timer_;
mutable detail::counters_t counters_{};
};

using PercentileTimer = percentile_timer<detail::lazy_policy>;

} // namespace spectator
6 changes: 6 additions & 0 deletions tools/gen_perc_bucket_tags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ int main(int argc, char* argv[]) {
of.open("/dev/stdout");
}

of << "#pragma once\n\n"
<< "namespace spectator {\n\n";

output_array(of, 276, 'T', "kTimerTags");
of << "\n";
output_array(of, 276, 'D', "kDistTags");

of << "\n} // namespace spectator\n";
}
16 changes: 11 additions & 5 deletions tools/gen_perc_bucket_values.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include <fstream>
#include <limits>
#include <fstream>
#include <vector>

// Number of positions of base-2 digits to shift when iterating over the long
Expand Down Expand Up @@ -39,9 +39,14 @@ int main(int argc, char* argv[]) {
} else {
of.open("/dev/stdout");
}
of << "// Do not modify - auto-generated\n//\n"

of << "#pragma once\n\n"
<< "#include <array>\n"
<< "#include <cstddef>\n"
<< "#include <cstdint>\n\n"
<< "namespace spectator {\n\n"
<< "static constexpr std::array<int64_t, " << bucketValues.size()
<< "> kBucketValues = {{";
<< "> kBucketValues = {{\n";
bool first = true;
for (auto v : bucketValues) {
if (!first) {
Expand All @@ -51,7 +56,7 @@ int main(int argc, char* argv[]) {
}
of << " " << v << "LL";
}
of << "}};\n";
of << "}};\n\n";

of << "static constexpr std::array<size_t, " << powerOf4Index.size()
<< "> kPowerOf4Index = {{\n";
Expand All @@ -64,5 +69,6 @@ int main(int argc, char* argv[]) {
}
of << " " << v << "u";
}
of << "\n}};\n";
of << "\n}};\n"
<< "\n} // namespace spectator\n";
}

0 comments on commit 075e1c2

Please sign in to comment.