Skip to content

Commit

Permalink
Document stats counters and stats macros rename (#7803)
Browse files Browse the repository at this point in the history
Summary:
Add documentation for the Velox metrics and refactor the code: rename
"stats" to "metrics" and introduce new macros to define and record
metrics:
DEFINE_METRIC/DEFINE_HISTOGRAM_METRIC: define metrics
RECORD_METRIC_VALUE/RECORD_HISTOGRAM_METRIC_VALUE: record metrics

The followup will migrate Prestissimo to use new apis and deprecate
the old ones.

Pull Request resolved: #7803

Reviewed By: mbasmanova

Differential Revision: D51696300

Pulled By: xiaoxmeng

fbshipit-source-id: 355c98d3a131cc74169cbee315320ef74c8aa1f7
  • Loading branch information
xiaoxmeng authored and facebook-github-bot committed Dec 5, 2023
1 parent 0b3407e commit 156abe3
Show file tree
Hide file tree
Showing 14 changed files with 245 additions and 72 deletions.
37 changes: 17 additions & 20 deletions velox/common/base/Counters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,47 +19,44 @@

namespace facebook::velox {

void registerVeloxCounters() {
void registerVeloxMetrics() {
// Tracks hive handle generation latency in range of [0, 100s] and reports
// P50, P90, P99, and P100.
REPORT_ADD_HISTOGRAM_EXPORT_PERCENTILE(
kCounterHiveFileHandleGenerateLatencyMs, 10, 0, 100000, 50, 90, 99, 100);
DEFINE_HISTOGRAM_METRIC(
kMetricHiveFileHandleGenerateLatencyMs, 10, 0, 100000, 50, 90, 99, 100);

REPORT_ADD_STAT_EXPORT_TYPE(
kCounterCacheShrinkCount, facebook::velox::StatType::COUNT);
DEFINE_METRIC(kMetricCacheShrinkCount, facebook::velox::StatType::COUNT);

// Tracks cache shrink latency in range of [0, 100s] and reports P50, P90,
// P99, and P100.
REPORT_ADD_HISTOGRAM_EXPORT_PERCENTILE(
kCounterCacheShrinkTimeMs, 10, 0, 100'000, 50, 90, 99, 100);
DEFINE_HISTOGRAM_METRIC(
kMetricCacheShrinkTimeMs, 10, 0, 100'000, 50, 90, 99, 100);

// Tracks memory reclaim exec time in range of [0, 600s] and reports
// P50, P90, P99, and P100.
REPORT_ADD_HISTOGRAM_EXPORT_PERCENTILE(
kCounterMemoryReclaimExecTimeMs, 20, 0, 600'000, 50, 90, 99, 100);
DEFINE_HISTOGRAM_METRIC(
kMetricMemoryReclaimExecTimeMs, 20, 0, 600'000, 50, 90, 99, 100);

// Tracks memory reclaim task wait time in range of [0, 60s] and reports
// P50, P90, P99, and P100.
REPORT_ADD_HISTOGRAM_EXPORT_PERCENTILE(
kCounterMemoryReclaimWaitTimeMs, 10, 0, 60'000, 50, 90, 99, 100);
DEFINE_HISTOGRAM_METRIC(
kMetricMemoryReclaimWaitTimeMs, 10, 0, 60'000, 50, 90, 99, 100);

// Tracks memory reclaim bytes.
REPORT_ADD_STAT_EXPORT_TYPE(
kCounterMemoryReclaimedBytes, facebook::velox::StatType::SUM);
DEFINE_METRIC(kMetricMemoryReclaimedBytes, facebook::velox::StatType::SUM);

// Tracks the number of times that the memory reclaim wait times out.
REPORT_ADD_STAT_EXPORT_TYPE(
kCounterMemoryReclaimWaitTimeoutCount, facebook::velox::StatType::SUM);
DEFINE_METRIC(
kMetricMemoryReclaimWaitTimeoutCount, facebook::velox::StatType::SUM);

// Tracks the number of times that the memory reclaim fails because of
// non-reclaimable section which is an indicator that the memory reservation
// is not sufficient.
REPORT_ADD_STAT_EXPORT_TYPE(
kCounterMemoryNonReclaimableCount, facebook::velox::StatType::COUNT);
DEFINE_METRIC(
kMetricMemoryNonReclaimableCount, facebook::velox::StatType::COUNT);

// Tracks the number of times that we hit the max spill level limit.
REPORT_ADD_STAT_EXPORT_TYPE(
kCounterMaxSpillLevelExceededCount, facebook::velox::StatType::COUNT);
DEFINE_METRIC(
kMetricMaxSpillLevelExceededCount, facebook::velox::StatType::COUNT);
}

} // namespace facebook::velox
28 changes: 17 additions & 11 deletions velox/common/base/Counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,32 +20,38 @@

namespace facebook::velox {

/// Velox Counter Registration
void registerVeloxCounters();
/// Registers the Velox metrics with the stats reporter.
void registerVeloxMetrics();

constexpr folly::StringPiece kCounterHiveFileHandleGenerateLatencyMs{
#ifdef VELOX_ENABLE_BACKWARD_COMPATIBILITY
/// Legacy name of registerVeloxMetrics(). Only available when
/// VELOX_ENABLE_BACKWARD_COMPATIBILITY is defined, so that callers which still
/// use the old name keep compiling. It simply forwards to
/// registerVeloxMetrics().
inline void registerVeloxCounters() {
registerVeloxMetrics();
}
#endif

constexpr folly::StringPiece kMetricHiveFileHandleGenerateLatencyMs{
"velox.hive_file_handle_generate_latency_ms"};

constexpr folly::StringPiece kCounterCacheShrinkCount{
constexpr folly::StringPiece kMetricCacheShrinkCount{
"velox.cache_shrink_count"};

constexpr folly::StringPiece kCounterCacheShrinkTimeMs{"velox.cache_shrink_ms"};
constexpr folly::StringPiece kMetricCacheShrinkTimeMs{"velox.cache_shrink_ms"};

constexpr folly::StringPiece kCounterMemoryReclaimExecTimeMs{
constexpr folly::StringPiece kMetricMemoryReclaimExecTimeMs{
"velox.memory_reclaim_exec_ms"};

constexpr folly::StringPiece kCounterMemoryReclaimedBytes{
constexpr folly::StringPiece kMetricMemoryReclaimedBytes{
"velox.memory_reclaim_bytes"};

constexpr folly::StringPiece kCounterMemoryReclaimWaitTimeMs{
constexpr folly::StringPiece kMetricMemoryReclaimWaitTimeMs{
"velox.memory_reclaim_wait_ms"};

constexpr folly::StringPiece kCounterMemoryReclaimWaitTimeoutCount{
constexpr folly::StringPiece kMetricMemoryReclaimWaitTimeoutCount{
"velox.memory_reclaim_wait_timeout_count"};

constexpr folly::StringPiece kCounterMemoryNonReclaimableCount{
constexpr folly::StringPiece kMetricMemoryNonReclaimableCount{
"velox.memory_non_reclaimable_count"};

constexpr folly::StringPiece kCounterMaxSpillLevelExceededCount{
constexpr folly::StringPiece kMetricMaxSpillLevelExceededCount{
"velox.spill_max_level_exceeded_count"};
} // namespace facebook::velox
77 changes: 63 additions & 14 deletions velox/common/base/StatsReporter.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,36 +19,35 @@
#include <folly/Singleton.h>
#include <memory>

/// StatsReporter designed to assist in reporting various stats of the
/// StatsReporter designed to assist in reporting various metrics of the
/// application that uses velox library. The library itself does not implement
/// the StatsReporter and it should be implemented by the application.
///
/// To inialize the reporter singleton in your application use this pattern
/// To initialize the reporter singleton in your application use this pattern
/// (note that MyReporter should implement the abstract class
/// BaseStatsReporter):
///
/// folly::Singleton<facebook::velox::BaseStatsReporter> reporter([]() {
/// return new MyReporter();
/// });
///
/// Then, for every stat that needs to be reported, it is required to register
/// one (usually) or more types (StatType) before reporting the stat:
/// Then, for every metric that needs to be reported, it is required to register
/// one (usually) or more types (StatType) before reporting the metric:
///
/// REPORT_ADD_STAT_EXPORT_TYPE("my_stat1", facebook::velox::StatType::COUNT);
/// DEFINE_METRIC("my_stat1", facebook::velox::StatType::COUNT);
///
/// To register one histogram, it requires the min and max value of
// the range, the bucket width as well as the percentiles to be reported.
/// REPORT_ADD_HISTOGRAM_EXPORT_PERCENTILE("my_stat2", 10, 0, 100, 50, 99,
/// 100);
/// the range, the bucket width as well as the percentiles to be reported.
/// DEFINE_HISTOGRAM_METRIC("my_stat2", 10, 0, 100, 50, 99, 100);
///
/// The StatType controls how counter/stat is aggregated.
/// After that, every call to REPORT_ADD_STAT_VALUE increases the counter by the
/// The StatType controls how a metric is aggregated.
/// After that, every call to RECORD_METRIC_VALUE increases the metric by the
/// given value:
///
/// By default the following will add 1 to the stat if not provided value
/// REPORT_ADD_STAT_VALUE("my_stat1");
/// REPORT_ADD_STAT_VALUE("my_stat2", 10);
/// REPORT_ADD_STAT_VALUE("my_stat1", numOfFailures);
/// If no value is provided, the metric is increased by 1. Values of a
/// histogram metric are recorded with RECORD_HISTOGRAM_METRIC_VALUE:
/// RECORD_METRIC_VALUE("my_stat1");
/// RECORD_HISTOGRAM_METRIC_VALUE("my_stat2", 10);
/// RECORD_METRIC_VALUE("my_stat1", numOfFailures);

namespace facebook::velox {

Expand Down Expand Up @@ -157,6 +156,7 @@ class DummyStatsReporter : public BaseStatsReporter {
const override {}
};

#ifdef VELOX_ENABLE_BACKWARD_COMPATIBILITY
#define REPORT_ADD_STAT_VALUE(key, ...) \
{ \
if (::facebook::velox::BaseStatsReporter::registered) { \
Expand Down Expand Up @@ -205,5 +205,54 @@ class DummyStatsReporter : public BaseStatsReporter {
} \
} \
}
#endif

/// Registers the metric 'key' with the aggregation 'type' (a
/// facebook::velox::StatType) by calling addStatExportType() on the
/// BaseStatsReporter singleton. Does nothing if
/// BaseStatsReporter::registered is false or the singleton is not available.
///
/// Macro hygiene: every name is root-qualified so that the expansion resolves
/// the same way in any namespace, and the local variable has a name that is
/// unlikely to collide with an identifier used inside the 'key' or 'type'
/// argument expressions.
#define DEFINE_METRIC(key, type)                                      \
  {                                                                   \
    if (::facebook::velox::BaseStatsReporter::registered) {           \
      auto veloxStatsReporter = ::folly::Singleton<                   \
          ::facebook::velox::BaseStatsReporter>::try_get_fast();      \
      if (FOLLY_LIKELY(veloxStatsReporter != nullptr)) {              \
        veloxStatsReporter->addStatExportType((key), (type));         \
      }                                                               \
    }                                                                 \
  }

/// Records a value for the metric 'key' by calling addStatValue() on the
/// BaseStatsReporter singleton. The optional trailing argument is the value to
/// add; when it is omitted only 'key' is passed and the reporter's own default
/// applies. Does nothing if BaseStatsReporter::registered is false or the
/// singleton is not available.
///
/// Macro hygiene: every name is root-qualified so that the expansion resolves
/// the same way in any namespace, and the local variable has a name that is
/// unlikely to collide with an identifier used inside the argument
/// expressions (e.g. a caller-side variable named 'reporter').
#define RECORD_METRIC_VALUE(key, ...)                                 \
  {                                                                   \
    if (::facebook::velox::BaseStatsReporter::registered) {           \
      auto veloxStatsReporter = ::folly::Singleton<                   \
          ::facebook::velox::BaseStatsReporter>::try_get_fast();      \
      if (FOLLY_LIKELY(veloxStatsReporter != nullptr)) {              \
        veloxStatsReporter->addStatValue((key), ##__VA_ARGS__);       \
      }                                                               \
    }                                                                 \
  }

/// Registers the histogram metric 'key' by calling
/// addHistogramExportPercentiles() on the BaseStatsReporter singleton.
/// 'bucket' is the bucket width, 'min' and 'max' bound the value range, and
/// the trailing arguments are the percentiles to report; they are collected
/// into a std::vector<int32_t>. Does nothing if BaseStatsReporter::registered
/// is false or the singleton is not available.
///
/// Macro hygiene: every project and folly name is root-qualified so that the
/// expansion resolves the same way in any namespace, and the local variable
/// has a name that is unlikely to collide with an identifier used inside the
/// argument expressions.
#define DEFINE_HISTOGRAM_METRIC(key, bucket, min, max, ...)           \
  {                                                                   \
    if (::facebook::velox::BaseStatsReporter::registered) {           \
      auto veloxStatsReporter = ::folly::Singleton<                   \
          ::facebook::velox::BaseStatsReporter>::try_get_fast();      \
      if (FOLLY_LIKELY(veloxStatsReporter != nullptr)) {              \
        veloxStatsReporter->addHistogramExportPercentiles(            \
            (key),                                                    \
            (bucket),                                                 \
            (min),                                                    \
            (max),                                                    \
            (std::vector<int32_t>({__VA_ARGS__})));                   \
      }                                                               \
    }                                                                 \
  }

/// Records a value for the histogram metric 'key' by calling
/// addHistogramValue() on the BaseStatsReporter singleton. The optional
/// trailing argument is the value to record; when it is omitted only 'key' is
/// passed and the reporter's own default applies. Does nothing if
/// BaseStatsReporter::registered is false or the singleton is not available.
///
/// Macro hygiene: every name is root-qualified so that the expansion resolves
/// the same way in any namespace, and the local variable has a name that is
/// unlikely to collide with an identifier used inside the argument
/// expressions (e.g. a caller-side variable named 'reporter').
#define RECORD_HISTOGRAM_METRIC_VALUE(key, ...)                       \
  {                                                                   \
    if (::facebook::velox::BaseStatsReporter::registered) {           \
      auto veloxStatsReporter = ::folly::Singleton<                   \
          ::facebook::velox::BaseStatsReporter>::try_get_fast();      \
      if (FOLLY_LIKELY(veloxStatsReporter != nullptr)) {              \
        veloxStatsReporter->addHistogramValue((key), ##__VA_ARGS__);  \
      }                                                               \
    }                                                                 \
  }
} // namespace facebook::velox
26 changes: 13 additions & 13 deletions velox/common/base/tests/StatsReporterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,10 @@ TEST_F(StatsReporterTest, trivialReporter) {
auto reporter = std::dynamic_pointer_cast<TestReporter>(
folly::Singleton<BaseStatsReporter>::try_get());

REPORT_ADD_STAT_EXPORT_TYPE("key1", StatType::COUNT);
REPORT_ADD_STAT_EXPORT_TYPE("key2", StatType::SUM);
REPORT_ADD_STAT_EXPORT_TYPE("key3", StatType::RATE);
REPORT_ADD_HISTOGRAM_EXPORT_PERCENTILE("key4", 10, 0, 100, 50, 99, 100);
DEFINE_METRIC("key1", StatType::COUNT);
DEFINE_METRIC("key2", StatType::SUM);
DEFINE_METRIC("key3", StatType::RATE);
DEFINE_HISTOGRAM_METRIC("key4", 10, 0, 100, 50, 99, 100);

EXPECT_EQ(StatType::COUNT, reporter->statTypeMap["key1"]);
EXPECT_EQ(StatType::SUM, reporter->statTypeMap["key2"]);
Expand All @@ -105,15 +105,15 @@ TEST_F(StatsReporterTest, trivialReporter) {
EXPECT_TRUE(
reporter->statTypeMap.find("key5") == reporter->statTypeMap.end());

REPORT_ADD_STAT_VALUE("key1", 10);
REPORT_ADD_STAT_VALUE("key1", 11);
REPORT_ADD_STAT_VALUE("key1", 15);
REPORT_ADD_STAT_VALUE("key2", 1001);
REPORT_ADD_STAT_VALUE("key2", 1200);
REPORT_ADD_STAT_VALUE("key3");
REPORT_ADD_STAT_VALUE("key3", 1100);
REPORT_ADD_HISTOGRAM_VALUE("key4", 50);
REPORT_ADD_HISTOGRAM_VALUE("key4", 100);
RECORD_METRIC_VALUE("key1", 10);
RECORD_METRIC_VALUE("key1", 11);
RECORD_METRIC_VALUE("key1", 15);
RECORD_METRIC_VALUE("key2", 1001);
RECORD_METRIC_VALUE("key2", 1200);
RECORD_METRIC_VALUE("key3");
RECORD_METRIC_VALUE("key3", 1100);
RECORD_HISTOGRAM_METRIC_VALUE("key4", 50);
RECORD_HISTOGRAM_METRIC_VALUE("key4", 100);

EXPECT_EQ(36, reporter->counterMap["key1"]);
EXPECT_EQ(2201, reporter->counterMap["key2"]);
Expand Down
4 changes: 2 additions & 2 deletions velox/common/caching/AsyncDataCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,7 @@ bool AsyncDataCache::makeSpace(
uint64_t AsyncDataCache::shrink(uint64_t targetBytes) {
VELOX_CHECK_GT(targetBytes, 0);

REPORT_ADD_STAT_VALUE(kCounterCacheShrinkCount);
RECORD_METRIC_VALUE(kMetricCacheShrinkCount);
LOG(INFO) << "Try to shrink cache to free up "
<< velox::succinctBytes(targetBytes) << " memory";

Expand Down Expand Up @@ -724,7 +724,7 @@ uint64_t AsyncDataCache::shrink(uint64_t targetBytes) {
allocator_->unmap(memory::AllocationTraits::numPages(targetBytes));
}

REPORT_ADD_HISTOGRAM_VALUE(kCounterCacheShrinkTimeMs, shrinkTimeUs / 1'000);
RECORD_HISTOGRAM_METRIC_VALUE(kMetricCacheShrinkTimeMs, shrinkTimeUs / 1'000);
LOG(INFO) << "Freed " << velox::succinctBytes(evictedBytes)
<< " cache memory, spent " << velox::succinctMicros(shrinkTimeUs)
<< "\n"
Expand Down
6 changes: 3 additions & 3 deletions velox/common/memory/MemoryArbitrator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,9 @@ uint64_t MemoryReclaimer::run(
}
stats.reclaimExecTimeUs += execTimeUs;
stats.reclaimedBytes += reclaimedBytes;
REPORT_ADD_HISTOGRAM_VALUE(
kCounterMemoryReclaimExecTimeMs, execTimeUs / 1'000);
REPORT_ADD_STAT_VALUE(kCounterMemoryReclaimedBytes, reclaimedBytes);
RECORD_HISTOGRAM_METRIC_VALUE(
kMetricMemoryReclaimExecTimeMs, execTimeUs / 1'000);
RECORD_METRIC_VALUE(kMetricMemoryReclaimedBytes, reclaimedBytes);
return reclaimedBytes;
}

Expand Down
4 changes: 2 additions & 2 deletions velox/connectors/hive/FileHandle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ std::shared_ptr<FileHandle> FileHandleGenerator::operator()(
VLOG(1) << "Generating file handle for: " << filename
<< " uuid: " << fileHandle->uuid.id();
}
REPORT_ADD_HISTOGRAM_VALUE(
kCounterHiveFileHandleGenerateLatencyMs, elapsedTimeUs / 1000);
RECORD_HISTOGRAM_METRIC_VALUE(
kMetricHiveFileHandleGenerateLatencyMs, elapsedTimeUs / 1000);
// TODO: build the hash map/etc per file type -- presumably after reading
// the appropriate magic number from the file, or perhaps we include the file
// type in the file handle key.
Expand Down
2 changes: 1 addition & 1 deletion velox/connectors/hive/HiveDataSink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -877,7 +877,7 @@ uint64_t HiveDataSink::WriterReclaimer::reclaim(
}

if (*writerInfo_->nonReclaimableSectionHolder.get()) {
REPORT_ADD_STAT_VALUE(kCounterMemoryNonReclaimableCount);
RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount);
LOG(WARNING) << "Can't reclaim from hive writer pool " << pool->name()
<< " which is under non-reclaimable section, "
<< " used memory: " << succinctBytes(pool->currentBytes())
Expand Down
1 change: 1 addition & 0 deletions velox/docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Velox Documentation
functions
spark_functions
configs
stats
bindings/python/README_generated_pyvelox
develop
programming-guide
Loading

0 comments on commit 156abe3

Please sign in to comment.