Skip to content

Commit

Permalink
Expose metrics related to memory arbitration
Browse files Browse the repository at this point in the history
This adds the following stats:

"velox.arbitrator_requests_count"
"velox.arbitrator_aborted_count"
"velox.arbitrator_failures_count"
"velox.arbitrator_queue_time_ms"
"velox.arbitrator_arbitration_time_ms"
"velox.arbitrator_free_capacity_bytes"

Also fixed accounting for:
"velox.memory_non_reclaimable_count"
  • Loading branch information
bikramSingh91 committed Dec 8, 2023
1 parent ddc3471 commit 31fb677
Show file tree
Hide file tree
Showing 10 changed files with 138 additions and 16 deletions.
63 changes: 52 additions & 11 deletions velox/common/base/Counters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,24 @@ void registerVeloxMetrics() {

DEFINE_METRIC(kMetricCacheShrinkCount, facebook::velox::StatType::COUNT);

// Tracks cache shrink latency in range of [0, 100s] and reports P50, P90,
// P99, and P100.
// Tracks cache shrink latency in range of [0, 100s] with 10 buckets and
// reports P50, P90, P99, and P100.
DEFINE_HISTOGRAM_METRIC(
kMetricCacheShrinkTimeMs, 10, 0, 100'000, 50, 90, 99, 100);

// Tracks memory reclaim exec time in range of [0, 600s] and reports
// P50, P90, P99, and P100.
// Tracks the number of times that we hit the max spill level limit.
DEFINE_METRIC(
kMetricMaxSpillLevelExceededCount, facebook::velox::StatType::COUNT);

/// ================== Memory Arbitration Counters =================

// Tracks memory reclaim exec time in range of [0, 600s] with 20 buckets and
// reports P50, P90, P99, and P100.
DEFINE_HISTOGRAM_METRIC(
kMetricMemoryReclaimExecTimeMs, 20, 0, 600'000, 50, 90, 99, 100);

// Tracks memory reclaim task wait time in range of [0, 60s] and reports
// P50, P90, P99, and P100.
// Tracks memory reclaim task wait time in range of [0, 60s] with 10 buckets
// and reports P50, P90, P99, and P100.
DEFINE_HISTOGRAM_METRIC(
kMetricMemoryReclaimWaitTimeMs, 10, 0, 60'000, 50, 90, 99, 100);

Expand All @@ -49,14 +55,49 @@ void registerVeloxMetrics() {
DEFINE_METRIC(
kMetricMemoryReclaimWaitTimeoutCount, facebook::velox::StatType::SUM);

// Tracks the number of times that the memory reclaim fails because of
// non-reclaimable section which is an indicator that the memory reservation
// is not sufficient.
// The number of times that the memory reclaim fails because the operator is
// executing a non-reclaimable section where it is expected to have reserved
// enough memory to execute without asking for more. Therefore, it is an
// indicator that the memory reservation is not sufficient. It excludes
// counting instances where the operator is in a non-reclaimable state due to
// currently being on-thread and running or being already cancelled.
DEFINE_METRIC(
kMetricMemoryNonReclaimableCount, facebook::velox::StatType::COUNT);

// Tracks the number of times that we hit the max spill level limit.
// The number of arbitration requests.
DEFINE_METRIC(
kMetricMaxSpillLevelExceededCount, facebook::velox::StatType::COUNT);
kMetricArbitratorRequestsCount, facebook::velox::StatType::COUNT);

// The number of times a query level memory pool is aborted as a result of a
// memory arbitration process. The memory pool aborted will eventually result
// in a cancelling the original query.
DEFINE_METRIC(
kMetricArbitratorAbortedCount, facebook::velox::StatType::COUNT);

// The number of times a memory arbitration request failed. This may occur
// either because the requester was terminated during the processing of its
// request, the arbitration request would surpass the maximum allowed capacity
// for the requester, or the arbitration process couldn't release the
// requested amount of memory.
DEFINE_METRIC(
kMetricArbitratorFailuresCount, facebook::velox::StatType::COUNT);

// The distribution of the amount of time an arbitration request stays queued
// in range of [0, 600s] with 20 buckets. It is configured to report the
// latency at P50, P90, P99, and P100 percentiles.
DEFINE_HISTOGRAM_METRIC(
kMetricArbitratorQueueTimeMs, 20, 0, 600'000, 50, 90, 99, 100);

// The distribution of the amount of time it take to complete a single
// arbitration request stays queued in range of [0, 600s] with 20
// buckets. It is configured to report the latency at P50, P90, P99,
// and P100 percentiles.
DEFINE_HISTOGRAM_METRIC(
kMetricArbitratorArbitrationTimeMs, 20, 0, 600'000, 50, 90, 99, 100);

/// Tracks the average of free memory capacity managed by the arbitrator in
/// bytes.
DEFINE_METRIC(
kMetricArbitratorFreeCapacityBytes, facebook::velox::StatType::AVG);
}
} // namespace facebook::velox
22 changes: 20 additions & 2 deletions velox/common/base/Counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ constexpr folly::StringPiece kMetricCacheShrinkCount{

constexpr folly::StringPiece kMetricCacheShrinkTimeMs{"velox.cache_shrink_ms"};

constexpr folly::StringPiece kMetricMaxSpillLevelExceededCount{
"velox.spill_max_level_exceeded_count"};

constexpr folly::StringPiece kMetricMemoryReclaimExecTimeMs{
"velox.memory_reclaim_exec_ms"};

Expand All @@ -52,6 +55,21 @@ constexpr folly::StringPiece kMetricMemoryReclaimWaitTimeoutCount{
constexpr folly::StringPiece kMetricMemoryNonReclaimableCount{
"velox.memory_non_reclaimable_count"};

constexpr folly::StringPiece kMetricMaxSpillLevelExceededCount{
"velox.spill_max_level_exceeded_count"};
constexpr folly::StringPiece kMetricArbitratorRequestsCount{
"velox.arbitrator_requests_count"};

constexpr folly::StringPiece kMetricArbitratorAbortedCount{
"velox.arbitrator_aborted_count"};

constexpr folly::StringPiece kMetricArbitratorFailuresCount{
"velox.arbitrator_failures_count"};

constexpr folly::StringPiece kMetricArbitratorQueueTimeMs{
"velox.arbitrator_queue_time_ms"};

constexpr folly::StringPiece kMetricArbitratorArbitrationTimeMs{
"velox.arbitrator_arbitration_time_ms"};

constexpr folly::StringPiece kMetricArbitratorFreeCapacityBytes{
"velox.arbitrator_free_capacity_bytes"};
} // namespace facebook::velox
41 changes: 38 additions & 3 deletions velox/docs/metrics.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

==============
Runtime Metric
==============
Expand Down Expand Up @@ -86,9 +87,43 @@ Memory Management
- The number of times that the memory reclaim wait timeouts.
* - memory_non_reclaimable_count
- Count
- The number of times that the memory reclaim fails because of
non-reclaimable section which is an indicator that the memory reservation
is not sufficient.
- The number of times that the memory reclaim fails because the operator is executing a
non-reclaimable section where it is expected to have reserved enough memory to execute
without asking for more. Therefore, it is an indicator that the memory reservation
is not sufficient. It excludes counting instances where the operator is in a
non-reclaimable state due to currently being on-thread and running or being already
cancelled.
* - arbitrator_requests_count
- Count
- The number of times a memory arbitration request was initiated by a
memory pool attempting to grow its capacity.
* - arbitrator_aborted_count
- Count
- The number of times a query level memory pool is aborted as a result of
a memory arbitration process. The memory pool aborted will eventually
result in a cancelling the original query.
* - arbitrator_failures_count
- Count
- The number of times a memory arbitration request failed. This may occur
either because the requester was terminated during the processing of
its request, the arbitration request would surpass the maximum allowed
capacity for the requester, or the arbitration process couldn't release
the requested amount of memory.
* - arbitrator_queue_time_ms
- Histogram
- The distribution of the amount of time an arbitration request stays queued
in range of [0, 600s] with 20 buckets. It is configured to report the
latency at P50, P90, P99, and P100 percentiles.
* - arbitrator_arbitration_time_ms
- Histogram
- The distribution of the amount of time it take to complete a single
arbitration request stays queued in range of [0, 600s] with 20
buckets. It is configured to report the latency at P50, P90, P99,
and P100 percentiles.
* - arbitrator_free_capacity_bytes
- Average
- The average of total free memory capacity which is managed by the
memory arbitrator.

Spilling
--------
Expand Down
2 changes: 2 additions & 0 deletions velox/dwio/common/SortingWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
*/

#include "velox/dwio/common/SortingWriter.h"
#include "velox/common/base/Counters.h"
#include "velox/common/base/StatsReporter.h"

namespace facebook::velox::dwio::common {

Expand Down
1 change: 1 addition & 0 deletions velox/dwio/dwrf/writer/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,7 @@ uint64_t Writer::MemoryReclaimer::reclaim(
}
const uint64_t memoryUsage = writer_->getContext().getTotalMemoryUsage();
if (memoryUsage < writer_->spillConfig_->writerFlushThresholdSize) {
RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount);
LOG(WARNING)
<< "Can't reclaim memory from dwrf writer pool " << pool->name()
<< " which doesn't have sufficient memory to flush, writer memory usage: "
Expand Down
2 changes: 2 additions & 0 deletions velox/exec/HashBuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1087,6 +1087,7 @@ void HashBuild::reclaim(
// build processing and is not under non-reclaimable execution section.
if (nonReclaimableState()) {
// TODO: reduce the log frequency if it is too verbose.
RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount);
++stats.numNonReclaimableAttempts;
LOG(WARNING) << "Can't reclaim from hash build operator, state_["
<< stateName(state_) << "], nonReclaimableSection_["
Expand All @@ -1107,6 +1108,7 @@ void HashBuild::reclaim(
VELOX_CHECK(buildOp->canReclaim());
if (buildOp->nonReclaimableState()) {
// TODO: reduce the log frequency if it is too verbose.
RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount);
++stats.numNonReclaimableAttempts;
LOG(WARNING) << "Can't reclaim from hash build operator, state_["
<< stateName(buildOp->state_) << "], nonReclaimableSection_["
Expand Down
16 changes: 16 additions & 0 deletions velox/exec/SharedArbitrator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@

#include "velox/exec/SharedArbitrator.h"

#include "velox/common/base/Counters.h"
#include "velox/common/base/Exceptions.h"
#include "velox/common/base/StatsReporter.h"
#include "velox/common/testutil/TestValue.h"
#include "velox/common/time/Timer.h"

Expand Down Expand Up @@ -61,6 +63,7 @@ std::string memoryPoolAbortMessage(

SharedArbitrator::SharedArbitrator(const MemoryArbitrator::Config& config)
: MemoryArbitrator(config), freeCapacity_(capacity_) {
RECORD_METRIC_VALUE(kMetricArbitratorFreeCapacityBytes, capacity_);
VELOX_CHECK_EQ(kind_, config.kind);
}

Expand Down Expand Up @@ -190,11 +193,13 @@ bool SharedArbitrator::growMemory(
ScopedArbitration scopedArbitration(pool, this);
MemoryPool* requestor = pool->root();
if (FOLLY_UNLIKELY(requestor->aborted())) {
RECORD_METRIC_VALUE(kMetricArbitratorFailuresCount);
++numFailures_;
VELOX_MEM_POOL_ABORTED("The requestor has already been aborted");
}

if (FOLLY_UNLIKELY(!ensureCapacity(requestor, targetBytes))) {
RECORD_METRIC_VALUE(kMetricArbitratorFailuresCount);
++numFailures_;
VELOX_MEM_LOG(ERROR) << "Can't grow " << requestor->name()
<< " capacity to "
Expand Down Expand Up @@ -227,6 +232,7 @@ bool SharedArbitrator::growMemory(
<< requestor->name() << ", request " << succinctBytes(targetBytes)
<< " after " << numRetries
<< " retries, Arbitrator state: " << toString();
RECORD_METRIC_VALUE(kMetricArbitratorFailuresCount);
++numFailures_;
return false;
}
Expand All @@ -253,6 +259,7 @@ bool SharedArbitrator::ensureCapacity(
incrementFreeCapacity(reclaimedBytes);
// Check if the requestor has been aborted in reclaim operation above.
if (requestor->aborted()) {
RECORD_METRIC_VALUE(kMetricArbitratorFailuresCount);
++numFailures_;
VELOX_MEM_POOL_ABORTED("The requestor pool has been aborted");
}
Expand Down Expand Up @@ -327,6 +334,7 @@ bool SharedArbitrator::arbitrateMemory(
freedBytes += reclaimUsedMemoryFromCandidates(
requestor, candidates, growTarget - freedBytes);
if (requestor->aborted()) {
RECORD_METRIC_VALUE(kMetricArbitratorFailuresCount);
++numFailures_;
VELOX_MEM_POOL_ABORTED("The requestor pool has been aborted.");
}
Expand Down Expand Up @@ -442,6 +450,7 @@ uint64_t SharedArbitrator::reclaim(
void SharedArbitrator::abort(
MemoryPool* pool,
const std::exception_ptr& error) {
RECORD_METRIC_VALUE(kMetricArbitratorAbortedCount);
++numAborted_;
try {
pool->abort(error);
Expand All @@ -463,6 +472,7 @@ uint64_t SharedArbitrator::decrementFreeCapacity(uint64_t bytes) {
uint64_t SharedArbitrator::decrementFreeCapacityLocked(uint64_t bytes) {
const uint64_t targetBytes = std::min(freeCapacity_, bytes);
VELOX_CHECK_LE(targetBytes, freeCapacity_);
RECORD_METRIC_VALUE(kMetricArbitratorFreeCapacityBytes, -1 * targetBytes);
freeCapacity_ -= targetBytes;
return targetBytes;
}
Expand All @@ -473,6 +483,7 @@ void SharedArbitrator::incrementFreeCapacity(uint64_t bytes) {
}

void SharedArbitrator::incrementFreeCapacityLocked(uint64_t bytes) {
RECORD_METRIC_VALUE(kMetricArbitratorFreeCapacityBytes, bytes);
freeCapacity_ += bytes;
if (FOLLY_UNLIKELY(freeCapacity_ > capacity_)) {
VELOX_FAIL(
Expand Down Expand Up @@ -539,6 +550,8 @@ SharedArbitrator::ScopedArbitration::~ScopedArbitration() {
const auto arbitrationTime =
std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::steady_clock::now() - startTime_);
RECORD_HISTOGRAM_METRIC_VALUE(
kMetricArbitratorArbitrationTimeMs, arbitrationTime.count() / 1'000);
arbitrator_->arbitrationTimeUs_ += arbitrationTime.count();
arbitrator_->finishArbitration();
}
Expand All @@ -548,6 +561,7 @@ void SharedArbitrator::startArbitration(MemoryPool* requestor) {
ContinueFuture waitPromise{ContinueFuture::makeEmpty()};
{
std::lock_guard<std::mutex> l(mutex_);
RECORD_METRIC_VALUE(kMetricArbitratorRequestsCount);
++numRequests_;
if (running_) {
waitPromises_.emplace_back(fmt::format(
Expand All @@ -570,6 +584,8 @@ void SharedArbitrator::startArbitration(MemoryPool* requestor) {
MicrosecondTimer timer(&waitTimeUs);
waitPromise.wait();
}
RECORD_HISTOGRAM_METRIC_VALUE(
kMetricArbitratorQueueTimeMs, waitTimeUs / 1'000);
queueTimeUs_ += waitTimeUs;
}
}
Expand Down
2 changes: 2 additions & 0 deletions velox/exec/TableWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include "velox/exec/TableWriter.h"

#include "HashAggregation.h"
#include "velox/common/base/Counters.h"
#include "velox/common/base/StatsReporter.h"
#include "velox/exec/Task.h"

namespace facebook::velox::exec {
Expand Down
2 changes: 2 additions & 0 deletions velox/exec/TopNRowNumber.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
* limitations under the License.
*/
#include "velox/exec/TopNRowNumber.h"
#include "velox/common/base/Counters.h"
#include "velox/common/base/StatsReporter.h"
#include "velox/exec/OperatorUtils.h"

namespace facebook::velox::exec {
Expand Down
3 changes: 3 additions & 0 deletions velox/exec/Window.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
* limitations under the License.
*/
#include "velox/exec/Window.h"

#include "velox/common/base/Counters.h"
#include "velox/common/base/StatsReporter.h"
#include "velox/exec/OperatorUtils.h"
#include "velox/exec/SortWindowBuild.h"
#include "velox/exec/StreamingWindowBuild.h"
Expand Down

0 comments on commit 31fb677

Please sign in to comment.