Skip to content

Commit

Permalink
Fix and extend arbitration related metrics
Browse files Browse the repository at this point in the history
Fixes accounting of kMetricArbitratorLocalArbitrationCount, which was
previously sometimes incremented for global arbitration.

Also adds operator-level metrics that keep track of the global and
local arbitration attempts initiated by each operator.
  • Loading branch information
bikramSingh91 committed Apr 5, 2024
1 parent 41bed84 commit 2d37fcb
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 7 deletions.
1 change: 1 addition & 0 deletions velox/common/memory/Memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,7 @@ class MemoryManager {
std::vector<std::shared_ptr<MemoryPool>> sharedLeafPools_;

mutable folly::SharedMutex mutex_;
// All root pools allocated from 'this'.
std::unordered_map<std::string, std::weak_ptr<MemoryPool>> pools_;
};

Expand Down
16 changes: 12 additions & 4 deletions velox/common/memory/SharedArbitrator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ bool SharedArbitrator::ensureCapacity(
if (checkCapacityGrowth(*requestor, targetBytes)) {
return true;
}
const uint64_t reclaimedBytes = reclaim(requestor, targetBytes);
const uint64_t reclaimedBytes = reclaim(requestor, targetBytes, true);
// NOTE: return the reclaimed bytes back to the arbitrator and let the memory
// arbitration process to grow the requestor's memory capacity accordingly.
incrementFreeCapacity(reclaimedBytes);
Expand Down Expand Up @@ -427,6 +427,8 @@ bool SharedArbitrator::arbitrateMemory(

VELOX_CHECK_LT(freedBytes, growTarget);
RECORD_METRIC_VALUE(kMetricArbitratorGlobalArbitrationCount);
addThreadLocalRuntimeStat(
"globalArbitrationCount", RuntimeCounter(1, RuntimeCounter::Unit::kNone));
freedBytes += reclaimUsedMemoryFromCandidatesBySpill(
requestor, candidates, growTarget - freedBytes);
if (requestor->aborted()) {
Expand Down Expand Up @@ -494,7 +496,7 @@ uint64_t SharedArbitrator::reclaimUsedMemoryFromCandidatesBySpill(
const int64_t bytesToReclaim = std::max<int64_t>(
targetBytes - freedBytes, memoryPoolTransferCapacity_);
VELOX_CHECK_GT(bytesToReclaim, 0);
freedBytes += reclaim(candidate.pool, bytesToReclaim);
freedBytes += reclaim(candidate.pool, bytesToReclaim, false);
if ((freedBytes >= targetBytes) ||
(requestor != nullptr && requestor->aborted())) {
break;
Expand Down Expand Up @@ -531,7 +533,8 @@ uint64_t SharedArbitrator::reclaimUsedMemoryFromCandidatesByAbort(

uint64_t SharedArbitrator::reclaim(
MemoryPool* pool,
uint64_t targetBytes) noexcept {
uint64_t targetBytes,
bool isLocalArbitration) noexcept {
uint64_t reclaimDurationUs{0};
uint64_t reclaimedBytes{0};
uint64_t freedBytes{0};
Expand All @@ -542,7 +545,12 @@ uint64_t SharedArbitrator::reclaim(
try {
freedBytes = pool->shrink(targetBytes);
if (freedBytes < targetBytes) {
RECORD_METRIC_VALUE(kMetricArbitratorLocalArbitrationCount);
if (isLocalArbitration) {
RECORD_METRIC_VALUE(kMetricArbitratorLocalArbitrationCount);
addThreadLocalRuntimeStat(
"localArbitrationCount",
RuntimeCounter(1, RuntimeCounter::Unit::kNone));
}
pool->reclaim(
targetBytes - freedBytes, memoryReclaimWaitMs_, reclaimerStats);
}
Expand Down
11 changes: 8 additions & 3 deletions velox/common/memory/SharedArbitrator.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,14 @@ class SharedArbitrator : public memory::MemoryArbitrator {
std::vector<Candidate>& candidates,
uint64_t targetBytes);

// Invoked to reclaim used memory from 'pool' with specified 'targetBytes'.
// The function returns the actually freed capacity.
uint64_t reclaim(MemoryPool* pool, uint64_t targetBytes) noexcept;
// Invoked to reclaim used memory from 'targetPool' with specified
// 'targetBytes'. The function returns the actually freed capacity.
// 'isLocalArbitration' is true when the reclaim attempt is within a local
// arbitration.
uint64_t reclaim(
MemoryPool* targetPool,
uint64_t targetBytes,
bool isLocalArbitration) noexcept;

// Invoked to abort memory 'pool'.
void abort(MemoryPool* pool, const std::exception_ptr& error);
Expand Down
11 changes: 11 additions & 0 deletions velox/docs/monitoring/stats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,17 @@ These stats are reported by all operators.
- bytes
- The reclaimed memory bytes of an operator during the memory arbitration.
This stat only applies to spillable operators.
* - globalArbitrationCount
-
- The number of times a request for more memory hit the arbitrator's
capacity limit and initiated a global arbitration attempt where
memory is reclaimed from viable candidates chosen among all running
queries based on a criterion.
* - localArbitrationCount
-
- The number of times a request for more memory hit the query memory
limit and initiated a local arbitration attempt where memory is
reclaimed from the requestor itself.

HashBuild, HashAggregation
--------------------------
Expand Down

0 comments on commit 2d37fcb

Please sign in to comment.