From 2d37fcb45b055c04528f48f45e31c66e9646372d Mon Sep 17 00:00:00 2001 From: Bikramjeet Vig Date: Fri, 5 Apr 2024 16:33:45 -0700 Subject: [PATCH] Fix and extend arbitration related metrics Fixes accounting of kMetricArbitratorLocalArbitrationCountwhich was previously sometimes incremented for global arbitration. Also adds additional operator level metrics for keeping track of global and local arbitration attempts initiated by them. --- velox/common/memory/Memory.h | 1 + velox/common/memory/SharedArbitrator.cpp | 16 ++++++++++++---- velox/common/memory/SharedArbitrator.h | 11 ++++++++--- velox/docs/monitoring/stats.rst | 11 +++++++++++ 4 files changed, 32 insertions(+), 7 deletions(-) diff --git a/velox/common/memory/Memory.h b/velox/common/memory/Memory.h index ce0738795f236..3772f4f31f898 100644 --- a/velox/common/memory/Memory.h +++ b/velox/common/memory/Memory.h @@ -314,6 +314,7 @@ class MemoryManager { std::vector> sharedLeafPools_; mutable folly::SharedMutex mutex_; + // All root pools allocated from 'this'. std::unordered_map> pools_; }; diff --git a/velox/common/memory/SharedArbitrator.cpp b/velox/common/memory/SharedArbitrator.cpp index 886680680979a..0b2f757ac4691 100644 --- a/velox/common/memory/SharedArbitrator.cpp +++ b/velox/common/memory/SharedArbitrator.cpp @@ -348,7 +348,7 @@ bool SharedArbitrator::ensureCapacity( if (checkCapacityGrowth(*requestor, targetBytes)) { return true; } - const uint64_t reclaimedBytes = reclaim(requestor, targetBytes); + const uint64_t reclaimedBytes = reclaim(requestor, targetBytes, true); // NOTE: return the reclaimed bytes back to the arbitrator and let the memory // arbitration process to grow the requestor's memory capacity accordingly. incrementFreeCapacity(reclaimedBytes); @@ -427,6 +427,8 @@ bool SharedArbitrator::arbitrateMemory( VELOX_CHECK_LT(freedBytes, growTarget); RECORD_METRIC_VALUE(kMetricArbitratorGlobalArbitrationCount); + addThreadLocalRuntimeStat( + "globalArbitrationCount", RuntimeCounter(1, RuntimeCounter::Unit::kNone)); freedBytes += reclaimUsedMemoryFromCandidatesBySpill( requestor, candidates, growTarget - freedBytes); if (requestor->aborted()) { @@ -494,7 +496,7 @@ uint64_t SharedArbitrator::reclaimUsedMemoryFromCandidatesBySpill( const int64_t bytesToReclaim = std::max( targetBytes - freedBytes, memoryPoolTransferCapacity_); VELOX_CHECK_GT(bytesToReclaim, 0); - freedBytes += reclaim(candidate.pool, bytesToReclaim); + freedBytes += reclaim(candidate.pool, bytesToReclaim, false); if ((freedBytes >= targetBytes) || (requestor != nullptr && requestor->aborted())) { break; @@ -531,7 +533,8 @@ uint64_t SharedArbitrator::reclaimUsedMemoryFromCandidatesByAbort( uint64_t SharedArbitrator::reclaim( MemoryPool* pool, - uint64_t targetBytes) noexcept { + uint64_t targetBytes, + bool isLocalArbitration) noexcept { uint64_t reclaimDurationUs{0}; uint64_t reclaimedBytes{0}; uint64_t freedBytes{0}; @@ -542,7 +545,12 @@ uint64_t SharedArbitrator::reclaim( try { freedBytes = pool->shrink(targetBytes); if (freedBytes < targetBytes) { - RECORD_METRIC_VALUE(kMetricArbitratorLocalArbitrationCount); + if (isLocalArbitration) { + RECORD_METRIC_VALUE(kMetricArbitratorLocalArbitrationCount); + addThreadLocalRuntimeStat( + "localArbitrationCount", + RuntimeCounter(1, RuntimeCounter::Unit::kNone)); + } pool->reclaim( targetBytes - freedBytes, memoryReclaimWaitMs_, reclaimerStats); } diff --git a/velox/common/memory/SharedArbitrator.h b/velox/common/memory/SharedArbitrator.h index d6649f1b75dbd..c37de46250bd1 100644 --- a/velox/common/memory/SharedArbitrator.h +++ b/velox/common/memory/SharedArbitrator.h @@ -151,9 +151,14 @@ class SharedArbitrator : public memory::MemoryArbitrator { std::vector& candidates, uint64_t targetBytes); - // Invoked to reclaim used memory from 'pool' with specified 'targetBytes'. - // The function returns the actually freed capacity. - uint64_t reclaim(MemoryPool* pool, uint64_t targetBytes) noexcept; + // Invoked to reclaim used memory from 'targetPool' with specified + // 'targetBytes'. The function returns the actually freed capacity. + // 'isLocalArbitration' is true when the reclaim attempt is within a local + // arbitration. + uint64_t reclaim( + MemoryPool* targetPool, + uint64_t targetBytes, + bool isLocalArbitration) noexcept; // Invoked to abort memory 'pool'. void abort(MemoryPool* pool, const std::exception_ptr& error); diff --git a/velox/docs/monitoring/stats.rst b/velox/docs/monitoring/stats.rst index acbb458530922..076ca7c722c35 100644 --- a/velox/docs/monitoring/stats.rst +++ b/velox/docs/monitoring/stats.rst @@ -41,6 +41,17 @@ These stats are reported by all operators. - bytes - The reclaimed memory bytes of an operator during the memory arbitration. This stats only applies for spillable operators. + * - globalArbitrationCount + - + - The number of times a request for more memory hit the arbitrator's + capacity limit and initiated a global arbitration attempt where + memory is reclaimed from viable candidates chosen among all running + queries based on a criterion. + * - localArbitrationCount + - + - The number of times a request for more memory hit the query memory + limit and initiated a local arbitration attempt where memory is + reclaimed from the requestor itself. HashBuild, HashAggregation --------------------------