diff --git a/velox/common/memory/Memory.h b/velox/common/memory/Memory.h index ce0738795f236..3772f4f31f898 100644 --- a/velox/common/memory/Memory.h +++ b/velox/common/memory/Memory.h @@ -314,6 +314,7 @@ class MemoryManager { std::vector> sharedLeafPools_; mutable folly::SharedMutex mutex_; + // All root pools allocated from 'this'. std::unordered_map> pools_; }; diff --git a/velox/common/memory/SharedArbitrator.cpp b/velox/common/memory/SharedArbitrator.cpp index 886680680979a..0b2f757ac4691 100644 --- a/velox/common/memory/SharedArbitrator.cpp +++ b/velox/common/memory/SharedArbitrator.cpp @@ -348,7 +348,7 @@ bool SharedArbitrator::ensureCapacity( if (checkCapacityGrowth(*requestor, targetBytes)) { return true; } - const uint64_t reclaimedBytes = reclaim(requestor, targetBytes); + const uint64_t reclaimedBytes = reclaim(requestor, targetBytes, true); // NOTE: return the reclaimed bytes back to the arbitrator and let the memory // arbitration process to grow the requestor's memory capacity accordingly. incrementFreeCapacity(reclaimedBytes); @@ -427,6 +427,8 @@ bool SharedArbitrator::arbitrateMemory( VELOX_CHECK_LT(freedBytes, growTarget); RECORD_METRIC_VALUE(kMetricArbitratorGlobalArbitrationCount); + addThreadLocalRuntimeStat( + "globalArbitrationCount", RuntimeCounter(1, RuntimeCounter::Unit::kNone)); freedBytes += reclaimUsedMemoryFromCandidatesBySpill( requestor, candidates, growTarget - freedBytes); if (requestor->aborted()) { @@ -494,7 +496,7 @@ uint64_t SharedArbitrator::reclaimUsedMemoryFromCandidatesBySpill( const int64_t bytesToReclaim = std::max( targetBytes - freedBytes, memoryPoolTransferCapacity_); VELOX_CHECK_GT(bytesToReclaim, 0); - freedBytes += reclaim(candidate.pool, bytesToReclaim); + freedBytes += reclaim(candidate.pool, bytesToReclaim, false); if ((freedBytes >= targetBytes) || (requestor != nullptr && requestor->aborted())) { break; @@ -531,7 +533,8 @@ uint64_t SharedArbitrator::reclaimUsedMemoryFromCandidatesByAbort( uint64_t SharedArbitrator::reclaim( MemoryPool* pool, - uint64_t targetBytes) noexcept { + uint64_t targetBytes, + bool isLocalArbitration) noexcept { uint64_t reclaimDurationUs{0}; uint64_t reclaimedBytes{0}; uint64_t freedBytes{0}; @@ -542,7 +545,12 @@ uint64_t SharedArbitrator::reclaim( try { freedBytes = pool->shrink(targetBytes); if (freedBytes < targetBytes) { - RECORD_METRIC_VALUE(kMetricArbitratorLocalArbitrationCount); + if (isLocalArbitration) { + RECORD_METRIC_VALUE(kMetricArbitratorLocalArbitrationCount); + addThreadLocalRuntimeStat( + "localArbitrationCount", + RuntimeCounter(1, RuntimeCounter::Unit::kNone)); + } pool->reclaim( targetBytes - freedBytes, memoryReclaimWaitMs_, reclaimerStats); } diff --git a/velox/common/memory/SharedArbitrator.h b/velox/common/memory/SharedArbitrator.h index d6649f1b75dbd..c37de46250bd1 100644 --- a/velox/common/memory/SharedArbitrator.h +++ b/velox/common/memory/SharedArbitrator.h @@ -151,9 +151,14 @@ class SharedArbitrator : public memory::MemoryArbitrator { std::vector& candidates, uint64_t targetBytes); - // Invoked to reclaim used memory from 'pool' with specified 'targetBytes'. - // The function returns the actually freed capacity. - uint64_t reclaim(MemoryPool* pool, uint64_t targetBytes) noexcept; + // Invoked to reclaim used memory from 'targetPool' with specified + // 'targetBytes'. The function returns the actually freed capacity. + // 'isLocalArbitration' is true when the reclaim attempt is within a local + // arbitration. + uint64_t reclaim( + MemoryPool* targetPool, + uint64_t targetBytes, + bool isLocalArbitration) noexcept; // Invoked to abort memory 'pool'. void abort(MemoryPool* pool, const std::exception_ptr& error); diff --git a/velox/docs/monitoring/stats.rst b/velox/docs/monitoring/stats.rst index acbb458530922..076ca7c722c35 100644 --- a/velox/docs/monitoring/stats.rst +++ b/velox/docs/monitoring/stats.rst @@ -41,6 +41,17 @@ These stats are reported by all operators. - bytes - The reclaimed memory bytes of an operator during the memory arbitration. This stats only applies for spillable operators. + * - globalArbitrationCount + - + - The number of times a request for more memory hit the arbitrator's + capacity limit and initiated a global arbitration attempt where + memory is reclaimed from viable candidates chosen among all running + queries based on a criterion. + * - localArbitrationCount + - + - The number of times a request for more memory hit the query memory + limit and initiated a local arbitration attempt where memory is + reclaimed from the requestor itself. HashBuild, HashAggregation --------------------------