From 70c9f6c7126725247bfb8193067b18e11897cb97 Mon Sep 17 00:00:00 2001 From: xiaoxmeng Date: Sun, 31 Mar 2024 13:20:51 -0700 Subject: [PATCH] Fix tsan test failures caused by LOG_EVERY_N (#9320) Summary: LOG_EVERY_N is not thread-safe and caused tsan failure in Meta internal testing The fix is to use VELOX_MEM_LOG_EVERY_MS which is thread-safe Pull Request resolved: https://github.com/facebookincubator/velox/pull/9320 Reviewed By: amitkdutta Differential Revision: D55573476 Pulled By: xiaoxmeng fbshipit-source-id: 31629624cb3118060fc7fd59a6e7bd3fb701af93 --- velox/common/memory/MemoryAllocator.cpp | 4 ++-- velox/common/memory/MemoryPool.cpp | 6 +++--- velox/exec/HashBuild.cpp | 6 +++--- velox/exec/HashProbe.cpp | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/velox/common/memory/MemoryAllocator.cpp b/velox/common/memory/MemoryAllocator.cpp index 4553b85b3fd9..3995b1841929 100644 --- a/velox/common/memory/MemoryAllocator.cpp +++ b/velox/common/memory/MemoryAllocator.cpp @@ -183,7 +183,7 @@ bool MemoryAllocator::allocateNonContiguous( try { reservationCB(AllocationTraits::pageBytes(numNeededPages), true); } catch (const std::exception&) { - VELOX_MEM_LOG_EVERY_MS(WARNING, 1000) + VELOX_MEM_LOG_EVERY_MS(WARNING, 1'000) << "Exceeded memory reservation limit when reserve " << numNeededPages << " new pages when allocate " << mix.totalPages << " pages"; @@ -249,7 +249,7 @@ bool MemoryAllocator::allocateContiguous( try { reservationCB(AllocationTraits::pageBytes(numNeededPages), true); } catch (const std::exception& e) { - VELOX_MEM_LOG_EVERY_MS(WARNING, 1000) + VELOX_MEM_LOG_EVERY_MS(WARNING, 1'000) << "Exceeded memory reservation limit when reserve " << numNeededPages << " new pages when allocate " << numPages << " pages, error: " << e.what(); diff --git a/velox/common/memory/MemoryPool.cpp b/velox/common/memory/MemoryPool.cpp index b2acdc006f7e..5e854c5bcdd2 100644 --- a/velox/common/memory/MemoryPool.cpp +++ b/velox/common/memory/MemoryPool.cpp @@ -432,13 +432,13 @@ MemoryPoolImpl::~MemoryPoolImpl() { if (isLeaf()) { if (usedReservationBytes_ > 0) { - LOG(ERROR) << "Memory leak (Used memory): " << toString(); + VELOX_MEM_LOG(ERROR) << "Memory leak (Used memory): " << toString(); RECORD_METRIC_VALUE( kMetricMemoryPoolUsageLeakBytes, usedReservationBytes_); } if (minReservationBytes_ > 0) { - LOG(ERROR) << "Memory leak (Reserved Memory): " << toString(); + VELOX_MEM_LOG(ERROR) << "Memory leak (Reserved Memory): " << toString(); RECORD_METRIC_VALUE( kMetricMemoryPoolReservationLeakBytes, minReservationBytes_); } @@ -1185,7 +1185,7 @@ void MemoryPoolImpl::leakCheckDbg() { void MemoryPoolImpl::handleAllocationFailure( const std::string& failureMessage) { if (coreOnAllocationFailureEnabled_) { - LOG(ERROR) << failureMessage; + VELOX_MEM_LOG(ERROR) << failureMessage; // SIGBUS is one of the standard signals in Linux that triggers a core dump // Normally it is raised by the operating system when a misaligned memory // access occurs. On x86 and aarch64 misaligned access is allowed by default diff --git a/velox/exec/HashBuild.cpp b/velox/exec/HashBuild.cpp index 29fa2c2b539f..6bc0280bee7e 100644 --- a/velox/exec/HashBuild.cpp +++ b/velox/exec/HashBuild.cpp @@ -219,7 +219,7 @@ void HashBuild::setupSpiller(SpillPartition* spillPartition) { // out of memory if the restored partition still can't fit in memory. if (config->exceedSpillLevelLimit(startPartitionBit)) { RECORD_METRIC_VALUE(kMetricMaxSpillLevelExceededCount); - LOG_EVERY_N(WARNING, 1'000) + FB_LOG_EVERY_MS(WARNING, 1'000) << "Exceeded spill level limit: " << config->maxSpillLevel << ", and disable spilling for memory pool: " << pool()->name(); ++spillStats_.wlock()->spillMaxLevelExceededCount; @@ -1027,7 +1027,7 @@ void HashBuild::reclaim( // TODO: reduce the log frequency if it is too verbose. RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount); ++stats.numNonReclaimableAttempts; - LOG_EVERY_N(WARNING, 1'000) + FB_LOG_EVERY_MS(WARNING, 1'000) << "Can't reclaim from hash build operator, state_[" << stateName(state_) << "], nonReclaimableSection_[" << nonReclaimableSection_ << "], spiller_[" @@ -1051,7 +1051,7 @@ void HashBuild::reclaim( // TODO: reduce the log frequency if it is too verbose. RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount); ++stats.numNonReclaimableAttempts; - LOG_EVERY_N(WARNING, 1'000) + FB_LOG_EVERY_MS(WARNING, 1'000) << "Can't reclaim from hash build operator, state_[" << stateName(buildOp->state_) << "], nonReclaimableSection_[" << buildOp->nonReclaimableSection_ << "], " << buildOp->pool()->name() diff --git a/velox/exec/HashProbe.cpp b/velox/exec/HashProbe.cpp index 211c12a52d8f..dffd38a52993 100644 --- a/velox/exec/HashProbe.cpp +++ b/velox/exec/HashProbe.cpp @@ -1567,7 +1567,7 @@ void HashProbe::reclaim( // TODO: reduce the log frequency if it is too verbose. RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount); ++stats.numNonReclaimableAttempts; - LOG_EVERY_N(WARNING, 1'000) + FB_LOG_EVERY_MS(WARNING, 1'000) << "Can't reclaim from hash probe operator, state_[" << ProbeOperatorState(state_) << "], nonReclaimableSection_[" << nonReclaimableSection_ << "], " << pool()->name() @@ -1587,7 +1587,7 @@ void HashProbe::reclaim( if (probeOp->nonReclaimableState()) { RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount); ++stats.numNonReclaimableAttempts; - LOG_EVERY_N(WARNING, 1'000) + FB_LOG_EVERY_MS(WARNING, 1'000) << "Can't reclaim from hash probe operator, state_[" << ProbeOperatorState(probeOp->state_) << "], nonReclaimableSection_[" << probeOp->nonReclaimableSection_ << "], " << probeOp->pool()->name() @@ -1804,7 +1804,7 @@ void HashProbe::prepareTableSpill( // run out of memory if the restored partition still can't fit in memory. if (config->exceedSpillLevelLimit(startPartitionBit)) { RECORD_METRIC_VALUE(kMetricMaxSpillLevelExceededCount); - LOG_EVERY_N(WARNING, 1'000) + FB_LOG_EVERY_MS(WARNING, 1'000) << "Exceeded spill level limit: " << config->maxSpillLevel << ", and disable spilling for memory pool: " << pool()->name(); exceededMaxSpillLevelLimit_ = true;