Skip to content

Commit

Permalink
Fix tsan test failures caused by LOG_EVERY_N (#9320)
Browse files Browse the repository at this point in the history
Summary:
LOG_EVERY_N is not thread-safe and caused tsan failure in Meta internal testing
The fix is to use VELOX_MEM_LOG_EVERY_MS which is thread-safe

Pull Request resolved: #9320

Reviewed By: amitkdutta

Differential Revision: D55573476

Pulled By: xiaoxmeng

fbshipit-source-id: 31629624cb3118060fc7fd59a6e7bd3fb701af93
  • Loading branch information
xiaoxmeng authored and facebook-github-bot committed Mar 31, 2024
1 parent 1d7343e commit 70c9f6c
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 11 deletions.
4 changes: 2 additions & 2 deletions velox/common/memory/MemoryAllocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ bool MemoryAllocator::allocateNonContiguous(
try {
reservationCB(AllocationTraits::pageBytes(numNeededPages), true);
} catch (const std::exception&) {
VELOX_MEM_LOG_EVERY_MS(WARNING, 1000)
VELOX_MEM_LOG_EVERY_MS(WARNING, 1'000)
<< "Exceeded memory reservation limit when reserve "
<< numNeededPages << " new pages when allocate " << mix.totalPages
<< " pages";
Expand Down Expand Up @@ -249,7 +249,7 @@ bool MemoryAllocator::allocateContiguous(
try {
reservationCB(AllocationTraits::pageBytes(numNeededPages), true);
} catch (const std::exception& e) {
VELOX_MEM_LOG_EVERY_MS(WARNING, 1000)
VELOX_MEM_LOG_EVERY_MS(WARNING, 1'000)
<< "Exceeded memory reservation limit when reserve "
<< numNeededPages << " new pages when allocate " << numPages
<< " pages, error: " << e.what();
Expand Down
6 changes: 3 additions & 3 deletions velox/common/memory/MemoryPool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,13 +432,13 @@ MemoryPoolImpl::~MemoryPoolImpl() {

if (isLeaf()) {
if (usedReservationBytes_ > 0) {
LOG(ERROR) << "Memory leak (Used memory): " << toString();
VELOX_MEM_LOG(ERROR) << "Memory leak (Used memory): " << toString();
RECORD_METRIC_VALUE(
kMetricMemoryPoolUsageLeakBytes, usedReservationBytes_);
}

if (minReservationBytes_ > 0) {
LOG(ERROR) << "Memory leak (Reserved Memory): " << toString();
VELOX_MEM_LOG(ERROR) << "Memory leak (Reserved Memory): " << toString();
RECORD_METRIC_VALUE(
kMetricMemoryPoolReservationLeakBytes, minReservationBytes_);
}
Expand Down Expand Up @@ -1185,7 +1185,7 @@ void MemoryPoolImpl::leakCheckDbg() {
void MemoryPoolImpl::handleAllocationFailure(
const std::string& failureMessage) {
if (coreOnAllocationFailureEnabled_) {
LOG(ERROR) << failureMessage;
VELOX_MEM_LOG(ERROR) << failureMessage;
// SIGBUS is one of the standard signals in Linux that triggers a core dump
// Normally it is raised by the operating system when a misaligned memory
// access occurs. On x86 and aarch64 misaligned access is allowed by default
Expand Down
6 changes: 3 additions & 3 deletions velox/exec/HashBuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ void HashBuild::setupSpiller(SpillPartition* spillPartition) {
// out of memory if the restored partition still can't fit in memory.
if (config->exceedSpillLevelLimit(startPartitionBit)) {
RECORD_METRIC_VALUE(kMetricMaxSpillLevelExceededCount);
LOG_EVERY_N(WARNING, 1'000)
FB_LOG_EVERY_MS(WARNING, 1'000)
<< "Exceeded spill level limit: " << config->maxSpillLevel
<< ", and disable spilling for memory pool: " << pool()->name();
++spillStats_.wlock()->spillMaxLevelExceededCount;
Expand Down Expand Up @@ -1027,7 +1027,7 @@ void HashBuild::reclaim(
// TODO: reduce the log frequency if it is too verbose.
RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount);
++stats.numNonReclaimableAttempts;
LOG_EVERY_N(WARNING, 1'000)
FB_LOG_EVERY_MS(WARNING, 1'000)
<< "Can't reclaim from hash build operator, state_["
<< stateName(state_) << "], nonReclaimableSection_["
<< nonReclaimableSection_ << "], spiller_["
Expand All @@ -1051,7 +1051,7 @@ void HashBuild::reclaim(
// TODO: reduce the log frequency if it is too verbose.
RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount);
++stats.numNonReclaimableAttempts;
LOG_EVERY_N(WARNING, 1'000)
FB_LOG_EVERY_MS(WARNING, 1'000)
<< "Can't reclaim from hash build operator, state_["
<< stateName(buildOp->state_) << "], nonReclaimableSection_["
<< buildOp->nonReclaimableSection_ << "], " << buildOp->pool()->name()
Expand Down
6 changes: 3 additions & 3 deletions velox/exec/HashProbe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1567,7 +1567,7 @@ void HashProbe::reclaim(
// TODO: reduce the log frequency if it is too verbose.
RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount);
++stats.numNonReclaimableAttempts;
LOG_EVERY_N(WARNING, 1'000)
FB_LOG_EVERY_MS(WARNING, 1'000)
<< "Can't reclaim from hash probe operator, state_["
<< ProbeOperatorState(state_) << "], nonReclaimableSection_["
<< nonReclaimableSection_ << "], " << pool()->name()
Expand All @@ -1587,7 +1587,7 @@ void HashProbe::reclaim(
if (probeOp->nonReclaimableState()) {
RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount);
++stats.numNonReclaimableAttempts;
LOG_EVERY_N(WARNING, 1'000)
FB_LOG_EVERY_MS(WARNING, 1'000)
<< "Can't reclaim from hash probe operator, state_["
<< ProbeOperatorState(probeOp->state_) << "], nonReclaimableSection_["
<< probeOp->nonReclaimableSection_ << "], " << probeOp->pool()->name()
Expand Down Expand Up @@ -1804,7 +1804,7 @@ void HashProbe::prepareTableSpill(
// run out of memory if the restored partition still can't fit in memory.
if (config->exceedSpillLevelLimit(startPartitionBit)) {
RECORD_METRIC_VALUE(kMetricMaxSpillLevelExceededCount);
LOG_EVERY_N(WARNING, 1'000)
FB_LOG_EVERY_MS(WARNING, 1'000)
<< "Exceeded spill level limit: " << config->maxSpillLevel
<< ", and disable spilling for memory pool: " << pool()->name();
exceededMaxSpillLevelLimit_ = true;
Expand Down

0 comments on commit 70c9f6c

Please sign in to comment.