From 70c9f6c7126725247bfb8193067b18e11897cb97 Mon Sep 17 00:00:00 2001
From: xiaoxmeng <xiaoxmeng@fb.com>
Date: Sun, 31 Mar 2024 13:20:51 -0700
Subject: [PATCH] Fix tsan test failures caused by LOG_EVERY_N (#9320)

Summary:
LOG_EVERY_N is not thread-safe and caused tsan failure in Meta internal testing
The fix is to use VELOX_MEM_LOG_EVERY_MS which is thread-safe

Pull Request resolved: https://github.com/facebookincubator/velox/pull/9320

Reviewed By: amitkdutta

Differential Revision: D55573476

Pulled By: xiaoxmeng

fbshipit-source-id: 31629624cb3118060fc7fd59a6e7bd3fb701af93
---
 velox/common/memory/MemoryAllocator.cpp | 4 ++--
 velox/common/memory/MemoryPool.cpp      | 6 +++---
 velox/exec/HashBuild.cpp                | 6 +++---
 velox/exec/HashProbe.cpp                | 6 +++---
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/velox/common/memory/MemoryAllocator.cpp b/velox/common/memory/MemoryAllocator.cpp
index 4553b85b3fd9..3995b1841929 100644
--- a/velox/common/memory/MemoryAllocator.cpp
+++ b/velox/common/memory/MemoryAllocator.cpp
@@ -183,7 +183,7 @@ bool MemoryAllocator::allocateNonContiguous(
       try {
         reservationCB(AllocationTraits::pageBytes(numNeededPages), true);
       } catch (const std::exception&) {
-        VELOX_MEM_LOG_EVERY_MS(WARNING, 1000)
+        VELOX_MEM_LOG_EVERY_MS(WARNING, 1'000)
             << "Exceeded memory reservation limit when reserve "
             << numNeededPages << " new pages when allocate " << mix.totalPages
             << " pages";
@@ -249,7 +249,7 @@ bool MemoryAllocator::allocateContiguous(
       try {
         reservationCB(AllocationTraits::pageBytes(numNeededPages), true);
       } catch (const std::exception& e) {
-        VELOX_MEM_LOG_EVERY_MS(WARNING, 1000)
+        VELOX_MEM_LOG_EVERY_MS(WARNING, 1'000)
             << "Exceeded memory reservation limit when reserve "
             << numNeededPages << " new pages when allocate " << numPages
             << " pages, error: " << e.what();
diff --git a/velox/common/memory/MemoryPool.cpp b/velox/common/memory/MemoryPool.cpp
index b2acdc006f7e..5e854c5bcdd2 100644
--- a/velox/common/memory/MemoryPool.cpp
+++ b/velox/common/memory/MemoryPool.cpp
@@ -432,13 +432,13 @@ MemoryPoolImpl::~MemoryPoolImpl() {
 
   if (isLeaf()) {
     if (usedReservationBytes_ > 0) {
-      LOG(ERROR) << "Memory leak (Used memory): " << toString();
+      VELOX_MEM_LOG(ERROR) << "Memory leak (Used memory): " << toString();
       RECORD_METRIC_VALUE(
           kMetricMemoryPoolUsageLeakBytes, usedReservationBytes_);
     }
 
     if (minReservationBytes_ > 0) {
-      LOG(ERROR) << "Memory leak (Reserved Memory): " << toString();
+      VELOX_MEM_LOG(ERROR) << "Memory leak (Reserved Memory): " << toString();
       RECORD_METRIC_VALUE(
           kMetricMemoryPoolReservationLeakBytes, minReservationBytes_);
     }
@@ -1185,7 +1185,7 @@ void MemoryPoolImpl::leakCheckDbg() {
 void MemoryPoolImpl::handleAllocationFailure(
     const std::string& failureMessage) {
   if (coreOnAllocationFailureEnabled_) {
-    LOG(ERROR) << failureMessage;
+    VELOX_MEM_LOG(ERROR) << failureMessage;
     // SIGBUS is one of the standard signals in Linux that triggers a core dump
     // Normally it is raised by the operating system when a misaligned memory
     // access occurs. On x86 and aarch64 misaligned access is allowed by default
diff --git a/velox/exec/HashBuild.cpp b/velox/exec/HashBuild.cpp
index 29fa2c2b539f..6bc0280bee7e 100644
--- a/velox/exec/HashBuild.cpp
+++ b/velox/exec/HashBuild.cpp
@@ -219,7 +219,7 @@ void HashBuild::setupSpiller(SpillPartition* spillPartition) {
     // out of memory if the restored partition still can't fit in memory.
     if (config->exceedSpillLevelLimit(startPartitionBit)) {
       RECORD_METRIC_VALUE(kMetricMaxSpillLevelExceededCount);
-      LOG_EVERY_N(WARNING, 1'000)
+      FB_LOG_EVERY_MS(WARNING, 1'000)
           << "Exceeded spill level limit: " << config->maxSpillLevel
           << ", and disable spilling for memory pool: " << pool()->name();
       ++spillStats_.wlock()->spillMaxLevelExceededCount;
@@ -1027,7 +1027,7 @@ void HashBuild::reclaim(
     // TODO: reduce the log frequency if it is too verbose.
     RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount);
     ++stats.numNonReclaimableAttempts;
-    LOG_EVERY_N(WARNING, 1'000)
+    FB_LOG_EVERY_MS(WARNING, 1'000)
         << "Can't reclaim from hash build operator, state_["
         << stateName(state_) << "], nonReclaimableSection_["
         << nonReclaimableSection_ << "], spiller_["
@@ -1051,7 +1051,7 @@ void HashBuild::reclaim(
       // TODO: reduce the log frequency if it is too verbose.
       RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount);
       ++stats.numNonReclaimableAttempts;
-      LOG_EVERY_N(WARNING, 1'000)
+      FB_LOG_EVERY_MS(WARNING, 1'000)
           << "Can't reclaim from hash build operator, state_["
           << stateName(buildOp->state_) << "], nonReclaimableSection_["
           << buildOp->nonReclaimableSection_ << "], " << buildOp->pool()->name()
diff --git a/velox/exec/HashProbe.cpp b/velox/exec/HashProbe.cpp
index 211c12a52d8f..dffd38a52993 100644
--- a/velox/exec/HashProbe.cpp
+++ b/velox/exec/HashProbe.cpp
@@ -1567,7 +1567,7 @@ void HashProbe::reclaim(
     // TODO: reduce the log frequency if it is too verbose.
     RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount);
     ++stats.numNonReclaimableAttempts;
-    LOG_EVERY_N(WARNING, 1'000)
+    FB_LOG_EVERY_MS(WARNING, 1'000)
         << "Can't reclaim from hash probe operator, state_["
         << ProbeOperatorState(state_) << "], nonReclaimableSection_["
         << nonReclaimableSection_ << "], " << pool()->name()
@@ -1587,7 +1587,7 @@ void HashProbe::reclaim(
     if (probeOp->nonReclaimableState()) {
       RECORD_METRIC_VALUE(kMetricMemoryNonReclaimableCount);
       ++stats.numNonReclaimableAttempts;
-      LOG_EVERY_N(WARNING, 1'000)
+      FB_LOG_EVERY_MS(WARNING, 1'000)
           << "Can't reclaim from hash probe operator, state_["
           << ProbeOperatorState(probeOp->state_) << "], nonReclaimableSection_["
           << probeOp->nonReclaimableSection_ << "], " << probeOp->pool()->name()
@@ -1804,7 +1804,7 @@ void HashProbe::prepareTableSpill(
     // run out of memory if the restored partition still can't fit in memory.
     if (config->exceedSpillLevelLimit(startPartitionBit)) {
       RECORD_METRIC_VALUE(kMetricMaxSpillLevelExceededCount);
-      LOG_EVERY_N(WARNING, 1'000)
+      FB_LOG_EVERY_MS(WARNING, 1'000)
           << "Exceeded spill level limit: " << config->maxSpillLevel
           << ", and disable spilling for memory pool: " << pool()->name();
       exceededMaxSpillLevelLimit_ = true;