Deflake unit test DBErrorHandlingFSTest.AtomicFlushNoSpaceError (#1…

…3234) Summary: `DBErrorHandlingFSTest.AtomicFlushNoSpaceError` is flaky due to seg fault during error recovery: ``` ... frame #5: 0x00007f0b3ea0a9d6 librocksdb.so.9.10`rocksdb::VersionSet::GetObsoleteFiles(std::vector<rocksdb::ObsoleteFileInfo, std::allocator<rocksdb::ObsoleteFileInfo>>*, std::vector<rocksdb::ObsoleteBlobFileInfo, std::allocator<rocksdb::ObsoleteBlobFileInfo>>*, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>>>*, unsigned long) [inlined] std::vector<rocksdb::ObsoleteFileInfo, std::allocator<rocksdb::ObsoleteFileInfo>>::begin(this=<unavailable>) at stl_vector.h:812:16 frame #6: 0x00007f0b3ea0a9d6 librocksdb.so.9.10`rocksdb::VersionSet::GetObsoleteFiles(this=0x0000000000000000, files=size=0, blob_files=size=0, manifest_filenames=size=0, min_pending_output=18446744073709551615) at version_set.cc:7258:18 frame #7: 0x00007f0b3e8ccbc0 librocksdb.so.9.10`rocksdb::DBImpl::FindObsoleteFiles(this=<unavailable>, job_context=<unavailable>, force=<unavailable>, no_full_scan=<unavailable>) at db_impl_files.cc:162:30 frame #8: 0x00007f0b3e85e698 librocksdb.so.9.10`rocksdb::DBImpl::ResumeImpl(this=<unavailable>, context=<unavailable>) at db_impl.cc:434:20 frame #9: 0x00007f0b3e921516 librocksdb.so.9.10`rocksdb::ErrorHandler::RecoverFromBGError(this=<unavailable>, is_manual=<unavailable>) at error_handler.cc:632:46 ``` I suspect this is due to DB being destructed and reopened during recovery. Specifically, the [ClearBGError() call](https://github.com/facebook/rocksdb/blob/c72e79a262bf696faf5f8becabf92374fc14b464/db/db_impl/db_impl.cc#L425) can release and reacquire mutex, and DB can be closed during this time. So it's not safe to access DB state after ClearBGError(). There was a similar story in #9496. [Moving the obsolete files logic after ClearBGError()](#11955) probably makes the seg fault more easily triggered. This PR updates `ClearBGError()` to guarantee that db close cannot finish until the method is returned and the mutex is released. So that we can safely access DB state after calling it. Pull Request resolved: #13234 Test Plan: I could not trigger the seg fault locally, will just monitor future test failures. Reviewed By: jowlyzhang Differential Revision: D67476836 Pulled By: cbi42 fbshipit-source-id: dfb3e9ccd4eb3d43fc596ec10e4052861eeec002
facebook · Dec 20, 2024 · cc30226 · cc30226
1 parent f7b4216
commit cc30226
Show file tree

Hide file tree

Showing 4 changed files with 41 additions and 19 deletions.
diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc
@@ -418,26 +418,26 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
     }
   }
 
-  if (s.ok()) {
-    // This will notify and unblock threads waiting for error recovery to
-    // finish. Those previouly waiting threads can now proceed, which may
-    // include closing the db.
-    s = error_handler_.ClearBGError();
-  } else {
-    // NOTE: this is needed to pass ASSERT_STATUS_CHECKED
-    // in the DBSSTTest.DBWithMaxSpaceAllowedRandomized test.
-    // See https://github.com/facebook/rocksdb/pull/7715#issuecomment-754947952
-    error_handler_.GetRecoveryError().PermitUncheckedError();
-  }
-
   JobContext job_context(0);
   FindObsoleteFiles(&job_context, true);
   mutex_.Unlock();
+  // If DB shutdown initiated here, it will wait for this ongoing recovery.
   job_context.manifest_file_number = 1;
   if (job_context.HaveSomethingToDelete()) {
     PurgeObsoleteFiles(job_context);
   }
   job_context.Clean();
+  mutex_.Lock();
+
+  if (s.ok()) {
+    // Will notify and unblock threads waiting for error recovery to finish.
+    s = error_handler_.ClearBGError();
+  } else {
+    // NOTE: this is needed to pass ASSERT_STATUS_CHECKED
+    // in the DBSSTTest.DBWithMaxSpaceAllowedRandomized test.
+    // See https://github.com/facebook/rocksdb/pull/7715#issuecomment-754947952
+    error_handler_.GetRecoveryError().PermitUncheckedError();
+  }
 
   if (s.ok()) {
     ROCKS_LOG_INFO(immutable_db_options_.info_log, "Successfully resumed DB");
@@ -446,7 +446,6 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
                    s.ToString().c_str());
   }
 
-  mutex_.Lock();
   // Check for shutdown again before scheduling further compactions,
   // since we released and re-acquired the lock above
   if (shutdown_initiated_) {
@@ -540,8 +539,8 @@ Status DBImpl::CloseHelper() {
   // continuing with the shutdown
   mutex_.Lock();
   shutdown_initiated_ = true;
-  error_handler_.CancelErrorRecovery();
-  while (error_handler_.IsRecoveryInProgress()) {
+  error_handler_.CancelErrorRecoveryForShutDown();
+  while (!error_handler_.ReadyForShutdown()) {
     bg_cv_.Wait();
   }
   mutex_.Unlock();

diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc
@@ -159,6 +159,7 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
 
   // Get obsolete files.  This function will also update the list of
   // pending files in VersionSet().
+  assert(versions_);
   versions_->GetObsoleteFiles(
       &job_context->sst_delete_files, &job_context->blob_delete_files,
       &job_context->manifest_delete_files, job_context->min_pending_output);

diff --git a/db/error_handler.cc b/db/error_handler.cc
@@ -227,7 +227,7 @@ std::map<std::tuple<BackgroundErrorReason, bool>, Status::Severity>
          Status::Severity::kFatalError},
 };
 
-void ErrorHandler::CancelErrorRecovery() {
+void ErrorHandler::CancelErrorRecoveryForShutDown() {
   db_mutex_->AssertHeld();
 
   // We'll release the lock before calling sfm, so make sure no new
@@ -585,8 +585,15 @@ Status ErrorHandler::ClearBGError() {
     recovery_error_.PermitUncheckedError();
     recovery_in_prog_ = false;
     soft_error_no_bg_work_ = false;
-    EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, old_bg_error,
-                                           bg_error_, db_mutex_);
+    if (!db_->shutdown_initiated_) {
+      // NotifyOnErrorRecoveryEnd() may release and re-acquire db_mutex_.
+      // Prevent DB from being closed while we notify listeners. DB close will
+      // wait until allow_db_shutdown_ = true, see ReadyForShutdown().
+      allow_db_shutdown_ = false;
+      EventHelpers::NotifyOnErrorRecoveryEnd(
+          db_options_.listeners, old_bg_error, bg_error_, db_mutex_);
+      allow_db_shutdown_ = true;
+    }
   }
   return recovery_error_;
 }

diff --git a/db/error_handler.h b/db/error_handler.h
@@ -44,6 +44,7 @@ class ErrorHandler {
         auto_recovery_(false),
         recovery_in_prog_(false),
         soft_error_no_bg_work_(false),
+        allow_db_shutdown_(true),
         is_db_stopped_(false),
         bg_error_stats_(db_options.statistics) {
     // Clear the checked flag for uninitialized errors
@@ -63,6 +64,12 @@ class ErrorHandler {
 
   Status GetRecoveryError() const { return recovery_error_; }
 
+  // REQUIREs: db mutex held
+  //
+  // Returns non-OK status if encountered error during recovery.
+  // Returns OK if bg error is successfully cleared. May releases and
+  // re-acquire db mutex to notify listeners. However, DB close (if initiated)
+  // will be blocked until db mutex is released after return.
   Status ClearBGError();
 
   bool IsDBStopped() { return is_db_stopped_.load(std::memory_order_acquire); }
@@ -79,8 +86,14 @@ class ErrorHandler {
 
   bool IsRecoveryInProgress() { return recovery_in_prog_; }
 
+  // REQUIRES: db mutex held
+  bool ReadyForShutdown() {
+    db_mutex_->AssertHeld();
+    return !recovery_in_prog_ && allow_db_shutdown_;
+  }
+
   Status RecoverFromBGError(bool is_manual = false);
-  void CancelErrorRecovery();
+  void CancelErrorRecoveryForShutDown();
 
   void EndAutoRecovery();
 
@@ -121,6 +134,8 @@ class ErrorHandler {
   // A flag to indicate that for the soft error, we should not allow any
   // background work except the work is from recovery.
   bool soft_error_no_bg_work_;
+  // Used in ClearBGError() to prevent DB from being closed.
+  bool allow_db_shutdown_;
 
   // Used to store the context for recover, such as flush reason.
   DBRecoverContext recover_context_;