From 1418e868bbf6dae700904c2e5a85fb57907e026e Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Fri, 24 Sep 2021 16:52:30 -0700 Subject: [PATCH] Prevent deadlock in db_stress with DbStressCompactionFilter (#8956) Summary: The cyclic dependency was: - `StressTest::OperateDb()` locks the mutex for key 'k' - `StressTest::OperateDb()` calls a function like `PauseBackgroundWork()`, which waits for pending compaction to complete. - The pending compaction reaches key `k` and `DbStressCompactionFilter::FilterV2()` calls `Lock()` on that key's mutex, which hangs forever. The cycle can be broken by using a new function, `port::Mutex::TryLock()`, which returns immediately upon failure to acquire a lock. In that case `DbStressCompactionFilter::FilterV2()` can just decide to keep the key. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8956 Reviewed By: riversand963 Differential Revision: D31183718 Pulled By: ajkr fbshipit-source-id: 329e4a31ce43085af174cf367ef560b5a04399c5 (cherry picked from commit 791bff5b4e1f0f35448eb2cffa2683e01952038b) Conflicts: port/port_posix.cc -- `errnoStr` is not used in the version we're cherry-picking on. Let's stay with `strerror`. --- db_stress_tool/db_stress_compaction_filter.h | 13 +++++++++++-- port/port_posix.cc | 12 +++++++++++- port/port_posix.h | 3 +++ port/win/port_win.h | 10 ++++++++++ 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/db_stress_tool/db_stress_compaction_filter.h b/db_stress_tool/db_stress_compaction_filter.h index 60f8ac71d6e..4cd076882b9 100644 --- a/db_stress_tool/db_stress_compaction_filter.h +++ b/db_stress_tool/db_stress_compaction_filter.h @@ -37,8 +37,17 @@ class DbStressCompactionFilter : public CompactionFilter { bool ok = GetIntVal(key.ToString(), &key_num); assert(ok); (void)ok; - MutexLock key_lock(state_->GetMutexForKey(cf_id_, key_num)); - if (!state_->Exists(cf_id_, key_num)) { + port::Mutex* key_mutex = state_->GetMutexForKey(cf_id_, key_num); + if (!key_mutex->TryLock()) { + return Decision::kKeep; + } + // Reaching here means we acquired the lock. + + bool key_exists = state_->Exists(cf_id_, key_num); + + key_mutex->Unlock(); + + if (!key_exists) { return Decision::kRemove; } return Decision::kKeep; diff --git a/port/port_posix.cc b/port/port_posix.cc index 59fa6b89b79..42d01fcf68b 100644 --- a/port/port_posix.cc +++ b/port/port_posix.cc @@ -44,7 +44,7 @@ extern const bool kDefaultToAdaptiveMutex = false; namespace port { static int PthreadCall(const char* label, int result) { - if (result != 0 && result != ETIMEDOUT) { + if (result != 0 && result != ETIMEDOUT && result != EBUSY) { fprintf(stderr, "pthread %s: %s\n", label, strerror(result)); abort(); } @@ -87,6 +87,16 @@ void Mutex::Unlock() { PthreadCall("unlock", pthread_mutex_unlock(&mu_)); } +bool Mutex::TryLock() { + bool ret = PthreadCall("trylock", pthread_mutex_trylock(&mu_)) == 0; +#ifndef NDEBUG + if (ret) { + locked_ = true; + } +#endif + return ret; +} + void Mutex::AssertHeld() { #ifndef NDEBUG assert(locked_); diff --git a/port/port_posix.h b/port/port_posix.h index a24c7b690e2..a7709f7fa80 100644 --- a/port/port_posix.h +++ b/port/port_posix.h @@ -116,6 +116,9 @@ class Mutex { void Lock(); void Unlock(); + + bool TryLock(); + // this will assert if the mutex is not locked // it does NOT verify that mutex is held by a calling thread void AssertHeld(); diff --git a/port/win/port_win.h b/port/win/port_win.h index 2c5b8ff0533..0749b91a39d 100644 --- a/port/win/port_win.h +++ b/port/win/port_win.h @@ -148,6 +148,16 @@ class Mutex { mutex_.unlock(); } + bool TryLock() { + bool ret = mutex_.try_lock(); +#ifndef NDEBUG + if (ret) { + locked_ = true; + } +#endif + return ret; + } + // this will assert if the mutex is not locked // it does NOT verify that mutex is held by a calling thread void AssertHeld() {